1b8e80941Smrg/* 2b8e80941Smrg * Copyright 2014 Advanced Micro Devices, Inc. 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20b8e80941Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21b8e80941Smrg * SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include "ac_binary.h" 25b8e80941Smrg 26b8e80941Smrg#include "util/u_math.h" 27b8e80941Smrg#include "util/u_memory.h" 28b8e80941Smrg 29b8e80941Smrg#include <gelf.h> 30b8e80941Smrg#include <libelf.h> 31b8e80941Smrg#include <stdio.h> 32b8e80941Smrg 33b8e80941Smrg#include <sid.h> 34b8e80941Smrg 35b8e80941Smrg#define SPILLED_SGPRS 0x4 36b8e80941Smrg#define SPILLED_VGPRS 0x8 37b8e80941Smrg 38b8e80941Smrgstatic void parse_symbol_table(Elf_Data *symbol_table_data, 39b8e80941Smrg const GElf_Shdr *symbol_table_header, 40b8e80941Smrg struct ac_shader_binary *binary) 41b8e80941Smrg{ 42b8e80941Smrg GElf_Sym symbol; 43b8e80941Smrg unsigned i = 0; 44b8e80941Smrg unsigned symbol_count = 45b8e80941Smrg symbol_table_header->sh_size / symbol_table_header->sh_entsize; 46b8e80941Smrg 47b8e80941Smrg /* We are over allocating this list, because symbol_count gives the 48b8e80941Smrg * total number of symbols, and we will only be filling the list 49b8e80941Smrg * with offsets of global symbols. The memory savings from 50b8e80941Smrg * allocating the correct size of this list will be small, and 51b8e80941Smrg * I don't think it is worth the cost of pre-computing the number 52b8e80941Smrg * of global symbols. 53b8e80941Smrg */ 54b8e80941Smrg binary->global_symbol_offsets = CALLOC(symbol_count, sizeof(uint64_t)); 55b8e80941Smrg 56b8e80941Smrg while (gelf_getsym(symbol_table_data, i++, &symbol)) { 57b8e80941Smrg unsigned i; 58b8e80941Smrg if (GELF_ST_BIND(symbol.st_info) != STB_GLOBAL || 59b8e80941Smrg symbol.st_shndx == 0 /* Undefined symbol */) { 60b8e80941Smrg continue; 61b8e80941Smrg } 62b8e80941Smrg 63b8e80941Smrg binary->global_symbol_offsets[binary->global_symbol_count] = 64b8e80941Smrg symbol.st_value; 65b8e80941Smrg 66b8e80941Smrg /* Sort the list using bubble sort. This list will usually 67b8e80941Smrg * be small. */ 68b8e80941Smrg for (i = binary->global_symbol_count; i > 0; --i) { 69b8e80941Smrg uint64_t lhs = binary->global_symbol_offsets[i - 1]; 70b8e80941Smrg uint64_t rhs = binary->global_symbol_offsets[i]; 71b8e80941Smrg if (lhs < rhs) { 72b8e80941Smrg break; 73b8e80941Smrg } 74b8e80941Smrg binary->global_symbol_offsets[i] = lhs; 75b8e80941Smrg binary->global_symbol_offsets[i - 1] = rhs; 76b8e80941Smrg } 77b8e80941Smrg ++binary->global_symbol_count; 78b8e80941Smrg } 79b8e80941Smrg} 80b8e80941Smrg 81b8e80941Smrgstatic void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols, 82b8e80941Smrg unsigned symbol_sh_link, 83b8e80941Smrg struct ac_shader_binary *binary) 84b8e80941Smrg{ 85b8e80941Smrg unsigned i; 86b8e80941Smrg 87b8e80941Smrg if (!relocs || !symbols || !binary->reloc_count) { 88b8e80941Smrg return; 89b8e80941Smrg } 90b8e80941Smrg binary->relocs = CALLOC(binary->reloc_count, 91b8e80941Smrg sizeof(struct ac_shader_reloc)); 92b8e80941Smrg for (i = 0; i < binary->reloc_count; i++) { 93b8e80941Smrg GElf_Sym symbol; 94b8e80941Smrg GElf_Rel rel; 95b8e80941Smrg char *symbol_name; 96b8e80941Smrg struct ac_shader_reloc *reloc = &binary->relocs[i]; 97b8e80941Smrg 98b8e80941Smrg gelf_getrel(relocs, i, &rel); 99b8e80941Smrg gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &symbol); 100b8e80941Smrg symbol_name = elf_strptr(elf, symbol_sh_link, symbol.st_name); 101b8e80941Smrg 102b8e80941Smrg reloc->offset = rel.r_offset; 103b8e80941Smrg strncpy(reloc->name, symbol_name, sizeof(reloc->name)-1); 104b8e80941Smrg reloc->name[sizeof(reloc->name)-1] = 0; 105b8e80941Smrg } 106b8e80941Smrg} 107b8e80941Smrg 108b8e80941Smrgbool ac_elf_read(const char *elf_data, unsigned elf_size, 109b8e80941Smrg struct ac_shader_binary *binary) 110b8e80941Smrg{ 111b8e80941Smrg char *elf_buffer; 112b8e80941Smrg Elf *elf; 113b8e80941Smrg Elf_Scn *section = NULL; 114b8e80941Smrg Elf_Data *symbols = NULL, *relocs = NULL; 115b8e80941Smrg size_t section_str_index; 116b8e80941Smrg unsigned symbol_sh_link = 0; 117b8e80941Smrg bool success = true; 118b8e80941Smrg 119b8e80941Smrg /* One of the libelf implementations 120b8e80941Smrg * (http://www.mr511.de/software/english.htm) requires calling 121b8e80941Smrg * elf_version() before elf_memory(). 122b8e80941Smrg */ 123b8e80941Smrg elf_version(EV_CURRENT); 124b8e80941Smrg elf_buffer = MALLOC(elf_size); 125b8e80941Smrg memcpy(elf_buffer, elf_data, elf_size); 126b8e80941Smrg 127b8e80941Smrg elf = elf_memory(elf_buffer, elf_size); 128b8e80941Smrg 129b8e80941Smrg elf_getshdrstrndx(elf, §ion_str_index); 130b8e80941Smrg 131b8e80941Smrg while ((section = elf_nextscn(elf, section))) { 132b8e80941Smrg const char *name; 133b8e80941Smrg Elf_Data *section_data = NULL; 134b8e80941Smrg GElf_Shdr section_header; 135b8e80941Smrg if (gelf_getshdr(section, §ion_header) != §ion_header) { 136b8e80941Smrg fprintf(stderr, "Failed to read ELF section header\n"); 137b8e80941Smrg success = false; 138b8e80941Smrg break; 139b8e80941Smrg } 140b8e80941Smrg name = elf_strptr(elf, section_str_index, section_header.sh_name); 141b8e80941Smrg if (!strcmp(name, ".text")) { 142b8e80941Smrg section_data = elf_getdata(section, section_data); 143b8e80941Smrg binary->code_size = section_data->d_size; 144b8e80941Smrg binary->code = MALLOC(binary->code_size * sizeof(unsigned char)); 145b8e80941Smrg memcpy(binary->code, section_data->d_buf, binary->code_size); 146b8e80941Smrg } else if (!strcmp(name, ".AMDGPU.config")) { 147b8e80941Smrg section_data = elf_getdata(section, section_data); 148b8e80941Smrg binary->config_size = section_data->d_size; 149b8e80941Smrg if (!binary->config_size) { 150b8e80941Smrg fprintf(stderr, ".AMDGPU.config is empty!\n"); 151b8e80941Smrg success = false; 152b8e80941Smrg break; 153b8e80941Smrg } 154b8e80941Smrg binary->config = MALLOC(binary->config_size * sizeof(unsigned char)); 155b8e80941Smrg memcpy(binary->config, section_data->d_buf, binary->config_size); 156b8e80941Smrg } else if (!strcmp(name, ".AMDGPU.disasm")) { 157b8e80941Smrg /* Always read disassembly if it's available. */ 158b8e80941Smrg section_data = elf_getdata(section, section_data); 159b8e80941Smrg binary->disasm_string = strndup(section_data->d_buf, 160b8e80941Smrg section_data->d_size); 161b8e80941Smrg } else if (!strncmp(name, ".rodata", 7)) { 162b8e80941Smrg section_data = elf_getdata(section, section_data); 163b8e80941Smrg binary->rodata_size = section_data->d_size; 164b8e80941Smrg binary->rodata = MALLOC(binary->rodata_size * sizeof(unsigned char)); 165b8e80941Smrg memcpy(binary->rodata, section_data->d_buf, binary->rodata_size); 166b8e80941Smrg } else if (!strncmp(name, ".symtab", 7)) { 167b8e80941Smrg symbols = elf_getdata(section, section_data); 168b8e80941Smrg symbol_sh_link = section_header.sh_link; 169b8e80941Smrg parse_symbol_table(symbols, §ion_header, binary); 170b8e80941Smrg } else if (!strcmp(name, ".rel.text")) { 171b8e80941Smrg relocs = elf_getdata(section, section_data); 172b8e80941Smrg binary->reloc_count = section_header.sh_size / 173b8e80941Smrg section_header.sh_entsize; 174b8e80941Smrg } 175b8e80941Smrg } 176b8e80941Smrg 177b8e80941Smrg parse_relocs(elf, relocs, symbols, symbol_sh_link, binary); 178b8e80941Smrg 179b8e80941Smrg if (elf){ 180b8e80941Smrg elf_end(elf); 181b8e80941Smrg } 182b8e80941Smrg FREE(elf_buffer); 183b8e80941Smrg 184b8e80941Smrg /* Cache the config size per symbol */ 185b8e80941Smrg if (binary->global_symbol_count) { 186b8e80941Smrg binary->config_size_per_symbol = 187b8e80941Smrg binary->config_size / binary->global_symbol_count; 188b8e80941Smrg } else { 189b8e80941Smrg binary->global_symbol_count = 1; 190b8e80941Smrg binary->config_size_per_symbol = binary->config_size; 191b8e80941Smrg } 192b8e80941Smrg return success; 193b8e80941Smrg} 194b8e80941Smrg 195b8e80941Smrgconst unsigned char *ac_shader_binary_config_start( 196b8e80941Smrg const struct ac_shader_binary *binary, 197b8e80941Smrg uint64_t symbol_offset) 198b8e80941Smrg{ 199b8e80941Smrg unsigned i; 200b8e80941Smrg for (i = 0; i < binary->global_symbol_count; ++i) { 201b8e80941Smrg if (binary->global_symbol_offsets[i] == symbol_offset) { 202b8e80941Smrg unsigned offset = i * binary->config_size_per_symbol; 203b8e80941Smrg return binary->config + offset; 204b8e80941Smrg } 205b8e80941Smrg } 206b8e80941Smrg return binary->config; 207b8e80941Smrg} 208b8e80941Smrg 209b8e80941Smrg 210b8e80941Smrgstatic const char *scratch_rsrc_dword0_symbol = 211b8e80941Smrg "SCRATCH_RSRC_DWORD0"; 212b8e80941Smrg 213b8e80941Smrgstatic const char *scratch_rsrc_dword1_symbol = 214b8e80941Smrg "SCRATCH_RSRC_DWORD1"; 215b8e80941Smrg 216b8e80941Smrgvoid ac_shader_binary_read_config(struct ac_shader_binary *binary, 217b8e80941Smrg struct ac_shader_config *conf, 218b8e80941Smrg unsigned symbol_offset, 219b8e80941Smrg bool supports_spill) 220b8e80941Smrg{ 221b8e80941Smrg unsigned i; 222b8e80941Smrg const unsigned char *config = 223b8e80941Smrg ac_shader_binary_config_start(binary, symbol_offset); 224b8e80941Smrg bool really_needs_scratch = false; 225b8e80941Smrg uint32_t wavesize = 0; 226b8e80941Smrg /* LLVM adds SGPR spills to the scratch size. 227b8e80941Smrg * Find out if we really need the scratch buffer. 228b8e80941Smrg */ 229b8e80941Smrg if (supports_spill) { 230b8e80941Smrg really_needs_scratch = true; 231b8e80941Smrg } else { 232b8e80941Smrg for (i = 0; i < binary->reloc_count; i++) { 233b8e80941Smrg const struct ac_shader_reloc *reloc = &binary->relocs[i]; 234b8e80941Smrg 235b8e80941Smrg if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) || 236b8e80941Smrg !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) { 237b8e80941Smrg really_needs_scratch = true; 238b8e80941Smrg break; 239b8e80941Smrg } 240b8e80941Smrg } 241b8e80941Smrg } 242b8e80941Smrg 243b8e80941Smrg for (i = 0; i < binary->config_size_per_symbol; i+= 8) { 244b8e80941Smrg unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i)); 245b8e80941Smrg unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4)); 246b8e80941Smrg switch (reg) { 247b8e80941Smrg case R_00B028_SPI_SHADER_PGM_RSRC1_PS: 248b8e80941Smrg case R_00B128_SPI_SHADER_PGM_RSRC1_VS: 249b8e80941Smrg case R_00B228_SPI_SHADER_PGM_RSRC1_GS: 250b8e80941Smrg case R_00B848_COMPUTE_PGM_RSRC1: 251b8e80941Smrg case R_00B428_SPI_SHADER_PGM_RSRC1_HS: 252b8e80941Smrg conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8); 253b8e80941Smrg conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4); 254b8e80941Smrg conf->float_mode = G_00B028_FLOAT_MODE(value); 255b8e80941Smrg break; 256b8e80941Smrg case R_00B02C_SPI_SHADER_PGM_RSRC2_PS: 257b8e80941Smrg conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value)); 258b8e80941Smrg break; 259b8e80941Smrg case R_00B84C_COMPUTE_PGM_RSRC2: 260b8e80941Smrg conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value)); 261b8e80941Smrg break; 262b8e80941Smrg case R_0286CC_SPI_PS_INPUT_ENA: 263b8e80941Smrg conf->spi_ps_input_ena = value; 264b8e80941Smrg break; 265b8e80941Smrg case R_0286D0_SPI_PS_INPUT_ADDR: 266b8e80941Smrg conf->spi_ps_input_addr = value; 267b8e80941Smrg break; 268b8e80941Smrg case R_0286E8_SPI_TMPRING_SIZE: 269b8e80941Smrg case R_00B860_COMPUTE_TMPRING_SIZE: 270b8e80941Smrg /* WAVESIZE is in units of 256 dwords. */ 271b8e80941Smrg wavesize = value; 272b8e80941Smrg break; 273b8e80941Smrg case SPILLED_SGPRS: 274b8e80941Smrg conf->spilled_sgprs = value; 275b8e80941Smrg break; 276b8e80941Smrg case SPILLED_VGPRS: 277b8e80941Smrg conf->spilled_vgprs = value; 278b8e80941Smrg break; 279b8e80941Smrg default: 280b8e80941Smrg { 281b8e80941Smrg static bool printed; 282b8e80941Smrg 283b8e80941Smrg if (!printed) { 284b8e80941Smrg fprintf(stderr, "Warning: LLVM emitted unknown " 285b8e80941Smrg "config register: 0x%x\n", reg); 286b8e80941Smrg printed = true; 287b8e80941Smrg } 288b8e80941Smrg } 289b8e80941Smrg break; 290b8e80941Smrg } 291b8e80941Smrg 292b8e80941Smrg if (!conf->spi_ps_input_addr) 293b8e80941Smrg conf->spi_ps_input_addr = conf->spi_ps_input_ena; 294b8e80941Smrg } 295b8e80941Smrg 296b8e80941Smrg if (really_needs_scratch) { 297b8e80941Smrg /* sgprs spills aren't spilling */ 298b8e80941Smrg conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(wavesize) * 256 * 4; 299b8e80941Smrg } 300b8e80941Smrg} 301b8e80941Smrg 302b8e80941Smrgvoid ac_shader_binary_clean(struct ac_shader_binary *b) 303b8e80941Smrg{ 304b8e80941Smrg if (!b) 305b8e80941Smrg return; 306b8e80941Smrg FREE(b->code); 307b8e80941Smrg FREE(b->config); 308b8e80941Smrg FREE(b->rodata); 309b8e80941Smrg FREE(b->global_symbol_offsets); 310b8e80941Smrg FREE(b->relocs); 311b8e80941Smrg FREE(b->disasm_string); 312b8e80941Smrg FREE(b->llvm_ir_string); 313b8e80941Smrg} 314