1b8e80941Smrg/*
2b8e80941Smrg * Copyright 2014 Advanced Micro Devices, Inc.
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b8e80941Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b8e80941Smrg * SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg#include "ac_binary.h"
25b8e80941Smrg
26b8e80941Smrg#include "util/u_math.h"
27b8e80941Smrg#include "util/u_memory.h"
28b8e80941Smrg
29b8e80941Smrg#include <gelf.h>
30b8e80941Smrg#include <libelf.h>
31b8e80941Smrg#include <stdio.h>
32b8e80941Smrg
33b8e80941Smrg#include <sid.h>
34b8e80941Smrg
/* Pseudo register keys found in the config data next to real register
 * offsets.  ac_shader_binary_read_config() stores the values paired with
 * them in conf->spilled_sgprs and conf->spilled_vgprs.
 */
enum {
	SPILLED_SGPRS = 0x4,
	SPILLED_VGPRS = 0x8
};
37b8e80941Smrg
38b8e80941Smrgstatic void parse_symbol_table(Elf_Data *symbol_table_data,
39b8e80941Smrg				const GElf_Shdr *symbol_table_header,
40b8e80941Smrg				struct ac_shader_binary *binary)
41b8e80941Smrg{
42b8e80941Smrg	GElf_Sym symbol;
43b8e80941Smrg	unsigned i = 0;
44b8e80941Smrg	unsigned symbol_count =
45b8e80941Smrg		symbol_table_header->sh_size / symbol_table_header->sh_entsize;
46b8e80941Smrg
47b8e80941Smrg	/* We are over allocating this list, because symbol_count gives the
48b8e80941Smrg	 * total number of symbols, and we will only be filling the list
49b8e80941Smrg	 * with offsets of global symbols.  The memory savings from
50b8e80941Smrg	 * allocating the correct size of this list will be small, and
51b8e80941Smrg	 * I don't think it is worth the cost of pre-computing the number
52b8e80941Smrg	 * of global symbols.
53b8e80941Smrg	 */
54b8e80941Smrg	binary->global_symbol_offsets = CALLOC(symbol_count, sizeof(uint64_t));
55b8e80941Smrg
56b8e80941Smrg	while (gelf_getsym(symbol_table_data, i++, &symbol)) {
57b8e80941Smrg		unsigned i;
58b8e80941Smrg		if (GELF_ST_BIND(symbol.st_info) != STB_GLOBAL ||
59b8e80941Smrg		    symbol.st_shndx == 0 /* Undefined symbol */) {
60b8e80941Smrg			continue;
61b8e80941Smrg		}
62b8e80941Smrg
63b8e80941Smrg		binary->global_symbol_offsets[binary->global_symbol_count] =
64b8e80941Smrg					symbol.st_value;
65b8e80941Smrg
66b8e80941Smrg		/* Sort the list using bubble sort.  This list will usually
67b8e80941Smrg		 * be small. */
68b8e80941Smrg		for (i = binary->global_symbol_count; i > 0; --i) {
69b8e80941Smrg			uint64_t lhs = binary->global_symbol_offsets[i - 1];
70b8e80941Smrg			uint64_t rhs = binary->global_symbol_offsets[i];
71b8e80941Smrg			if (lhs < rhs) {
72b8e80941Smrg				break;
73b8e80941Smrg			}
74b8e80941Smrg			binary->global_symbol_offsets[i] = lhs;
75b8e80941Smrg			binary->global_symbol_offsets[i - 1] = rhs;
76b8e80941Smrg		}
77b8e80941Smrg		++binary->global_symbol_count;
78b8e80941Smrg	}
79b8e80941Smrg}
80b8e80941Smrg
81b8e80941Smrgstatic void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols,
82b8e80941Smrg			unsigned symbol_sh_link,
83b8e80941Smrg			struct ac_shader_binary *binary)
84b8e80941Smrg{
85b8e80941Smrg	unsigned i;
86b8e80941Smrg
87b8e80941Smrg	if (!relocs || !symbols || !binary->reloc_count) {
88b8e80941Smrg		return;
89b8e80941Smrg	}
90b8e80941Smrg	binary->relocs = CALLOC(binary->reloc_count,
91b8e80941Smrg			sizeof(struct ac_shader_reloc));
92b8e80941Smrg	for (i = 0; i < binary->reloc_count; i++) {
93b8e80941Smrg		GElf_Sym symbol;
94b8e80941Smrg		GElf_Rel rel;
95b8e80941Smrg		char *symbol_name;
96b8e80941Smrg		struct ac_shader_reloc *reloc = &binary->relocs[i];
97b8e80941Smrg
98b8e80941Smrg		gelf_getrel(relocs, i, &rel);
99b8e80941Smrg		gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &symbol);
100b8e80941Smrg		symbol_name = elf_strptr(elf, symbol_sh_link, symbol.st_name);
101b8e80941Smrg
102b8e80941Smrg		reloc->offset = rel.r_offset;
103b8e80941Smrg		strncpy(reloc->name, symbol_name, sizeof(reloc->name)-1);
104b8e80941Smrg		reloc->name[sizeof(reloc->name)-1] = 0;
105b8e80941Smrg	}
106b8e80941Smrg}
107b8e80941Smrg
108b8e80941Smrgbool ac_elf_read(const char *elf_data, unsigned elf_size,
109b8e80941Smrg		 struct ac_shader_binary *binary)
110b8e80941Smrg{
111b8e80941Smrg	char *elf_buffer;
112b8e80941Smrg	Elf *elf;
113b8e80941Smrg	Elf_Scn *section = NULL;
114b8e80941Smrg	Elf_Data *symbols = NULL, *relocs = NULL;
115b8e80941Smrg	size_t section_str_index;
116b8e80941Smrg	unsigned symbol_sh_link = 0;
117b8e80941Smrg	bool success = true;
118b8e80941Smrg
119b8e80941Smrg	/* One of the libelf implementations
120b8e80941Smrg	 * (http://www.mr511.de/software/english.htm) requires calling
121b8e80941Smrg	 * elf_version() before elf_memory().
122b8e80941Smrg	 */
123b8e80941Smrg	elf_version(EV_CURRENT);
124b8e80941Smrg	elf_buffer = MALLOC(elf_size);
125b8e80941Smrg	memcpy(elf_buffer, elf_data, elf_size);
126b8e80941Smrg
127b8e80941Smrg	elf = elf_memory(elf_buffer, elf_size);
128b8e80941Smrg
129b8e80941Smrg	elf_getshdrstrndx(elf, &section_str_index);
130b8e80941Smrg
131b8e80941Smrg	while ((section = elf_nextscn(elf, section))) {
132b8e80941Smrg		const char *name;
133b8e80941Smrg		Elf_Data *section_data = NULL;
134b8e80941Smrg		GElf_Shdr section_header;
135b8e80941Smrg		if (gelf_getshdr(section, &section_header) != &section_header) {
136b8e80941Smrg			fprintf(stderr, "Failed to read ELF section header\n");
137b8e80941Smrg			success = false;
138b8e80941Smrg			break;
139b8e80941Smrg		}
140b8e80941Smrg		name = elf_strptr(elf, section_str_index, section_header.sh_name);
141b8e80941Smrg		if (!strcmp(name, ".text")) {
142b8e80941Smrg			section_data = elf_getdata(section, section_data);
143b8e80941Smrg			binary->code_size = section_data->d_size;
144b8e80941Smrg			binary->code = MALLOC(binary->code_size * sizeof(unsigned char));
145b8e80941Smrg			memcpy(binary->code, section_data->d_buf, binary->code_size);
146b8e80941Smrg		} else if (!strcmp(name, ".AMDGPU.config")) {
147b8e80941Smrg			section_data = elf_getdata(section, section_data);
148b8e80941Smrg			binary->config_size = section_data->d_size;
149b8e80941Smrg			if (!binary->config_size) {
150b8e80941Smrg				fprintf(stderr, ".AMDGPU.config is empty!\n");
151b8e80941Smrg				success = false;
152b8e80941Smrg				break;
153b8e80941Smrg			}
154b8e80941Smrg			binary->config = MALLOC(binary->config_size * sizeof(unsigned char));
155b8e80941Smrg			memcpy(binary->config, section_data->d_buf, binary->config_size);
156b8e80941Smrg		} else if (!strcmp(name, ".AMDGPU.disasm")) {
157b8e80941Smrg			/* Always read disassembly if it's available. */
158b8e80941Smrg			section_data = elf_getdata(section, section_data);
159b8e80941Smrg			binary->disasm_string = strndup(section_data->d_buf,
160b8e80941Smrg							section_data->d_size);
161b8e80941Smrg		} else if (!strncmp(name, ".rodata", 7)) {
162b8e80941Smrg			section_data = elf_getdata(section, section_data);
163b8e80941Smrg			binary->rodata_size = section_data->d_size;
164b8e80941Smrg			binary->rodata = MALLOC(binary->rodata_size * sizeof(unsigned char));
165b8e80941Smrg			memcpy(binary->rodata, section_data->d_buf, binary->rodata_size);
166b8e80941Smrg		} else if (!strncmp(name, ".symtab", 7)) {
167b8e80941Smrg			symbols = elf_getdata(section, section_data);
168b8e80941Smrg			symbol_sh_link = section_header.sh_link;
169b8e80941Smrg			parse_symbol_table(symbols, &section_header, binary);
170b8e80941Smrg		} else if (!strcmp(name, ".rel.text")) {
171b8e80941Smrg			relocs = elf_getdata(section, section_data);
172b8e80941Smrg			binary->reloc_count = section_header.sh_size /
173b8e80941Smrg					section_header.sh_entsize;
174b8e80941Smrg		}
175b8e80941Smrg	}
176b8e80941Smrg
177b8e80941Smrg	parse_relocs(elf, relocs, symbols, symbol_sh_link, binary);
178b8e80941Smrg
179b8e80941Smrg	if (elf){
180b8e80941Smrg		elf_end(elf);
181b8e80941Smrg	}
182b8e80941Smrg	FREE(elf_buffer);
183b8e80941Smrg
184b8e80941Smrg	/* Cache the config size per symbol */
185b8e80941Smrg	if (binary->global_symbol_count) {
186b8e80941Smrg		binary->config_size_per_symbol =
187b8e80941Smrg			binary->config_size / binary->global_symbol_count;
188b8e80941Smrg	} else {
189b8e80941Smrg		binary->global_symbol_count = 1;
190b8e80941Smrg		binary->config_size_per_symbol = binary->config_size;
191b8e80941Smrg	}
192b8e80941Smrg	return success;
193b8e80941Smrg}
194b8e80941Smrg
195b8e80941Smrgconst unsigned char *ac_shader_binary_config_start(
196b8e80941Smrg	const struct ac_shader_binary *binary,
197b8e80941Smrg	uint64_t symbol_offset)
198b8e80941Smrg{
199b8e80941Smrg	unsigned i;
200b8e80941Smrg	for (i = 0; i < binary->global_symbol_count; ++i) {
201b8e80941Smrg		if (binary->global_symbol_offsets[i] == symbol_offset) {
202b8e80941Smrg			unsigned offset = i * binary->config_size_per_symbol;
203b8e80941Smrg			return binary->config + offset;
204b8e80941Smrg		}
205b8e80941Smrg	}
206b8e80941Smrg	return binary->config;
207b8e80941Smrg}
208b8e80941Smrg
209b8e80941Smrg
/* Relocation names that ac_shader_binary_read_config() looks for to decide
 * whether the shader really needs the scratch buffer. */
static const char *scratch_rsrc_dword0_symbol = "SCRATCH_RSRC_DWORD0";
static const char *scratch_rsrc_dword1_symbol = "SCRATCH_RSRC_DWORD1";
215b8e80941Smrg
216b8e80941Smrgvoid ac_shader_binary_read_config(struct ac_shader_binary *binary,
217b8e80941Smrg				  struct ac_shader_config *conf,
218b8e80941Smrg				  unsigned symbol_offset,
219b8e80941Smrg				  bool supports_spill)
220b8e80941Smrg{
221b8e80941Smrg	unsigned i;
222b8e80941Smrg	const unsigned char *config =
223b8e80941Smrg		ac_shader_binary_config_start(binary, symbol_offset);
224b8e80941Smrg	bool really_needs_scratch = false;
225b8e80941Smrg	uint32_t wavesize = 0;
226b8e80941Smrg	/* LLVM adds SGPR spills to the scratch size.
227b8e80941Smrg	 * Find out if we really need the scratch buffer.
228b8e80941Smrg	 */
229b8e80941Smrg	if (supports_spill) {
230b8e80941Smrg		really_needs_scratch = true;
231b8e80941Smrg	} else {
232b8e80941Smrg		for (i = 0; i < binary->reloc_count; i++) {
233b8e80941Smrg			const struct ac_shader_reloc *reloc = &binary->relocs[i];
234b8e80941Smrg
235b8e80941Smrg			if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
236b8e80941Smrg			    !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
237b8e80941Smrg				really_needs_scratch = true;
238b8e80941Smrg				break;
239b8e80941Smrg			}
240b8e80941Smrg		}
241b8e80941Smrg	}
242b8e80941Smrg
243b8e80941Smrg	for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
244b8e80941Smrg		unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
245b8e80941Smrg		unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
246b8e80941Smrg		switch (reg) {
247b8e80941Smrg		case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
248b8e80941Smrg		case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
249b8e80941Smrg		case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
250b8e80941Smrg		case R_00B848_COMPUTE_PGM_RSRC1:
251b8e80941Smrg		case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
252b8e80941Smrg			conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
253b8e80941Smrg			conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
254b8e80941Smrg			conf->float_mode =  G_00B028_FLOAT_MODE(value);
255b8e80941Smrg			break;
256b8e80941Smrg		case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
257b8e80941Smrg			conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
258b8e80941Smrg			break;
259b8e80941Smrg		case R_00B84C_COMPUTE_PGM_RSRC2:
260b8e80941Smrg			conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
261b8e80941Smrg			break;
262b8e80941Smrg		case R_0286CC_SPI_PS_INPUT_ENA:
263b8e80941Smrg			conf->spi_ps_input_ena = value;
264b8e80941Smrg			break;
265b8e80941Smrg		case R_0286D0_SPI_PS_INPUT_ADDR:
266b8e80941Smrg			conf->spi_ps_input_addr = value;
267b8e80941Smrg			break;
268b8e80941Smrg		case R_0286E8_SPI_TMPRING_SIZE:
269b8e80941Smrg		case R_00B860_COMPUTE_TMPRING_SIZE:
270b8e80941Smrg			/* WAVESIZE is in units of 256 dwords. */
271b8e80941Smrg			wavesize = value;
272b8e80941Smrg			break;
273b8e80941Smrg		case SPILLED_SGPRS:
274b8e80941Smrg			conf->spilled_sgprs = value;
275b8e80941Smrg			break;
276b8e80941Smrg		case SPILLED_VGPRS:
277b8e80941Smrg			conf->spilled_vgprs = value;
278b8e80941Smrg			break;
279b8e80941Smrg		default:
280b8e80941Smrg			{
281b8e80941Smrg				static bool printed;
282b8e80941Smrg
283b8e80941Smrg				if (!printed) {
284b8e80941Smrg					fprintf(stderr, "Warning: LLVM emitted unknown "
285b8e80941Smrg						"config register: 0x%x\n", reg);
286b8e80941Smrg					printed = true;
287b8e80941Smrg				}
288b8e80941Smrg			}
289b8e80941Smrg			break;
290b8e80941Smrg		}
291b8e80941Smrg
292b8e80941Smrg		if (!conf->spi_ps_input_addr)
293b8e80941Smrg			conf->spi_ps_input_addr = conf->spi_ps_input_ena;
294b8e80941Smrg	}
295b8e80941Smrg
296b8e80941Smrg	if (really_needs_scratch) {
297b8e80941Smrg		/* sgprs spills aren't spilling */
298b8e80941Smrg	        conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(wavesize) * 256 * 4;
299b8e80941Smrg	}
300b8e80941Smrg}
301b8e80941Smrg
302b8e80941Smrgvoid ac_shader_binary_clean(struct ac_shader_binary *b)
303b8e80941Smrg{
304b8e80941Smrg	if (!b)
305b8e80941Smrg		return;
306b8e80941Smrg	FREE(b->code);
307b8e80941Smrg	FREE(b->config);
308b8e80941Smrg	FREE(b->rodata);
309b8e80941Smrg	FREE(b->global_symbol_offsets);
310b8e80941Smrg	FREE(b->relocs);
311b8e80941Smrg	FREE(b->disasm_string);
312b8e80941Smrg	FREE(b->llvm_ir_string);
313b8e80941Smrg}
314