1/*
2 * Copyright 2021 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 */
25
26/**
27 * \file ac_rgp_elf_object_pack.c
28 *
29 * This file provides functions to create elf object for rgp profiling.
30 * The functions in this file create 64bit elf code object irrespective
31 * of if the driver is compiled as 32 or 64 bit.
32 */
33
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <libelf.h>
#include "ac_msgpack.h"
#include "ac_rgp.h"
#include "main/macros.h"
41
#ifndef EM_AMDGPU
/* Old distributions may not have this enum constant in their ELF headers,
 * so provide it here. It is the value stored in e_machine of the ELF
 * header written by this file.
 */
#define EM_AMDGPU 224
#endif
46
/*
 * API-level shader stage names used as keys of the ".shaders" map in the
 * msgpack metadata. The table is indexed by the bit position of the stage
 * in rgp_code_object_record::shader_stages_mask.
 */
char shader_stage_api_string[6][10] = {
   [0] = ".vertex",   /* vertex */
   [1] = ".hull",     /* tessellation control */
   [2] = ".domain",   /* tessellation evaluation */
   [3] = ".geometry", /* geometry */
   [4] = ".pixel",    /* fragment */
   [5] = ".compute",  /* compute */
};
55
56char hw_stage_string[RGP_HW_STAGE_MAX][4] = {
57   ".vs",
58   ".ls",
59   ".hs",
60   ".es",
61   ".gs",
62   ".ps",
63   ".cs"
64};
65
66char hw_stage_symbol_string[RGP_HW_STAGE_MAX][16] = {
67   "_amdgpu_vs_main",
68   "_amdgpu_ls_main",
69   "_amdgpu_hs_main",
70   "_amdgpu_es_main",
71   "_amdgpu_gs_main",
72   "_amdgpu_ps_main",
73   "_amdgpu_cs_main"
74};
75
76/**
77 * rgp profiler requires data for few variables stored in msgpack format
78 * in notes section. This function writes the data from
79 * struct rgp_code_object_record to elf object in msgpack format.
80 * for msgpack specification refer to
81 * github.com/msgpack/msgpack/blob/master/spec.md
82 */
83static void
84ac_rgp_write_msgpack(FILE *output,
85                     struct rgp_code_object_record *record,
86                     uint32_t *written_size)
87{
88   struct ac_msgpack msgpack;
89   uint32_t num_shaders;
90   uint32_t i;
91   uint32_t mask;
92
93   num_shaders = util_bitcount(record->shader_stages_mask);
94
95   ac_msgpack_init(&msgpack);
96
97   ac_msgpack_add_fixmap_op(&msgpack, 2);
98      ac_msgpack_add_fixstr(&msgpack, "amdpal.version");
99      ac_msgpack_add_fixarray_op(&msgpack, 2);
100         ac_msgpack_add_uint(&msgpack, 2);
101         ac_msgpack_add_uint(&msgpack, 1);
102
103      ac_msgpack_add_fixstr(&msgpack, "amdpal.pipelines");
104      ac_msgpack_add_fixarray_op(&msgpack, 1);
105         ac_msgpack_add_fixmap_op(&msgpack, 6);
106
107            /* 1
108             * This not used in RGP but data needs to be present
109             */
110            ac_msgpack_add_fixstr(&msgpack, ".spill_threshold");
111            ac_msgpack_add_uint(&msgpack, 0xffff);
112
113            /* 2
114             * This not used in RGP but data needs to be present
115             */
116            ac_msgpack_add_fixstr(&msgpack, ".user_data_limit");
117            ac_msgpack_add_uint(&msgpack, 32);
118
119            /* 3 */
120            ac_msgpack_add_fixstr(&msgpack, ".shaders");
121            ac_msgpack_add_fixmap_op(&msgpack, num_shaders);
122               mask = record->shader_stages_mask;
123               while(mask) {
124                  i = u_bit_scan(&mask);
125                  ac_msgpack_add_fixstr(&msgpack,
126                                        shader_stage_api_string[i]);
127                  ac_msgpack_add_fixmap_op(&msgpack, 2);
128                  ac_msgpack_add_fixstr(&msgpack, ".api_shader_hash");
129                  ac_msgpack_add_fixarray_op(&msgpack, 2);
130                     ac_msgpack_add_uint(&msgpack,
131                                         record->shader_data[i].hash[0]);
132                     ac_msgpack_add_uint(&msgpack, 0);
133                  ac_msgpack_add_fixstr(&msgpack, ".hardware_mapping");
134                  ac_msgpack_add_fixarray_op(&msgpack, 1);
135                     ac_msgpack_add_fixstr(&msgpack, hw_stage_string[
136                                           record->shader_data[i].hw_stage]);
137               }
138
139            /* 4 */
140            ac_msgpack_add_fixstr(&msgpack, ".hardware_stages");
141            ac_msgpack_add_fixmap_op(&msgpack,
142                                     record->num_shaders_combined);
143               mask = record->shader_stages_mask;
144               while(mask) {
145                  i = u_bit_scan(&mask);
146
147                  if (record->shader_data[i].is_combined)
148                     continue;
149
150                  ac_msgpack_add_fixstr(&msgpack, hw_stage_string[
151                                        record->shader_data[i].hw_stage]);
152                  ac_msgpack_add_fixmap_op(&msgpack, 5);
153                     ac_msgpack_add_fixstr(&msgpack, ".entry_point");
154                     ac_msgpack_add_fixstr(&msgpack, hw_stage_symbol_string[
155                                           record->shader_data[i].hw_stage]);
156
157                     ac_msgpack_add_fixstr(&msgpack, ".sgpr_count");
158                     ac_msgpack_add_uint(&msgpack,
159                                         record->shader_data[i].sgpr_count);
160
161                     ac_msgpack_add_fixstr(&msgpack, ".vgpr_count");
162                     ac_msgpack_add_uint(&msgpack,
163                                         record->shader_data[i].vgpr_count);
164
165                     ac_msgpack_add_fixstr(&msgpack, ".scratch_memory_size");
166                     ac_msgpack_add_uint(&msgpack,
167                                         record->shader_data[i].scratch_memory_size);
168
169                     ac_msgpack_add_fixstr(&msgpack, ".wavefront_size");
170                     ac_msgpack_add_uint(&msgpack,
171                                         record->shader_data[i].wavefront_size);
172               }
173
174            /* 5 */
175            ac_msgpack_add_fixstr(&msgpack, ".internal_pipeline_hash");
176            ac_msgpack_add_fixarray_op(&msgpack, 2);
177               ac_msgpack_add_uint(&msgpack, record->pipeline_hash[0]);
178               ac_msgpack_add_uint(&msgpack, record->pipeline_hash[1]);
179
180            /* 6 */
181            ac_msgpack_add_fixstr(&msgpack, ".api");
182            ac_msgpack_add_fixstr(&msgpack, "Vulkan");
183
184   ac_msgpack_resize_if_required(&msgpack, 4 - (msgpack.offset % 4));
185   msgpack.offset = ALIGN(msgpack.offset, 4);
186   fwrite(msgpack.mem, 1, msgpack.offset, output);
187   *written_size = msgpack.offset;
188   ac_msgpack_destroy(&msgpack);
189}
190
191
192static uint32_t
193get_lowest_shader(uint32_t *shader_stages_mask,
194                  struct rgp_code_object_record *record,
195                  struct rgp_shader_data **rgp_shader_data)
196{
197   uint32_t i, lowest = 0;
198   uint32_t mask;
199   uint64_t base_address = -1;
200
201   if (*shader_stages_mask == 0)
202      return false;
203
204   mask = *shader_stages_mask;
205   while(mask) {
206      i = u_bit_scan(&mask);
207      if (record->shader_data[i].is_combined) {
208         *shader_stages_mask = *shader_stages_mask & ~((uint32_t)1 << i);
209         continue;
210      }
211      if (base_address > record->shader_data[i].base_address) {
212         lowest = i;
213         base_address = record->shader_data[i].base_address;
214      }
215   }
216
217   *shader_stages_mask = *shader_stages_mask & ~((uint32_t)1 << lowest);
218   *rgp_shader_data = &record->shader_data[lowest];
219   return true;
220}
221
222/**
223 *  write the shader code into elf object in text section
224 */
225static void
226ac_rgp_file_write_elf_text(FILE *output, uint32_t *elf_size_calc,
227                           struct rgp_code_object_record *record,
228                           uint32_t *text_size)
229{
230   struct rgp_shader_data *rgp_shader_data = NULL;
231   struct rgp_shader_data *prev_rgp_shader_data = NULL;
232   uint32_t symbol_offset = 0;
233   uint32_t mask = record->shader_stages_mask;
234   static bool warn_once = true;
235
236   while(get_lowest_shader(&mask, record, &rgp_shader_data)) {
237      if (prev_rgp_shader_data) {
238         uint32_t code_offset = rgp_shader_data->base_address -
239                                prev_rgp_shader_data->base_address;
240         uint32_t gap_between_code = code_offset -
241                                     prev_rgp_shader_data->code_size;
242         symbol_offset += code_offset;
243         if (gap_between_code > 0x10000 && warn_once) {
244            fprintf(stderr, "Warning: shader code far from previous "
245                            "(%d bytes apart). The rgp capture file "
246                            "might be very large.\n", gap_between_code);
247            warn_once = false;
248         }
249
250         fseek(output, gap_between_code, SEEK_CUR);
251         *elf_size_calc += gap_between_code;
252      }
253
254      rgp_shader_data->elf_symbol_offset = symbol_offset;
255      fwrite(rgp_shader_data->code, 1, rgp_shader_data->code_size, output);
256      *elf_size_calc += rgp_shader_data->code_size;
257      prev_rgp_shader_data = rgp_shader_data;
258   }
259
260   symbol_offset += rgp_shader_data->code_size;
261   uint32_t align = ALIGN(symbol_offset, 256) - symbol_offset;
262   fseek(output, align, SEEK_CUR);
263   *elf_size_calc += align;
264   *text_size = symbol_offset + align;
265}
266
/*
 * Hardcoded indices of the string table and text sections in the section
 * header table of the elf object. While populating the section header
 * table, this index order must be strictly followed: the symbol table
 * section links to the string table by index, and every symbol refers to
 * the text section by index.
 */
#define RGP_ELF_STRING_TBL_SEC_HEADER_INDEX 1
#define RGP_ELF_TEXT_SEC_HEADER_INDEX 2
274
/*
 * Hardcode the string table so that it is a single write to output.
 * The strings are members of a structure so that it is easy to get the
 * offset of a given string in the string table: it is the offset of the
 * corresponding member. Every member is sized with sizeof() of its string
 * literal, i.e. including the terminating NUL.
 */
struct ac_rgp_elf_string_table {
   char null[sizeof("")];                    /* leading empty string */
   char strtab[sizeof(".strtab")];           /* section names */
   char text[sizeof(".text")];
   char symtab[sizeof(".symtab")];
   char note[sizeof(".note")];
   char vs_main[sizeof("_amdgpu_vs_main")];  /* per hw stage symbol names */
   char ls_main[sizeof("_amdgpu_ls_main")];
   char hs_main[sizeof("_amdgpu_hs_main")];
   char es_main[sizeof("_amdgpu_es_main")];
   char gs_main[sizeof("_amdgpu_gs_main")];
   char ps_main[sizeof("_amdgpu_ps_main")];
   char cs_main[sizeof("_amdgpu_cs_main")];
};
294
/*
 * The string table contents. This object is written to the elf object
 * as-is and serves both section names and symbol names.
 */
struct ac_rgp_elf_string_table rgp_elf_strtab = {
   .null = "",
   .strtab = ".strtab",
   .text = ".text",
   .symtab = ".symtab",
   .note = ".note",
   .vs_main = "_amdgpu_vs_main",
   .ls_main = "_amdgpu_ls_main",
   .hs_main = "_amdgpu_hs_main",
   .es_main = "_amdgpu_es_main",
   .gs_main = "_amdgpu_gs_main",
   .ps_main = "_amdgpu_ps_main",
   .cs_main = "_amdgpu_cs_main",
};
309
310uint32_t rgp_elf_hw_stage_string_offset[RGP_HW_STAGE_MAX] = {
311   (uintptr_t)((struct ac_rgp_elf_string_table*)0)->vs_main,
312   (uintptr_t)((struct ac_rgp_elf_string_table*)0)->ls_main,
313   (uintptr_t)((struct ac_rgp_elf_string_table*)0)->hs_main,
314   (uintptr_t)((struct ac_rgp_elf_string_table*)0)->es_main,
315   (uintptr_t)((struct ac_rgp_elf_string_table*)0)->gs_main,
316   (uintptr_t)((struct ac_rgp_elf_string_table*)0)->ps_main,
317   (uintptr_t)((struct ac_rgp_elf_string_table*)0)->cs_main,
318};
319
320
321static void
322ac_rgp_file_write_elf_symbol_table(FILE *output, uint32_t *elf_size_calc,
323                                   struct rgp_code_object_record *record,
324                                   uint32_t *symbol_table_size)
325{
326   Elf64_Sym elf_sym;
327   uint32_t i;
328   uint32_t mask = record->shader_stages_mask;
329
330   memset(&elf_sym, 0x00, sizeof(elf_sym));
331   fwrite(&elf_sym, 1, sizeof(elf_sym), output);
332
333   while(mask) {
334      i = u_bit_scan(&mask);
335      if (record->shader_data[i].is_combined)
336         continue;
337
338      elf_sym.st_name = rgp_elf_hw_stage_string_offset
339                        [record->shader_data[i].hw_stage];
340      elf_sym.st_info = STT_FUNC;
341      elf_sym.st_other = 0x0;
342      elf_sym.st_shndx = RGP_ELF_TEXT_SEC_HEADER_INDEX;
343      elf_sym.st_value = record->shader_data[i].elf_symbol_offset;
344      elf_sym.st_size = record->shader_data[i].code_size;
345      fwrite(&elf_sym, 1, sizeof(elf_sym), output);
346   }
347
348   *symbol_table_size = (record->num_shaders_combined + 1)
349                        * sizeof(elf_sym);
350   *elf_size_calc += *symbol_table_size;
351}
352
353
/* The defines below are taken from the llvm project:
 * llvm/include/llvm/BinaryFormat/ELF.h
 *
 * ELFOSABI_AMDGPU_PAL is stored in the OS ABI byte of the elf
 * identification, NT_AMDGPU_METADATA is the type of the note that carries
 * the msgpack metadata.
 */
#define ELFOSABI_AMDGPU_PAL 65
#define NT_AMDGPU_METADATA 32
359
360uint8_t elf_ident[EI_NIDENT] = { ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
361                                 ELFCLASS64, ELFDATA2LSB, EV_CURRENT,
362                                 ELFOSABI_AMDGPU_PAL,
363                                 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
364                                 0x00, 0x00 };
365
/* Owner name stored in the header of the note that carries the msgpack. */
#define NOTE_MSGPACK_NAME "AMDGPU"
/*
 * Header of the msgpack note: the generic elf note header followed by the
 * NUL-terminated owner name. The whole structure is written to the file
 * directly in front of the msgpack data.
 */
struct ac_rgp_elf_note_msgpack_hdr {
   Elf64_Nhdr  hdr;
   char name[sizeof(NOTE_MSGPACK_NAME)];
};
371
372void
373ac_rgp_file_write_elf_object(FILE *output, size_t file_elf_start,
374                             struct rgp_code_object_record *record,
375                             uint32_t *written_size, uint32_t flags)
376{
377   Elf64_Ehdr elf_hdr;
378   Elf64_Shdr sec_hdr[5];
379   uint32_t elf_size_calc;
380   struct ac_rgp_elf_note_msgpack_hdr note_hdr;
381   uint32_t text_size = 0;
382   uint32_t symbol_table_size = 0;
383   uint32_t msgpack_size = 0;
384   size_t note_sec_start;
385   uint32_t sh_offset;
386
387   /* Give space for header in file. It will be written to file at the end */
388   fseek(output, sizeof(Elf64_Ehdr), SEEK_CUR);
389
390   elf_size_calc = sizeof(Elf64_Ehdr);
391
392   /* Initialize elf header */
393   memcpy(&elf_hdr.e_ident, &elf_ident, EI_NIDENT);
394   elf_hdr.e_type = ET_REL;
395   elf_hdr.e_machine = EM_AMDGPU;
396   elf_hdr.e_version = EV_CURRENT;
397   elf_hdr.e_entry = 0;
398   elf_hdr.e_flags = flags;
399   elf_hdr.e_shstrndx = 1; /* string table entry is hardcoded to 1*/
400   elf_hdr.e_phoff = 0;
401   elf_hdr.e_shentsize = sizeof(Elf64_Shdr);
402   elf_hdr.e_ehsize = sizeof(Elf64_Ehdr);
403   elf_hdr.e_phentsize = 0;
404   elf_hdr.e_phnum = 0;
405
406   /* write hardcoded string table */
407   fwrite(&rgp_elf_strtab, 1, sizeof(rgp_elf_strtab), output);
408   elf_size_calc += sizeof(rgp_elf_strtab);
409
410   /* write shader code as .text code */
411   ac_rgp_file_write_elf_text(output, &elf_size_calc, record, &text_size);
412
413   /* write symbol table */
414   ac_rgp_file_write_elf_symbol_table(output, &elf_size_calc, record,
415                                      &symbol_table_size);
416
417   /* write .note */
418   /* the .note section contains msgpack which stores variables */
419   note_sec_start = file_elf_start + elf_size_calc;
420   fseek(output, sizeof(struct ac_rgp_elf_note_msgpack_hdr), SEEK_CUR);
421   ac_rgp_write_msgpack(output, record, &msgpack_size);
422   note_hdr.hdr.n_namesz = sizeof(NOTE_MSGPACK_NAME);
423   note_hdr.hdr.n_descsz = msgpack_size;
424   note_hdr.hdr.n_type = NT_AMDGPU_METADATA;
425   memcpy(note_hdr.name, NOTE_MSGPACK_NAME "\0",
426          sizeof(NOTE_MSGPACK_NAME) + 1);
427   fseek(output, note_sec_start, SEEK_SET);
428   fwrite(&note_hdr, 1, sizeof(struct ac_rgp_elf_note_msgpack_hdr), output);
429   fseek(output, 0, SEEK_END);
430   elf_size_calc += (msgpack_size +
431                     sizeof(struct ac_rgp_elf_note_msgpack_hdr));
432
433   /* write section headers */
434   sh_offset = elf_size_calc;
435   memset(&sec_hdr[0], 0x00, sizeof(Elf64_Shdr) * 5);
436
437   /* string table must be at index 1 as used in other places*/
438   sec_hdr[1].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->strtab;
439   sec_hdr[1].sh_type = SHT_STRTAB;
440   sec_hdr[1].sh_offset = sizeof(Elf64_Ehdr);
441   sec_hdr[1].sh_size = sizeof(rgp_elf_strtab);
442
443   /* text must be at index 2 as used in other places*/
444   sec_hdr[2].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->text;
445   sec_hdr[2].sh_type = SHT_PROGBITS;
446   sec_hdr[2].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
447   sec_hdr[2].sh_offset = sec_hdr[1].sh_offset + sec_hdr[1].sh_size;
448   sec_hdr[2].sh_size = text_size;
449   sec_hdr[2].sh_addralign = 256;
450
451   sec_hdr[3].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->symtab;
452   sec_hdr[3].sh_type = SHT_SYMTAB;
453   sec_hdr[3].sh_offset = sec_hdr[2].sh_offset +
454                          ALIGN(sec_hdr[2].sh_size, 256);
455   sec_hdr[3].sh_size = symbol_table_size;
456   sec_hdr[3].sh_link = RGP_ELF_STRING_TBL_SEC_HEADER_INDEX;
457   sec_hdr[3].sh_addralign = 8;
458   sec_hdr[3].sh_entsize = sizeof(Elf64_Sym);
459
460   sec_hdr[4].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->note;
461   sec_hdr[4].sh_type = SHT_NOTE;
462   sec_hdr[4].sh_offset = sec_hdr[3].sh_offset + sec_hdr[3].sh_size;
463   sec_hdr[4].sh_size = msgpack_size +
464                        sizeof(struct ac_rgp_elf_note_msgpack_hdr);
465   sec_hdr[4].sh_addralign = 4;
466   fwrite(&sec_hdr, 1, sizeof(Elf64_Shdr) * 5, output);
467   elf_size_calc += (sizeof(Elf64_Shdr) * 5);
468
469   /* update and write elf header */
470   elf_hdr.e_shnum = 5;
471   elf_hdr.e_shoff = sh_offset;
472
473   fseek(output, file_elf_start, SEEK_SET);
474   fwrite(&elf_hdr, 1, sizeof(Elf64_Ehdr), output);
475   fseek(output, 0, SEEK_END);
476
477   *written_size = elf_size_calc;
478}
479