pgmdump2.c revision 7ec681f3
1/*
2 * Copyright (c) 2018 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
/*
 * Decoder for "new" GL_OES_get_program_binary format.
 *
 * Overall structure is:
 *
 *   - header at top, contains, amongst other things, offsets of
 *     per shader stage sections.
 *   - per shader stage section (shader_info) starts with a header,
 *     followed by a variable-length list of descriptors.  Each
 *     descriptor has a type/count/size plus offset from the start
 *     of shader_info section where the data is found
 */
36
37#include <assert.h>
38#include <ctype.h>
39#include <fcntl.h>
40#include <stddef.h>
41#include <stdint.h>
42#include <stdio.h>
43#include <stdlib.h>
44#include <string.h>
45#include <unistd.h>
46#include <sys/stat.h>
47#include <sys/types.h>
48
49#include "disasm.h"
50#include "io.h"
51#include "redump.h"
52#include "util.h"
53
/* Path of the input file; assigned from the command line in main(). */
const char *infile;

/* Command line switches, all off unless requested: */
static int dump_full;    /* --full: also hex dump the entire program binary */
static int dump_offsets; /* --dump-offsets: prefix fields with buffer offset */
static int shaderdb;     /* --shaderdb: shaderdb style traces to stderr */

/* GPU id handed to the disassembler; replaced when an RD_GPU_ID section
 * is encountered in the input.
 */
static int gpu_id = 320;
59
/* Decoder context threaded through all of the decode_*() functions. */
struct state {
   char *buf; /* start of the program binary being decoded */
   int sz;    /* size of the program binary in bytes */
   int lvl;   /* current nesting level, controls output indentation */

   /* Start of the shader_info section currently being decoded.  Some
    * offsets are relative to this rather than to the start of 'buf'.
    */
   void *shader;

   /* Size in bytes of one record referenced by the descriptor block
    * currently being decoded:
    */
   int desc_size;

   /* Stage label ("FRAG", "VERT", ...) used in shaderdb output. */
   const char *shader_type;

   /* Register counts picked up from the most recent shader_config: */
   int full_regs;
   int half_regs;
};
77
#define PACKED __attribute__((__packed__))

/*
 * Field decoding helpers.
 *
 * All of these expect a 'struct state *state' to be in scope at the point
 * of use; 's' is a pointer to the (packed) struct being decoded and 'field'
 * is the name of one of its members.  Macro arguments are parenthesized so
 * that arbitrary pointer expressions can be passed for 's'.
 */

/* print offset of 'field' from start of buffer (only with --dump-offsets) */
#define OFF(field)                                                             \
   do {                                                                        \
      if (dump_offsets)                                                        \
         printf("%08x: ", (uint32_t)((char *)&(field) - state->buf));          \
   } while (0)

/* decode field as hex */
#define X(s, field)                                                            \
   do {                                                                        \
      OFF((s)->field);                                                         \
      printf("%s%12s:\t0x%x\n", tab(state->lvl), #field, (s)->field);          \
   } while (0)

/* decode field as digit */
#define D(s, field)                                                            \
   do {                                                                        \
      OFF((s)->field);                                                         \
      printf("%s%12s:\t%u\n", tab(state->lvl), #field, (s)->field);            \
   } while (0)

/* decode field as float/hex */
#define F(s, field)                                                            \
   do {                                                                        \
      OFF((s)->field);                                                         \
      printf("%s%12s:\t%f (0x%0x)\n", tab(state->lvl), #field,                 \
             uif((s)->field), (s)->field);                                     \
   } while (0)

/* decode field as register: (type is 'r' or 'c') */
#define R(s, field, type)                                                      \
   do {                                                                        \
      OFF((s)->field);                                                         \
      printf("%s%12s:\t%c%u.%c\n", tab(state->lvl), #field, (type),            \
             ((s)->field >> 2), "xyzw"[(s)->field & 0x3]);                     \
   } while (0)

/* decode inline string (presumably null terminated?) */
#define S(s, field)                                                            \
   do {                                                                        \
      OFF((s)->field);                                                         \
      printf("%s%12s:\t%s\n", tab(state->lvl), #field, (s)->field);            \
   } while (0)

/* decode string-table string */
#define T(s, field) TODO

/* decode field as unknown: 'start' and 'end' are the (hex, without 0x
 * prefix) offsets of the first and last dword covered by the member named
 * unk_<start>_<end>
 */
#define U(s, start, end)                                                       \
   dump_unknown(state, (s)->unk_##start##_##end, 0x##start,                    \
                (4 + 0x##end - 0x##start) / 4)

/* decode field as offset (from start of buffer) to other section, and
 * decode that section, one indentation level deeper, with decode_<type>()
 */
#define O(s, field, type)                                                      \
   do {                                                                        \
      X(s, field);                                                             \
      assert((s)->field < state->sz);                                          \
      void *_p = &state->buf[(s)->field];                                      \
      state->lvl++;                                                            \
      decode_##type(state, _p);                                                \
      state->lvl--;                                                            \
   } while (0)

/* needed by O(..., shader_info) before the definition further down: */
struct shader_info;
static void decode_shader_info(struct state *state, struct shader_info *info);
144
145static void
146dump_unknown(struct state *state, void *buf, unsigned start, unsigned n)
147{
148   uint32_t *ptr = buf;
149   uint8_t *ascii = buf;
150
151   for (unsigned i = 0; i < n; i++) {
152      uint32_t d = ptr[i];
153
154      if (dump_offsets)
155         printf("%08x:", (uint32_t)((char *)&ptr[i] - state->buf));
156
157      printf("%s        %04x:\t%08x", tab(state->lvl), start + i * 4, d);
158
159      printf("\t|");
160      for (unsigned j = 0; j < 4; j++) {
161         uint8_t c = *(ascii++);
162         printf("%c", (isascii(c) && !iscntrl(c)) ? c : '.');
163      }
164      printf("|\t%f", uif(d));
165
166      /* TODO maybe scan for first non-null and non-ascii char starting from
167       * end of shader binary to (roughly) establish the start of the string
168       * table.. that would be a bit better filter for deciding if something
169       * might be a pointer into the string table.  Also, the previous char
170       * to what it points to should probably be null.
171       */
172      if ((d < state->sz) && isascii(state->buf[d]) &&
173          (strlen(&state->buf[d]) > 2) && isascii(state->buf[d + 1]))
174         printf("\t<== %s", &state->buf[d]);
175
176      printf("\n");
177   }
178}
179
180struct PACKED header {
181   uint32_t version; /* I guess, always b10bcace ? */
182   uint32_t unk_0004_0014[5];
183   uint32_t size;
184   uint32_t size2; /* just to be sure? */
185   uint32_t unk_0020_0020[1];
186   uint32_t
187      chksum; /* I guess?  Small changes seem to result in big diffs here */
188   uint32_t unk_0028_0050[11];
189   uint32_t fs_info; /* offset of FS shader_info section */
190   uint32_t unk_0058_0090[15];
191   uint32_t vs_info; /* offset of VS shader_info section */
192   uint32_t unk_0098_00b0[7];
193   uint32_t vs_info2; /* offset of VS shader_info section (again?) */
194   uint32_t unk_00b8_0110[23];
195   uint32_t bs_info; /* offset of binning shader_info section */
196};
197
198static void
199decode_header(struct state *state, struct header *hdr)
200{
201   X(hdr, version);
202   U(hdr, 0004, 0014);
203   X(hdr, size);
204   X(hdr, size2);
205   U(hdr, 0020, 0020);
206   X(hdr, chksum);
207   U(hdr, 0028, 0050);
208   state->shader_type = "FRAG";
209   O(hdr, fs_info, shader_info);
210   U(hdr, 0058, 0090);
211   state->shader_type = "VERT";
212   O(hdr, vs_info, shader_info);
213   U(hdr, 0098, 00b0);
214   assert(hdr->vs_info ==
215          hdr->vs_info2); /* not sure what this if it is ever different */
216   X(hdr, vs_info2);
217   U(hdr, 00b8, 0110);
218   state->shader_type = "BVERT";
219   O(hdr, bs_info, shader_info);
220
221   /* not sure how much of the rest of contents before start of fs_info
222    * is the header, vs other things.. just dump it all as unknown for
223    * now:
224    */
225   dump_unknown(state, (void *)hdr + sizeof(*hdr), sizeof(*hdr),
226                (hdr->fs_info - sizeof(*hdr)) / 4);
227}
228
229struct PACKED shader_entry_point {
230   /* entry point name, ie. "main" of TBD length, followed by unknown */
231   char name[8];
232};
233
234static void
235decode_shader_entry_point(struct state *state, struct shader_entry_point *e)
236{
237   S(e, name);
238}
239
240struct PACKED shader_config {
241   uint32_t unk_0000_0008[3];
242   uint32_t full_regs;
243   uint32_t half_regs;
244};
245
246static void
247decode_shader_config(struct state *state, struct shader_config *cfg)
248{
249   U(cfg, 0000, 0008);
250   D(cfg, full_regs);
251   D(cfg, half_regs);
252
253   state->full_regs = cfg->full_regs;
254   state->half_regs = cfg->half_regs;
255
256   /* dump reset of unknown (size differs btwn versions) */
257   dump_unknown(state, (void *)cfg + sizeof(*cfg), sizeof(*cfg),
258                (state->desc_size - sizeof(*cfg)) / 4);
259}
260
261struct PACKED shader_io_block {
262   /* name of TBD length followed by unknown.. 42 dwords total */
263   char name[20];
264   uint32_t unk_0014_00a4[37];
265};
266
267static void
268decode_shader_io_block(struct state *state, struct shader_io_block *io)
269{
270   S(io, name);
271   U(io, 0014, 00a4);
272}
273
274struct PACKED shader_constant_block {
275   uint32_t value;
276   uint32_t unk_0004_000c[3];
277   uint32_t regid;
278   uint32_t unk_0014_0024[5];
279};
280
281static void
282decode_shader_constant_block(struct state *state,
283                             struct shader_constant_block *c)
284{
285   F(c, value);
286   U(c, 0004, 000c);
287   R(c, regid, 'c');
288   U(c, 0014, 0024);
289}
290
/*
 * Values of shader_descriptor_block::type, with the record type each one
 * refers to.
 *
 * This names the enum type; the trailing identifier used to follow the
 * closing brace, which instead defined an unused global variable.
 */
enum shader_info_block_type {
   ENTRY_POINT = 0,   /* shader_entry_point */
   SHADER_CONFIG = 1, /* XXX placeholder name */
   SHADER_INPUT = 2,  /* shader_io_block */
   SHADER_OUTPUT = 3, /* shader_io_block */
   CONSTANTS = 6,     /* shader_constant_block */
   INTERNAL = 8,      /* internal input, like bary.f coord */
   SHADER = 10,
};
300
301/* Refers to location of some type of records, with an offset relative to
302 * start of shader_info block.
303 */
304struct PACKED shader_descriptor_block {
305   uint32_t type;   /* block type */
306   uint32_t offset; /* offset (relative to start of shader_info block) */
307   uint32_t size;   /* size in bytes */
308   uint32_t count;  /* number of records */
309   uint32_t unk_0010_0010[1];
310};
311
312static void
313decode_shader_descriptor_block(struct state *state,
314                               struct shader_descriptor_block *blk)
315{
316   D(blk, type);
317   X(blk, offset);
318   D(blk, size);
319   D(blk, count);
320   U(blk, 0010, 0010);
321
322   /* offset relative to current shader block: */
323   void *ptr = state->shader + blk->offset;
324
325   if (blk->count == 0) {
326      assert(blk->size == 0);
327   } else {
328      assert((blk->size % blk->count) == 0);
329   }
330
331   state->desc_size = blk->size / blk->count;
332   state->lvl++;
333   for (unsigned i = 0; i < blk->count; i++) {
334      switch (blk->type) {
335      case ENTRY_POINT:
336         printf("%sentry point %u:\n", tab(state->lvl - 1), i);
337         decode_shader_entry_point(state, ptr);
338         break;
339      case SHADER_CONFIG:
340         printf("%sconfig %u:\n", tab(state->lvl - 1), i);
341         decode_shader_config(state, ptr);
342         break;
343      case SHADER_INPUT:
344         printf("%sinput %u:\n", tab(state->lvl - 1), i);
345         decode_shader_io_block(state, ptr);
346         break;
347      case SHADER_OUTPUT:
348         printf("%soutput %u:\n", tab(state->lvl - 1), i);
349         decode_shader_io_block(state, ptr);
350         break;
351      case INTERNAL:
352         printf("%sinternal input %u:\n", tab(state->lvl - 1), i);
353         decode_shader_io_block(state, ptr);
354         break;
355      case CONSTANTS:
356         printf("%sconstant %u:\n", tab(state->lvl - 1), i);
357         decode_shader_constant_block(state, ptr);
358         break;
359      case SHADER: {
360         struct shader_stats stats;
361         printf("%sshader %u:\n", tab(state->lvl - 1), i);
362         disasm_a3xx_stat(ptr, blk->size / 4, state->lvl, stdout, gpu_id,
363                          &stats);
364         if (shaderdb) {
365            unsigned dwords = 2 * stats.instlen;
366
367            if (gpu_id >= 400) {
368               dwords = ALIGN(dwords, 16 * 2);
369            } else {
370               dwords = ALIGN(dwords, 4 * 2);
371            }
372
373            unsigned half_regs = state->half_regs;
374            unsigned full_regs = state->full_regs;
375
376            /* On a6xx w/ merged/conflicting half and full regs, the
377             * full_regs footprint will be max of full_regs and half
378             * of half_regs.. we only care about which value is higher.
379             */
380            if (gpu_id >= 600) {
381               /* footprint of half_regs in units of full_regs: */
382               unsigned half_full = (half_regs + 1) / 2;
383               if (half_full > full_regs)
384                  full_regs = half_full;
385               half_regs = 0;
386            }
387
388            fprintf(stderr,
389                    "%s shader: %u inst, %u nops, %u non-nops, %u dwords, "
390                    "%u half, %u full, %u constlen, "
391                    "%u (ss), %u (sy), %d max_sun, %d loops\n",
392                    state->shader_type, stats.instructions, stats.nops,
393                    stats.instructions - stats.nops, dwords, half_regs,
394                    full_regs, stats.constlen, stats.ss, stats.sy, 0,
395                    0); /* max_sun or loops not possible */
396         }
397         /* this is a special case in a way, blk->count is # of
398          * instructions but disasm_a3xx() decodes all instructions,
399          * so just bail.
400          */
401         i = blk->count;
402         break;
403      }
404      default:
405         dump_unknown(state, ptr, 0, state->desc_size / 4);
406         break;
407      }
408      ptr += state->desc_size;
409   }
410   state->lvl--;
411}
412
413/* there looks like one of these per shader, followed by "main" and
414 * some more info, and then the shader itself.
415 */
416struct PACKED shader_info {
417   uint32_t unk_0000_0010[5];
418   uint32_t desc_off; /* offset to first descriptor block */
419   uint32_t num_blocks;
420};
421
422static void
423decode_shader_info(struct state *state, struct shader_info *info)
424{
425   assert((info->desc_off % 4) == 0);
426
427   U(info, 0000, 0010);
428   X(info, desc_off);
429   D(info, num_blocks);
430
431   dump_unknown(state, &info[1], 0, (info->desc_off - sizeof(*info)) / 4);
432
433   state->shader = info;
434
435   struct shader_descriptor_block *blocks = ((void *)info) + info->desc_off;
436   for (unsigned i = 0; i < info->num_blocks; i++) {
437      printf("%sdescriptor %u:\n", tab(state->lvl), i);
438      state->lvl++;
439      decode_shader_descriptor_block(state, &blocks[i]);
440      state->lvl--;
441   }
442}
443
444static void
445dump_program(struct state *state)
446{
447   struct header *hdr = (void *)state->buf;
448
449   if (dump_full)
450      dump_unknown(state, state->buf, 0, state->sz / 4);
451
452   decode_header(state, hdr);
453}
454
455int
456main(int argc, char **argv)
457{
458   enum rd_sect_type type = RD_NONE;
459   enum debug_t debug = PRINT_RAW | PRINT_STATS;
460   void *buf = NULL;
461   int sz;
462   struct io *io;
463   int raw_program = 0;
464
465   /* lame argument parsing: */
466
467   while (1) {
468      if ((argc > 1) && !strcmp(argv[1], "--verbose")) {
469         debug |= PRINT_RAW | PRINT_VERBOSE;
470         argv++;
471         argc--;
472         continue;
473      }
474      if ((argc > 1) && !strcmp(argv[1], "--expand")) {
475         debug |= EXPAND_REPEAT;
476         argv++;
477         argc--;
478         continue;
479      }
480      if ((argc > 1) && !strcmp(argv[1], "--full")) {
481         /* only short dump, original shader, symbol table, and disassembly */
482         dump_full = 1;
483         argv++;
484         argc--;
485         continue;
486      }
487      if ((argc > 1) && !strcmp(argv[1], "--dump-offsets")) {
488         dump_offsets = 1;
489         argv++;
490         argc--;
491         continue;
492      }
493      if ((argc > 1) && !strcmp(argv[1], "--raw")) {
494         raw_program = 1;
495         argv++;
496         argc--;
497         continue;
498      }
499      if ((argc > 1) && !strcmp(argv[1], "--shaderdb")) {
500         shaderdb = 1;
501         argv++;
502         argc--;
503         continue;
504      }
505      break;
506   }
507
508   if (argc != 2) {
509      fprintf(stderr, "usage: pgmdump2 [--verbose] [--expand] [--full] "
510                      "[--dump-offsets] [--raw] [--shaderdb] testlog.rd\n");
511      return -1;
512   }
513
514   disasm_a3xx_set_debug(debug);
515
516   infile = argv[1];
517
518   io = io_open(infile);
519   if (!io) {
520      fprintf(stderr, "could not open: %s\n", infile);
521      return -1;
522   }
523
524   if (raw_program) {
525      io_readn(io, &sz, 4);
526      free(buf);
527
528      /* note: allow hex dumps to go a bit past the end of the buffer..
529       * might see some garbage, but better than missing the last few bytes..
530       */
531      buf = calloc(1, sz + 3);
532      io_readn(io, buf + 4, sz);
533      (*(int *)buf) = sz;
534
535      struct state state = {
536         .buf = buf,
537         .sz = sz,
538      };
539      printf("############################################################\n");
540      printf("program:\n");
541      dump_program(&state);
542      printf("############################################################\n");
543      return 0;
544   }
545
546   /* figure out what sort of input we are dealing with: */
547   if (!(check_extension(infile, ".rd") || check_extension(infile, ".rd.gz"))) {
548      int ret;
549      buf = calloc(1, 100 * 1024);
550      ret = io_readn(io, buf, 100 * 1024);
551      if (ret < 0) {
552         fprintf(stderr, "error: %m");
553         return -1;
554      }
555      return disasm_a3xx(buf, ret / 4, 0, stdout, gpu_id);
556   }
557
558   while ((io_readn(io, &type, sizeof(type)) > 0) &&
559          (io_readn(io, &sz, 4) > 0)) {
560      free(buf);
561
562      /* note: allow hex dumps to go a bit past the end of the buffer..
563       * might see some garbage, but better than missing the last few bytes..
564       */
565      buf = calloc(1, sz + 3);
566      io_readn(io, buf, sz);
567
568      switch (type) {
569      case RD_TEST:
570         if (dump_full)
571            printf("test: %s\n", (char *)buf);
572         break;
573      case RD_VERT_SHADER:
574         printf("vertex shader:\n%s\n", (char *)buf);
575         break;
576      case RD_FRAG_SHADER:
577         printf("fragment shader:\n%s\n", (char *)buf);
578         break;
579      case RD_PROGRAM: {
580         struct state state = {
581            .buf = buf,
582            .sz = sz,
583         };
584         printf(
585            "############################################################\n");
586         printf("program:\n");
587         dump_program(&state);
588         printf(
589            "############################################################\n");
590         break;
591      }
592      case RD_GPU_ID:
593         gpu_id = *((unsigned int *)buf);
594         printf("gpu_id: %d\n", gpu_id);
595         break;
596      default:
597         break;
598      }
599   }
600
601   io_close(io);
602
603   return 0;
604}
605