intel_dump_gpu.c revision b8e80941
1/*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <stdlib.h>
25#include <stdio.h>
26#include <string.h>
27#include <stdint.h>
28#include <stdbool.h>
29#include <signal.h>
30#include <stdarg.h>
31#include <fcntl.h>
32#include <sys/types.h>
33#include <sys/sysmacros.h>
34#include <sys/stat.h>
35#include <sys/ioctl.h>
36#include <unistd.h>
37#include <errno.h>
38#include <sys/mman.h>
39#include <dlfcn.h>
40#include "drm-uapi/i915_drm.h"
41#include <inttypes.h>
42
43#include "intel_aub.h"
44#include "aub_write.h"
45
46#include "dev/gen_device_info.h"
47#include "util/macros.h"
48
/* Bootstrap stubs: each one calls init() to resolve the real libc symbols
 * and then forwards the call through the freshly resolved pointer.
 */
static int close_init_helper(int fd);
static int ioctl_init_helper(int fd, unsigned long request, ...);

/* Pointers to the next "close"/"ioctl" in the link chain.  They initially
 * point at the bootstrap stubs above and are overwritten by init().
 */
static int (*libc_close)(int fd) = close_init_helper;
static int (*libc_ioctl)(int fd, unsigned long request, ...) = ioctl_init_helper;

/* File descriptor currently intercepted as the DRM device, -1 if none. */
static int drm_fd = -1;
/* Output path taken from the "file" config option (strdup'ed copy). */
static char *output_filename = NULL;
/* Stream opened on output_filename; handed to aub_file_init(). */
static FILE *output_file = NULL;
/* Verbosity from the "verbose" config option: 0, 1 or 2. */
static int verbose = 0;
/* Set when the config supplies a "device" or "platform" option. */
static bool device_override;
60
/* Dimensions of the BO tracking table: one row per file descriptor, one
 * column per GEM handle.  MAX_BO_COUNT is parenthesized so that it expands
 * safely next to operators such as '/' or '%'.
 */
#define MAX_FD_COUNT 64
#define MAX_BO_COUNT (64 * 1024)
63
/* Book-keeping for one GEM buffer object, stored in the bos table. */
struct bo {
   /* Size in bytes; 0 marks a slot that holds no valid buffer. */
   uint32_t size;
   /* GPU address assigned (or pinned) while dumping an execbuffer2. */
   uint64_t offset;
   /* CPU mapping, NULL until mapped; bit 0 is set for userptr BOs. */
   void *map;
};
69
/* Table of MAX_FD_COUNT * MAX_BO_COUNT entries, indexed by
 * handle + fd * MAX_BO_COUNT.
 */
static struct bo *bos;

/* Major number checked against st_rdev to recognize DRM device nodes. */
#define DRM_MAJOR 226

/* We set bit 0 in the map pointer for userptr BOs so we know not to
 * munmap them on DRM_IOCTL_GEM_CLOSE.
 *
 * IS_USERPTR() tests the tag, GET_PTR() strips it.  Both parenthesize their
 * argument so they are safe to use with arbitrary expressions.
 */
#define USERPTR_FLAG 1
#define IS_USERPTR(p) ((uintptr_t) (p) & USERPTR_FLAG)
#define GET_PTR(p) ((void *) ((uintptr_t) (p) & ~(uintptr_t) USERPTR_FLAG))
80
/**
 * Report a fatal condition.
 *
 * When cond is non-zero, prints "intel_dump_gpu: " followed by the
 * printf-style message to stderr and raises SIGTRAP.  When cond is zero the
 * call does nothing.
 */
static void __attribute__ ((format(__printf__, 2, 3)))
fail_if(int cond, const char *format, ...)
{
   if (cond) {
      va_list ap;

      fprintf(stderr, "intel_dump_gpu: ");
      va_start(ap, format);
      vfprintf(stderr, format, ap);
      va_end(ap);

      raise(SIGTRAP);
   }
}
96
97static struct bo *
98get_bo(unsigned fd, uint32_t handle)
99{
100   struct bo *bo;
101
102   fail_if(handle >= MAX_BO_COUNT, "bo handle too large\n");
103   fail_if(fd >= MAX_FD_COUNT, "bo fd too large\n");
104   bo = &bos[handle + fd * MAX_BO_COUNT];
105
106   return bo;
107}
108
/**
 * Round v up to the next multiple of a.
 *
 * The bit trick is only meaningful when a is a power of two.
 */
static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
   const uint32_t mask = a - 1;

   return (v + mask) & ~mask;
}
114
/* Device description filled in by gen_get_device_info(); gen == 0 means
 * it has not been initialized yet.
 */
static struct gen_device_info devinfo = {0};
/* PCI chipset id, either from the config override or from the kernel;
 * 0 while still unknown.
 */
static uint32_t device = 0;
/* AUB writer state, set up on the first dumped execbuffer2. */
static struct aub_file aub_file;
118
119static void *
120relocate_bo(int fd, struct bo *bo, const struct drm_i915_gem_execbuffer2 *execbuffer2,
121            const struct drm_i915_gem_exec_object2 *obj)
122{
123   const struct drm_i915_gem_exec_object2 *exec_objects =
124      (struct drm_i915_gem_exec_object2 *) (uintptr_t) execbuffer2->buffers_ptr;
125   const struct drm_i915_gem_relocation_entry *relocs =
126      (const struct drm_i915_gem_relocation_entry *) (uintptr_t) obj->relocs_ptr;
127   void *relocated;
128   int handle;
129
130   relocated = malloc(bo->size);
131   fail_if(relocated == NULL, "out of memory\n");
132   memcpy(relocated, GET_PTR(bo->map), bo->size);
133   for (size_t i = 0; i < obj->relocation_count; i++) {
134      fail_if(relocs[i].offset >= bo->size, "reloc outside bo\n");
135
136      if (execbuffer2->flags & I915_EXEC_HANDLE_LUT)
137         handle = exec_objects[relocs[i].target_handle].handle;
138      else
139         handle = relocs[i].target_handle;
140
141      aub_write_reloc(&devinfo, ((char *)relocated) + relocs[i].offset,
142                      get_bo(fd, handle)->offset + relocs[i].delta);
143   }
144
145   return relocated;
146}
147
/**
 * Issue an ioctl through the real libc entry point, retrying for as long as
 * it fails with EINTR or EAGAIN.
 *
 * Returns the result of the last attempt.
 */
static int
gem_ioctl(int fd, unsigned long request, void *argp)
{
   for (;;) {
      const int ret = libc_ioctl(fd, request, argp);

      if (ret != -1 || (errno != EINTR && errno != EAGAIN))
         return ret;
   }
}
159
160static void *
161gem_mmap(int fd, uint32_t handle, uint64_t offset, uint64_t size)
162{
163   struct drm_i915_gem_mmap mmap = {
164      .handle = handle,
165      .offset = offset,
166      .size = size
167   };
168
169   if (gem_ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap) == -1)
170      return MAP_FAILED;
171
172   return (void *)(uintptr_t) mmap.addr_ptr;
173}
174
175static int
176gem_get_param(int fd, uint32_t param)
177{
178   int value;
179   drm_i915_getparam_t gp = {
180      .param = param,
181      .value = &value
182   };
183
184   if (gem_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == -1)
185      return 0;
186
187   return value;
188}
189
190static enum drm_i915_gem_engine_class
191engine_class_from_ring_flag(uint32_t ring_flag)
192{
193   switch (ring_flag) {
194   case I915_EXEC_DEFAULT:
195   case I915_EXEC_RENDER:
196      return I915_ENGINE_CLASS_RENDER;
197   case I915_EXEC_BSD:
198      return I915_ENGINE_CLASS_VIDEO;
199   case I915_EXEC_BLT:
200      return I915_ENGINE_CLASS_COPY;
201   case I915_EXEC_VEBOX:
202      return I915_ENGINE_CLASS_VIDEO_ENHANCE;
203   default:
204      return I915_ENGINE_CLASS_INVALID;
205   }
206}
207
208static void
209dump_execbuffer2(int fd, struct drm_i915_gem_execbuffer2 *execbuffer2)
210{
211   struct drm_i915_gem_exec_object2 *exec_objects =
212      (struct drm_i915_gem_exec_object2 *) (uintptr_t) execbuffer2->buffers_ptr;
213   uint32_t ring_flag = execbuffer2->flags & I915_EXEC_RING_MASK;
214   uint32_t offset;
215   struct drm_i915_gem_exec_object2 *obj;
216   struct bo *bo, *batch_bo;
217   int batch_index;
218   void *data;
219
220   /* We can't do this at open time as we're not yet authenticated. */
221   if (device == 0) {
222      device = gem_get_param(fd, I915_PARAM_CHIPSET_ID);
223      fail_if(device == 0 || devinfo.gen == 0, "failed to identify chipset\n");
224   }
225   if (devinfo.gen == 0) {
226      fail_if(!gen_get_device_info(device, &devinfo),
227              "failed to identify chipset=0x%x\n", device);
228
229      aub_file_init(&aub_file, output_file,
230                    verbose == 2 ? stdout : NULL,
231                    device, program_invocation_short_name);
232      aub_write_default_setup(&aub_file);
233
234      if (verbose)
235         printf("[running, output file %s, chipset id 0x%04x, gen %d]\n",
236                output_filename, device, devinfo.gen);
237   }
238
239   if (aub_use_execlists(&aub_file))
240      offset = 0x1000;
241   else
242      offset = aub_gtt_size(&aub_file);
243
244   if (verbose)
245      printf("Dumping execbuffer2:\n");
246
247   for (uint32_t i = 0; i < execbuffer2->buffer_count; i++) {
248      obj = &exec_objects[i];
249      bo = get_bo(fd, obj->handle);
250
251      /* If bo->size == 0, this means they passed us an invalid
252       * buffer.  The kernel will reject it and so should we.
253       */
254      if (bo->size == 0) {
255         if (verbose)
256            printf("BO #%d is invalid!\n", obj->handle);
257         return;
258      }
259
260      if (obj->flags & EXEC_OBJECT_PINNED) {
261         bo->offset = obj->offset;
262         if (verbose)
263            printf("BO #%d (%dB) pinned @ 0x%lx\n",
264                   obj->handle, bo->size, bo->offset);
265      } else {
266         if (obj->alignment != 0)
267            offset = align_u32(offset, obj->alignment);
268         bo->offset = offset;
269         if (verbose)
270            printf("BO #%d (%dB) @ 0x%lx\n", obj->handle,
271                   bo->size, bo->offset);
272         offset = align_u32(offset + bo->size + 4095, 4096);
273      }
274
275      if (bo->map == NULL && bo->size > 0)
276         bo->map = gem_mmap(fd, obj->handle, 0, bo->size);
277      fail_if(bo->map == MAP_FAILED, "bo mmap failed\n");
278
279      if (aub_use_execlists(&aub_file))
280         aub_map_ppgtt(&aub_file, bo->offset, bo->size);
281   }
282
283   batch_index = (execbuffer2->flags & I915_EXEC_BATCH_FIRST) ? 0 :
284      execbuffer2->buffer_count - 1;
285   batch_bo = get_bo(fd, exec_objects[batch_index].handle);
286   for (uint32_t i = 0; i < execbuffer2->buffer_count; i++) {
287      obj = &exec_objects[i];
288      bo = get_bo(fd, obj->handle);
289
290      if (obj->relocation_count > 0)
291         data = relocate_bo(fd, bo, execbuffer2, obj);
292      else
293         data = bo->map;
294
295      if (bo == batch_bo) {
296         aub_write_trace_block(&aub_file, AUB_TRACE_TYPE_BATCH,
297                               GET_PTR(data), bo->size, bo->offset);
298      } else {
299         aub_write_trace_block(&aub_file, AUB_TRACE_TYPE_NOTYPE,
300                               GET_PTR(data), bo->size, bo->offset);
301      }
302
303      if (data != bo->map)
304         free(data);
305   }
306
307   aub_write_exec(&aub_file,
308                  batch_bo->offset + execbuffer2->batch_start_offset,
309                  offset, engine_class_from_ring_flag(ring_flag));
310
311   if (device_override &&
312       (execbuffer2->flags & I915_EXEC_FENCE_ARRAY) != 0) {
313      struct drm_i915_gem_exec_fence *fences =
314         (void*)(uintptr_t)execbuffer2->cliprects_ptr;
315      for (uint32_t i = 0; i < execbuffer2->num_cliprects; i++) {
316         if ((fences[i].flags & I915_EXEC_FENCE_SIGNAL) != 0) {
317            struct drm_syncobj_array arg = {
318               .handles = (uintptr_t)&fences[i].handle,
319               .count_handles = 1,
320               .pad = 0,
321            };
322            libc_ioctl(fd, DRM_IOCTL_SYNCOBJ_SIGNAL, &arg);
323         }
324      }
325   }
326}
327
328static void
329add_new_bo(unsigned fd, int handle, uint64_t size, void *map)
330{
331   struct bo *bo = &bos[handle + fd * MAX_BO_COUNT];
332
333   fail_if(handle >= MAX_BO_COUNT, "bo handle out of range\n");
334   fail_if(fd >= MAX_FD_COUNT, "bo fd out of range\n");
335   fail_if(size == 0, "bo size is invalid\n");
336
337   bo->size = size;
338   bo->map = map;
339}
340
341static void
342remove_bo(int fd, int handle)
343{
344   struct bo *bo = get_bo(fd, handle);
345
346   if (bo->map && !IS_USERPTR(bo->map))
347      munmap(bo->map, bo->size);
348   bo->size = 0;
349   bo->map = NULL;
350}
351
352__attribute__ ((visibility ("default"))) int
353close(int fd)
354{
355   if (fd == drm_fd)
356      drm_fd = -1;
357
358   return libc_close(fd);
359}
360
361static void
362maybe_init(void)
363{
364   static bool initialized = false;
365   FILE *config;
366   char *key, *value;
367
368   if (initialized)
369      return;
370
371   initialized = true;
372
373   config = fopen(getenv("INTEL_DUMP_GPU_CONFIG"), "r");
374   while (fscanf(config, "%m[^=]=%m[^\n]\n", &key, &value) != EOF) {
375      if (!strcmp(key, "verbose")) {
376         if (!strcmp(value, "1")) {
377            verbose = 1;
378         } else if (!strcmp(value, "2")) {
379            verbose = 2;
380         }
381      } else if (!strcmp(key, "device")) {
382         fail_if(device != 0, "Device/Platform override specified multiple times.");
383         fail_if(sscanf(value, "%i", &device) != 1,
384                 "failed to parse device id '%s'",
385                 value);
386         device_override = true;
387      } else if (!strcmp(key, "platform")) {
388         fail_if(device != 0, "Device/Platform override specified multiple times.");
389         device = gen_device_name_to_pci_device_id(value);
390         fail_if(device == -1, "Unknown platform '%s'", value);
391         device_override = true;
392      } else if (!strcmp(key, "file")) {
393         output_filename = strdup(value);
394         output_file = fopen(output_filename, "w+");
395         fail_if(output_file == NULL,
396                 "failed to open file '%s'\n",
397                 output_filename);
398      } else {
399         fprintf(stderr, "unknown option '%s'\n", key);
400      }
401
402      free(key);
403      free(value);
404   }
405   fclose(config);
406
407   bos = calloc(MAX_FD_COUNT * MAX_BO_COUNT, sizeof(bos[0]));
408   fail_if(bos == NULL, "out of memory\n");
409}
410
411__attribute__ ((visibility ("default"))) int
412ioctl(int fd, unsigned long request, ...)
413{
414   va_list args;
415   void *argp;
416   int ret;
417   struct stat buf;
418
419   va_start(args, request);
420   argp = va_arg(args, void *);
421   va_end(args);
422
423   if (_IOC_TYPE(request) == DRM_IOCTL_BASE &&
424       drm_fd != fd && fstat(fd, &buf) == 0 &&
425       (buf.st_mode & S_IFMT) == S_IFCHR && major(buf.st_rdev) == DRM_MAJOR) {
426      drm_fd = fd;
427      if (verbose)
428         printf("[intercept drm ioctl on fd %d]\n", fd);
429   }
430
431   if (fd == drm_fd) {
432      maybe_init();
433
434      switch (request) {
435      case DRM_IOCTL_I915_GETPARAM: {
436         struct drm_i915_getparam *getparam = argp;
437
438         if (device_override && getparam->param == I915_PARAM_CHIPSET_ID) {
439            *getparam->value = device;
440            return 0;
441         }
442
443         ret = libc_ioctl(fd, request, argp);
444
445         /* If the application looks up chipset_id
446          * (they typically do), we'll piggy-back on
447          * their ioctl and store the id for later
448          * use. */
449         if (ret == 0 && getparam->param == I915_PARAM_CHIPSET_ID)
450            device = *getparam->value;
451
452         return ret;
453      }
454
455      case DRM_IOCTL_I915_GEM_EXECBUFFER: {
456         static bool once;
457         if (!once) {
458            fprintf(stderr,
459                    "application uses DRM_IOCTL_I915_GEM_EXECBUFFER, not handled\n");
460            once = true;
461         }
462         return libc_ioctl(fd, request, argp);
463      }
464
465      case DRM_IOCTL_I915_GEM_EXECBUFFER2:
466      case DRM_IOCTL_I915_GEM_EXECBUFFER2_WR: {
467         dump_execbuffer2(fd, argp);
468         if (device_override)
469            return 0;
470
471         return libc_ioctl(fd, request, argp);
472      }
473
474      case DRM_IOCTL_I915_GEM_CREATE: {
475         struct drm_i915_gem_create *create = argp;
476
477         ret = libc_ioctl(fd, request, argp);
478         if (ret == 0)
479            add_new_bo(fd, create->handle, create->size, NULL);
480
481         return ret;
482      }
483
484      case DRM_IOCTL_I915_GEM_USERPTR: {
485         struct drm_i915_gem_userptr *userptr = argp;
486
487         ret = libc_ioctl(fd, request, argp);
488         if (ret == 0)
489            add_new_bo(fd, userptr->handle, userptr->user_size,
490                       (void *) (uintptr_t) (userptr->user_ptr | USERPTR_FLAG));
491
492         return ret;
493      }
494
495      case DRM_IOCTL_GEM_CLOSE: {
496         struct drm_gem_close *close = argp;
497
498         remove_bo(fd, close->handle);
499
500         return libc_ioctl(fd, request, argp);
501      }
502
503      case DRM_IOCTL_GEM_OPEN: {
504         struct drm_gem_open *open = argp;
505
506         ret = libc_ioctl(fd, request, argp);
507         if (ret == 0)
508            add_new_bo(fd, open->handle, open->size, NULL);
509
510         return ret;
511      }
512
513      case DRM_IOCTL_PRIME_FD_TO_HANDLE: {
514         struct drm_prime_handle *prime = argp;
515
516         ret = libc_ioctl(fd, request, argp);
517         if (ret == 0) {
518            off_t size;
519
520            size = lseek(prime->fd, 0, SEEK_END);
521            fail_if(size == -1, "failed to get prime bo size\n");
522            add_new_bo(fd, prime->handle, size, NULL);
523
524         }
525
526         return ret;
527      }
528
529      default:
530         return libc_ioctl(fd, request, argp);
531      }
532   } else {
533      return libc_ioctl(fd, request, argp);
534   }
535}
536
537static void
538init(void)
539{
540   libc_close = dlsym(RTLD_NEXT, "close");
541   libc_ioctl = dlsym(RTLD_NEXT, "ioctl");
542   fail_if(libc_close == NULL || libc_ioctl == NULL,
543           "failed to get libc ioctl or close\n");
544}
545
/**
 * Initial target of libc_close: resolves the real symbols, then forwards
 * the call through the updated pointer.
 */
static int
close_init_helper(int fd)
{
   int result;

   init();
   result = libc_close(fd);

   return result;
}
552
/**
 * Initial target of libc_ioctl: resolves the real symbols, then forwards
 * the request and its single pointer argument through the updated pointer.
 */
static int
ioctl_init_helper(int fd, unsigned long request, ...)
{
   va_list ap;
   void *payload;

   init();

   va_start(ap, request);
   payload = va_arg(ap, void *);
   va_end(ap);

   return libc_ioctl(fd, request, payload);
}
566
567static void __attribute__ ((destructor))
568fini(void)
569{
570   if (devinfo.gen != 0) {
571      free(output_filename);
572      aub_file_finish(&aub_file);
573      free(bos);
574   }
575}
576