/* intel_dump_gpu.c revision b8e80941 */
1/* 2 * Copyright © 2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include <stdlib.h> 25#include <stdio.h> 26#include <string.h> 27#include <stdint.h> 28#include <stdbool.h> 29#include <signal.h> 30#include <stdarg.h> 31#include <fcntl.h> 32#include <sys/types.h> 33#include <sys/sysmacros.h> 34#include <sys/stat.h> 35#include <sys/ioctl.h> 36#include <unistd.h> 37#include <errno.h> 38#include <sys/mman.h> 39#include <dlfcn.h> 40#include "drm-uapi/i915_drm.h" 41#include <inttypes.h> 42 43#include "intel_aub.h" 44#include "aub_write.h" 45 46#include "dev/gen_device_info.h" 47#include "util/macros.h" 48 49static int close_init_helper(int fd); 50static int ioctl_init_helper(int fd, unsigned long request, ...); 51 52static int (*libc_close)(int fd) = close_init_helper; 53static int (*libc_ioctl)(int fd, unsigned long request, ...) 
= ioctl_init_helper; 54 55static int drm_fd = -1; 56static char *output_filename = NULL; 57static FILE *output_file = NULL; 58static int verbose = 0; 59static bool device_override; 60 61#define MAX_FD_COUNT 64 62#define MAX_BO_COUNT 64 * 1024 63 64struct bo { 65 uint32_t size; 66 uint64_t offset; 67 void *map; 68}; 69 70static struct bo *bos; 71 72#define DRM_MAJOR 226 73 74/* We set bit 0 in the map pointer for userptr BOs so we know not to 75 * munmap them on DRM_IOCTL_GEM_CLOSE. 76 */ 77#define USERPTR_FLAG 1 78#define IS_USERPTR(p) ((uintptr_t) (p) & USERPTR_FLAG) 79#define GET_PTR(p) ( (void *) ((uintptr_t) p & ~(uintptr_t) 1) ) 80 81static void __attribute__ ((format(__printf__, 2, 3))) 82fail_if(int cond, const char *format, ...) 83{ 84 va_list args; 85 86 if (!cond) 87 return; 88 89 va_start(args, format); 90 fprintf(stderr, "intel_dump_gpu: "); 91 vfprintf(stderr, format, args); 92 va_end(args); 93 94 raise(SIGTRAP); 95} 96 97static struct bo * 98get_bo(unsigned fd, uint32_t handle) 99{ 100 struct bo *bo; 101 102 fail_if(handle >= MAX_BO_COUNT, "bo handle too large\n"); 103 fail_if(fd >= MAX_FD_COUNT, "bo fd too large\n"); 104 bo = &bos[handle + fd * MAX_BO_COUNT]; 105 106 return bo; 107} 108 109static inline uint32_t 110align_u32(uint32_t v, uint32_t a) 111{ 112 return (v + a - 1) & ~(a - 1); 113} 114 115static struct gen_device_info devinfo = {0}; 116static uint32_t device = 0; 117static struct aub_file aub_file; 118 119static void * 120relocate_bo(int fd, struct bo *bo, const struct drm_i915_gem_execbuffer2 *execbuffer2, 121 const struct drm_i915_gem_exec_object2 *obj) 122{ 123 const struct drm_i915_gem_exec_object2 *exec_objects = 124 (struct drm_i915_gem_exec_object2 *) (uintptr_t) execbuffer2->buffers_ptr; 125 const struct drm_i915_gem_relocation_entry *relocs = 126 (const struct drm_i915_gem_relocation_entry *) (uintptr_t) obj->relocs_ptr; 127 void *relocated; 128 int handle; 129 130 relocated = malloc(bo->size); 131 fail_if(relocated == NULL, "out 
of memory\n"); 132 memcpy(relocated, GET_PTR(bo->map), bo->size); 133 for (size_t i = 0; i < obj->relocation_count; i++) { 134 fail_if(relocs[i].offset >= bo->size, "reloc outside bo\n"); 135 136 if (execbuffer2->flags & I915_EXEC_HANDLE_LUT) 137 handle = exec_objects[relocs[i].target_handle].handle; 138 else 139 handle = relocs[i].target_handle; 140 141 aub_write_reloc(&devinfo, ((char *)relocated) + relocs[i].offset, 142 get_bo(fd, handle)->offset + relocs[i].delta); 143 } 144 145 return relocated; 146} 147 148static int 149gem_ioctl(int fd, unsigned long request, void *argp) 150{ 151 int ret; 152 153 do { 154 ret = libc_ioctl(fd, request, argp); 155 } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 156 157 return ret; 158} 159 160static void * 161gem_mmap(int fd, uint32_t handle, uint64_t offset, uint64_t size) 162{ 163 struct drm_i915_gem_mmap mmap = { 164 .handle = handle, 165 .offset = offset, 166 .size = size 167 }; 168 169 if (gem_ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap) == -1) 170 return MAP_FAILED; 171 172 return (void *)(uintptr_t) mmap.addr_ptr; 173} 174 175static int 176gem_get_param(int fd, uint32_t param) 177{ 178 int value; 179 drm_i915_getparam_t gp = { 180 .param = param, 181 .value = &value 182 }; 183 184 if (gem_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == -1) 185 return 0; 186 187 return value; 188} 189 190static enum drm_i915_gem_engine_class 191engine_class_from_ring_flag(uint32_t ring_flag) 192{ 193 switch (ring_flag) { 194 case I915_EXEC_DEFAULT: 195 case I915_EXEC_RENDER: 196 return I915_ENGINE_CLASS_RENDER; 197 case I915_EXEC_BSD: 198 return I915_ENGINE_CLASS_VIDEO; 199 case I915_EXEC_BLT: 200 return I915_ENGINE_CLASS_COPY; 201 case I915_EXEC_VEBOX: 202 return I915_ENGINE_CLASS_VIDEO_ENHANCE; 203 default: 204 return I915_ENGINE_CLASS_INVALID; 205 } 206} 207 208static void 209dump_execbuffer2(int fd, struct drm_i915_gem_execbuffer2 *execbuffer2) 210{ 211 struct drm_i915_gem_exec_object2 *exec_objects = 212 (struct 
drm_i915_gem_exec_object2 *) (uintptr_t) execbuffer2->buffers_ptr; 213 uint32_t ring_flag = execbuffer2->flags & I915_EXEC_RING_MASK; 214 uint32_t offset; 215 struct drm_i915_gem_exec_object2 *obj; 216 struct bo *bo, *batch_bo; 217 int batch_index; 218 void *data; 219 220 /* We can't do this at open time as we're not yet authenticated. */ 221 if (device == 0) { 222 device = gem_get_param(fd, I915_PARAM_CHIPSET_ID); 223 fail_if(device == 0 || devinfo.gen == 0, "failed to identify chipset\n"); 224 } 225 if (devinfo.gen == 0) { 226 fail_if(!gen_get_device_info(device, &devinfo), 227 "failed to identify chipset=0x%x\n", device); 228 229 aub_file_init(&aub_file, output_file, 230 verbose == 2 ? stdout : NULL, 231 device, program_invocation_short_name); 232 aub_write_default_setup(&aub_file); 233 234 if (verbose) 235 printf("[running, output file %s, chipset id 0x%04x, gen %d]\n", 236 output_filename, device, devinfo.gen); 237 } 238 239 if (aub_use_execlists(&aub_file)) 240 offset = 0x1000; 241 else 242 offset = aub_gtt_size(&aub_file); 243 244 if (verbose) 245 printf("Dumping execbuffer2:\n"); 246 247 for (uint32_t i = 0; i < execbuffer2->buffer_count; i++) { 248 obj = &exec_objects[i]; 249 bo = get_bo(fd, obj->handle); 250 251 /* If bo->size == 0, this means they passed us an invalid 252 * buffer. The kernel will reject it and so should we. 
253 */ 254 if (bo->size == 0) { 255 if (verbose) 256 printf("BO #%d is invalid!\n", obj->handle); 257 return; 258 } 259 260 if (obj->flags & EXEC_OBJECT_PINNED) { 261 bo->offset = obj->offset; 262 if (verbose) 263 printf("BO #%d (%dB) pinned @ 0x%lx\n", 264 obj->handle, bo->size, bo->offset); 265 } else { 266 if (obj->alignment != 0) 267 offset = align_u32(offset, obj->alignment); 268 bo->offset = offset; 269 if (verbose) 270 printf("BO #%d (%dB) @ 0x%lx\n", obj->handle, 271 bo->size, bo->offset); 272 offset = align_u32(offset + bo->size + 4095, 4096); 273 } 274 275 if (bo->map == NULL && bo->size > 0) 276 bo->map = gem_mmap(fd, obj->handle, 0, bo->size); 277 fail_if(bo->map == MAP_FAILED, "bo mmap failed\n"); 278 279 if (aub_use_execlists(&aub_file)) 280 aub_map_ppgtt(&aub_file, bo->offset, bo->size); 281 } 282 283 batch_index = (execbuffer2->flags & I915_EXEC_BATCH_FIRST) ? 0 : 284 execbuffer2->buffer_count - 1; 285 batch_bo = get_bo(fd, exec_objects[batch_index].handle); 286 for (uint32_t i = 0; i < execbuffer2->buffer_count; i++) { 287 obj = &exec_objects[i]; 288 bo = get_bo(fd, obj->handle); 289 290 if (obj->relocation_count > 0) 291 data = relocate_bo(fd, bo, execbuffer2, obj); 292 else 293 data = bo->map; 294 295 if (bo == batch_bo) { 296 aub_write_trace_block(&aub_file, AUB_TRACE_TYPE_BATCH, 297 GET_PTR(data), bo->size, bo->offset); 298 } else { 299 aub_write_trace_block(&aub_file, AUB_TRACE_TYPE_NOTYPE, 300 GET_PTR(data), bo->size, bo->offset); 301 } 302 303 if (data != bo->map) 304 free(data); 305 } 306 307 aub_write_exec(&aub_file, 308 batch_bo->offset + execbuffer2->batch_start_offset, 309 offset, engine_class_from_ring_flag(ring_flag)); 310 311 if (device_override && 312 (execbuffer2->flags & I915_EXEC_FENCE_ARRAY) != 0) { 313 struct drm_i915_gem_exec_fence *fences = 314 (void*)(uintptr_t)execbuffer2->cliprects_ptr; 315 for (uint32_t i = 0; i < execbuffer2->num_cliprects; i++) { 316 if ((fences[i].flags & I915_EXEC_FENCE_SIGNAL) != 0) { 317 struct 
drm_syncobj_array arg = { 318 .handles = (uintptr_t)&fences[i].handle, 319 .count_handles = 1, 320 .pad = 0, 321 }; 322 libc_ioctl(fd, DRM_IOCTL_SYNCOBJ_SIGNAL, &arg); 323 } 324 } 325 } 326} 327 328static void 329add_new_bo(unsigned fd, int handle, uint64_t size, void *map) 330{ 331 struct bo *bo = &bos[handle + fd * MAX_BO_COUNT]; 332 333 fail_if(handle >= MAX_BO_COUNT, "bo handle out of range\n"); 334 fail_if(fd >= MAX_FD_COUNT, "bo fd out of range\n"); 335 fail_if(size == 0, "bo size is invalid\n"); 336 337 bo->size = size; 338 bo->map = map; 339} 340 341static void 342remove_bo(int fd, int handle) 343{ 344 struct bo *bo = get_bo(fd, handle); 345 346 if (bo->map && !IS_USERPTR(bo->map)) 347 munmap(bo->map, bo->size); 348 bo->size = 0; 349 bo->map = NULL; 350} 351 352__attribute__ ((visibility ("default"))) int 353close(int fd) 354{ 355 if (fd == drm_fd) 356 drm_fd = -1; 357 358 return libc_close(fd); 359} 360 361static void 362maybe_init(void) 363{ 364 static bool initialized = false; 365 FILE *config; 366 char *key, *value; 367 368 if (initialized) 369 return; 370 371 initialized = true; 372 373 config = fopen(getenv("INTEL_DUMP_GPU_CONFIG"), "r"); 374 while (fscanf(config, "%m[^=]=%m[^\n]\n", &key, &value) != EOF) { 375 if (!strcmp(key, "verbose")) { 376 if (!strcmp(value, "1")) { 377 verbose = 1; 378 } else if (!strcmp(value, "2")) { 379 verbose = 2; 380 } 381 } else if (!strcmp(key, "device")) { 382 fail_if(device != 0, "Device/Platform override specified multiple times."); 383 fail_if(sscanf(value, "%i", &device) != 1, 384 "failed to parse device id '%s'", 385 value); 386 device_override = true; 387 } else if (!strcmp(key, "platform")) { 388 fail_if(device != 0, "Device/Platform override specified multiple times."); 389 device = gen_device_name_to_pci_device_id(value); 390 fail_if(device == -1, "Unknown platform '%s'", value); 391 device_override = true; 392 } else if (!strcmp(key, "file")) { 393 output_filename = strdup(value); 394 output_file = 
fopen(output_filename, "w+"); 395 fail_if(output_file == NULL, 396 "failed to open file '%s'\n", 397 output_filename); 398 } else { 399 fprintf(stderr, "unknown option '%s'\n", key); 400 } 401 402 free(key); 403 free(value); 404 } 405 fclose(config); 406 407 bos = calloc(MAX_FD_COUNT * MAX_BO_COUNT, sizeof(bos[0])); 408 fail_if(bos == NULL, "out of memory\n"); 409} 410 411__attribute__ ((visibility ("default"))) int 412ioctl(int fd, unsigned long request, ...) 413{ 414 va_list args; 415 void *argp; 416 int ret; 417 struct stat buf; 418 419 va_start(args, request); 420 argp = va_arg(args, void *); 421 va_end(args); 422 423 if (_IOC_TYPE(request) == DRM_IOCTL_BASE && 424 drm_fd != fd && fstat(fd, &buf) == 0 && 425 (buf.st_mode & S_IFMT) == S_IFCHR && major(buf.st_rdev) == DRM_MAJOR) { 426 drm_fd = fd; 427 if (verbose) 428 printf("[intercept drm ioctl on fd %d]\n", fd); 429 } 430 431 if (fd == drm_fd) { 432 maybe_init(); 433 434 switch (request) { 435 case DRM_IOCTL_I915_GETPARAM: { 436 struct drm_i915_getparam *getparam = argp; 437 438 if (device_override && getparam->param == I915_PARAM_CHIPSET_ID) { 439 *getparam->value = device; 440 return 0; 441 } 442 443 ret = libc_ioctl(fd, request, argp); 444 445 /* If the application looks up chipset_id 446 * (they typically do), we'll piggy-back on 447 * their ioctl and store the id for later 448 * use. 
*/ 449 if (ret == 0 && getparam->param == I915_PARAM_CHIPSET_ID) 450 device = *getparam->value; 451 452 return ret; 453 } 454 455 case DRM_IOCTL_I915_GEM_EXECBUFFER: { 456 static bool once; 457 if (!once) { 458 fprintf(stderr, 459 "application uses DRM_IOCTL_I915_GEM_EXECBUFFER, not handled\n"); 460 once = true; 461 } 462 return libc_ioctl(fd, request, argp); 463 } 464 465 case DRM_IOCTL_I915_GEM_EXECBUFFER2: 466 case DRM_IOCTL_I915_GEM_EXECBUFFER2_WR: { 467 dump_execbuffer2(fd, argp); 468 if (device_override) 469 return 0; 470 471 return libc_ioctl(fd, request, argp); 472 } 473 474 case DRM_IOCTL_I915_GEM_CREATE: { 475 struct drm_i915_gem_create *create = argp; 476 477 ret = libc_ioctl(fd, request, argp); 478 if (ret == 0) 479 add_new_bo(fd, create->handle, create->size, NULL); 480 481 return ret; 482 } 483 484 case DRM_IOCTL_I915_GEM_USERPTR: { 485 struct drm_i915_gem_userptr *userptr = argp; 486 487 ret = libc_ioctl(fd, request, argp); 488 if (ret == 0) 489 add_new_bo(fd, userptr->handle, userptr->user_size, 490 (void *) (uintptr_t) (userptr->user_ptr | USERPTR_FLAG)); 491 492 return ret; 493 } 494 495 case DRM_IOCTL_GEM_CLOSE: { 496 struct drm_gem_close *close = argp; 497 498 remove_bo(fd, close->handle); 499 500 return libc_ioctl(fd, request, argp); 501 } 502 503 case DRM_IOCTL_GEM_OPEN: { 504 struct drm_gem_open *open = argp; 505 506 ret = libc_ioctl(fd, request, argp); 507 if (ret == 0) 508 add_new_bo(fd, open->handle, open->size, NULL); 509 510 return ret; 511 } 512 513 case DRM_IOCTL_PRIME_FD_TO_HANDLE: { 514 struct drm_prime_handle *prime = argp; 515 516 ret = libc_ioctl(fd, request, argp); 517 if (ret == 0) { 518 off_t size; 519 520 size = lseek(prime->fd, 0, SEEK_END); 521 fail_if(size == -1, "failed to get prime bo size\n"); 522 add_new_bo(fd, prime->handle, size, NULL); 523 524 } 525 526 return ret; 527 } 528 529 default: 530 return libc_ioctl(fd, request, argp); 531 } 532 } else { 533 return libc_ioctl(fd, request, argp); 534 } 535} 536 537static 
void 538init(void) 539{ 540 libc_close = dlsym(RTLD_NEXT, "close"); 541 libc_ioctl = dlsym(RTLD_NEXT, "ioctl"); 542 fail_if(libc_close == NULL || libc_ioctl == NULL, 543 "failed to get libc ioctl or close\n"); 544} 545 546static int 547close_init_helper(int fd) 548{ 549 init(); 550 return libc_close(fd); 551} 552 553static int 554ioctl_init_helper(int fd, unsigned long request, ...) 555{ 556 va_list args; 557 void *argp; 558 559 va_start(args, request); 560 argp = va_arg(args, void *); 561 va_end(args); 562 563 init(); 564 return libc_ioctl(fd, request, argp); 565} 566 567static void __attribute__ ((destructor)) 568fini(void) 569{ 570 if (devinfo.gen != 0) { 571 free(output_filename); 572 aub_file_finish(&aub_file); 573 free(bos); 574 } 575} 576