intel_measure.c revision 7ec681f3
/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file intel_measure.c
 */

#include "intel_measure.h"

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#define __STDC_FORMAT_MACROS 1
#include <inttypes.h>

#include "dev/intel_device_info.h"
#include "util/debug.h"
#include "util/macros.h"
#include "util/u_debug.h"


static const struct debug_control debug_control[] = {
   { "draw",   INTEL_MEASURE_DRAW       },
   { "rt",     INTEL_MEASURE_RENDERPASS },
   { "shader", INTEL_MEASURE_SHADER     },
   { "batch",  INTEL_MEASURE_BATCH      },
   { "frame",  INTEL_MEASURE_FRAME      },
   { NULL, 0 }
};
static struct intel_measure_config config;

void
intel_measure_init(struct intel_measure_device *device)
{
   static bool once = false;
   const char *env = getenv("INTEL_MEASURE");
   if (unlikely(!once)) {
      once = true;
      memset(&config, 0, sizeof(struct intel_measure_config));
      if (!env)
         return;

      config.file = stderr;
      config.flags = parse_debug_string(env, debug_control);
      if (!config.flags)
         config.flags = INTEL_MEASURE_DRAW;
      config.enabled = true;
      config.event_interval = 1;
      config.control_fh = -1;

      /* Overflows of the following defaults will drop data and generate a
       * warning on the output filehandle.
       */

      /* default batch_size allows for 8k renders in a single batch */
      const int DEFAULT_BATCH_SIZE = 16 * 1024;
      config.batch_size = DEFAULT_BATCH_SIZE;

      /* Default buffer_size allows for 16k batches per line of output in the
       * csv. Overflow may occur for offscreen workloads or large 'interval'
       * settings.
       */
      const int DEFAULT_BUFFER_SIZE = 16 * 1024;
      config.buffer_size = DEFAULT_BUFFER_SIZE;
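      /* INTEL_MEASURE is a comma-separated list of the flags above plus the
       * suboptions parsed below. Illustrative invocations (the values shown
       * are examples, not defaults):
       *
       *   INTEL_MEASURE=draw                      one csv row per draw-level event
       *   INTEL_MEASURE=rt,file=/tmp/measure.csv  per-renderpass rows, to a file
       *   INTEL_MEASURE=frame,start=100,count=50  measure frames [100, 150)
       *   INTEL_MEASURE=batch,interval=10         combine 10 batches per row
       */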
      const char *filename = strstr(env, "file=");
      const char *start_frame_s = strstr(env, "start=");
      const char *count_frame_s = strstr(env, "count=");
      const char *control_path = strstr(env, "control=");
      const char *interval_s = strstr(env, "interval=");
      const char *batch_size_s = strstr(env, "batch_size=");
      const char *buffer_size_s = strstr(env, "buffer_size=");
      while (true) {
         char *sep = strrchr(env, ',');
         if (sep == NULL)
            break;
         *sep = '\0';
      }

      if (filename && !__check_suid()) {
         filename += 5;
         config.file = fopen(filename, "w");
         if (!config.file) {
            fprintf(stderr, "INTEL_MEASURE failed to open output file %s: %s\n",
                    filename, strerror(errno));
            abort();
         }
      }

      if (start_frame_s) {
         start_frame_s += 6;
         const int start_frame = atoi(start_frame_s);
         if (start_frame < 0) {
            fprintf(stderr, "INTEL_MEASURE start frame may "
                    "not be negative: %d\n", start_frame);
            abort();
         }

         config.start_frame = start_frame;
         config.enabled = false;
      }

      if (count_frame_s) {
         count_frame_s += 6;
         const int count_frame = atoi(count_frame_s);
         if (count_frame <= 0) {
            fprintf(stderr, "INTEL_MEASURE count frame must be positive: %d\n",
                    count_frame);
            abort();
         }

         config.end_frame = config.start_frame + count_frame;
      }

      if (control_path) {
         control_path += 8;
         if (mkfifoat(AT_FDCWD, control_path, S_IRUSR | S_IWUSR)) {
            if (errno != EEXIST) {
               fprintf(stderr, "INTEL_MEASURE failed to create control "
                       "fifo %s: %s\n", control_path, strerror(errno));
               abort();
            }
         }

         config.control_fh = openat(AT_FDCWD, control_path,
                                    O_RDONLY | O_NONBLOCK);
         if (config.control_fh == -1) {
            fprintf(stderr, "INTEL_MEASURE failed to open control fifo "
                    "%s: %s\n", control_path, strerror(errno));
            abort();
         }

         /* when using a control fifo, do not start until the user triggers
          * capture
          */
         config.enabled = false;
      }

      if (interval_s) {
         interval_s += 9;
         const int event_interval = atoi(interval_s);
         if (event_interval < 1) {
            fprintf(stderr, "INTEL_MEASURE event_interval must be positive: "
                    "%d\n", event_interval);
            abort();
         }
         config.event_interval = event_interval;
      }

      if (batch_size_s) {
         batch_size_s += 11;
         const int batch_size = atoi(batch_size_s);
         if (batch_size < DEFAULT_BATCH_SIZE) {
            fprintf(stderr, "INTEL_MEASURE minimum batch_size is 16k: "
                    "%d\n", batch_size);
            abort();
         }
         if (batch_size > DEFAULT_BATCH_SIZE * 1024) {
            fprintf(stderr, "INTEL_MEASURE batch_size limited to 16M: "
                    "%d\n", batch_size);
            abort();
         }

         config.batch_size = batch_size;
      }

      if (buffer_size_s) {
         buffer_size_s += 12;
         const int buffer_size = atoi(buffer_size_s);
         if (buffer_size < DEFAULT_BUFFER_SIZE) {
            fprintf(stderr, "INTEL_MEASURE minimum buffer_size is 16k: "
                    "%d\n", buffer_size);
            abort();
         }
         if (buffer_size > DEFAULT_BUFFER_SIZE * 1024) {
            fprintf(stderr, "INTEL_MEASURE buffer_size limited to 16M: "
                    "%d\n", buffer_size);
            abort();
         }

         config.buffer_size = buffer_size;
      }

      fputs("draw_start,draw_end,frame,batch,"
            "event_index,event_count,type,count,vs,tcs,tes,"
            "gs,fs,cs,framebuffer,idle_ns,time_ns\n",
            config.file);
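      /* A row of output might then look like this (values invented for
       * illustration; one row per combined event group is emitted by
       * print_combined_results() below):
       *
       *   1000,2500,3,7,0,1,draw,1,0xf00d,0x0,0x0,0x0,0xbeef,0x0,0x1,500,1500
       */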
   }

   device->config = NULL;
   device->frame = 0;
   pthread_mutex_init(&device->mutex, NULL);
   list_inithead(&device->queued_snapshots);

   if (env)
      device->config = &config;
}

const char *
intel_measure_snapshot_string(enum intel_measure_snapshot_type type)
{
   const char *names[] = {
      [INTEL_SNAPSHOT_UNDEFINED]           = "undefined",
      [INTEL_SNAPSHOT_BLIT]                = "blit",
      [INTEL_SNAPSHOT_CCS_AMBIGUATE]       = "ccs ambiguate",
      [INTEL_SNAPSHOT_CCS_COLOR_CLEAR]     = "ccs color clear",
      [INTEL_SNAPSHOT_CCS_PARTIAL_RESOLVE] = "ccs partial resolve",
      [INTEL_SNAPSHOT_CCS_RESOLVE]         = "ccs resolve",
      [INTEL_SNAPSHOT_COMPUTE]             = "compute",
      [INTEL_SNAPSHOT_COPY]                = "copy",
      [INTEL_SNAPSHOT_DRAW]                = "draw",
      [INTEL_SNAPSHOT_HIZ_AMBIGUATE]       = "hiz ambiguate",
      [INTEL_SNAPSHOT_HIZ_CLEAR]           = "hiz clear",
      [INTEL_SNAPSHOT_HIZ_RESOLVE]         = "hiz resolve",
      [INTEL_SNAPSHOT_MCS_COLOR_CLEAR]     = "mcs color clear",
      [INTEL_SNAPSHOT_MCS_PARTIAL_RESOLVE] = "mcs partial resolve",
      [INTEL_SNAPSHOT_SLOW_COLOR_CLEAR]    = "slow color clear",
      [INTEL_SNAPSHOT_SLOW_DEPTH_CLEAR]    = "slow depth clear",
      [INTEL_SNAPSHOT_SECONDARY_BATCH]     = "secondary command buffer",
      [INTEL_SNAPSHOT_END]                 = "end",
   };
   assert(type < ARRAY_SIZE(names));
   assert(names[type] != NULL);
   assert(type != INTEL_SNAPSHOT_UNDEFINED);
   return names[type];
}

/**
 * Indicate to the caller whether a new snapshot should be started.
 *
 * Callers provide rendering state to this method to determine whether the
 * current start event should be skipped. Depending on the configuration
 * flags, a new snapshot may start:
 *   - at every event
 *   - when the program changes
 *   - after a batch is submitted
 *   - at frame boundaries
 *
 * Returns true if a snapshot should be started.
 */
bool
intel_measure_state_changed(const struct intel_measure_batch *batch,
                            uintptr_t vs, uintptr_t tcs, uintptr_t tes,
                            uintptr_t gs, uintptr_t fs, uintptr_t cs)
{
   if (batch->index == 0) {
      /* always record the first event */
      return true;
   }

   const struct intel_measure_snapshot *last_snap =
      &batch->snapshots[batch->index - 1];

   if (config.flags & INTEL_MEASURE_DRAW)
      return true;

   if (batch->index % 2 == 0) {
      /* no snapshot is running, but we have a start event */
      return true;
   }

   if (config.flags & (INTEL_MEASURE_FRAME | INTEL_MEASURE_BATCH)) {
      /* only start collection when index == 0, at the beginning of a batch */
      return false;
   }

   if (config.flags & INTEL_MEASURE_RENDERPASS) {
      return ((last_snap->framebuffer != batch->framebuffer) ||
              /* compute workloads are always in their own renderpass */
              (cs != 0));
   }

   /* The remaining comparisons check the state of the render pipeline for
    * INTEL_MEASURE_SHADER.
    */
   assert(config.flags & INTEL_MEASURE_SHADER);

   if (!vs && !tcs && !tes && !gs && !fs && !cs) {
      /* blorp always changes program */
      return true;
   }

   return (last_snap->vs != vs ||
           last_snap->tcs != tcs ||
           last_snap->tes != tes ||
           last_snap->gs != gs ||
           last_snap->fs != fs ||
           last_snap->cs != cs);
}
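/* Illustrative call site (the real call sites live in the drivers that link
 * this file): before each draw, a driver checks this predicate and, when it
 * returns true, emits a begin timestamp for a new snapshot.
 *
 *   if (intel_measure_state_changed(batch, vs, tcs, tes, gs, fs, cs))
 *      begin_new_snapshot(batch);   // hypothetical driver helper
 */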
/**
 * Notify intel_measure that a frame is about to begin.
 *
 * Depending on the configuration values and on commands received over the
 * control fifo, measurement may start or stop at frame boundaries.
 */
void
intel_measure_frame_transition(unsigned frame)
{
   if (frame == config.start_frame)
      config.enabled = true;
   else if (frame == config.end_frame)
      config.enabled = false;

   /* user commands to the control fifo will override any start/count
    * environment settings
    */
   if (config.control_fh != -1) {
      while (true) {
         const unsigned BUF_SIZE = 128;
         char buf[BUF_SIZE];
         ssize_t bytes = read(config.control_fh, buf, BUF_SIZE - 1);
         if (bytes == 0)
            break;
         if (bytes == -1) {
            fprintf(stderr, "INTEL_MEASURE failed to read control fifo: %s\n",
                    strerror(errno));
            abort();
         }

         buf[bytes] = '\0';
         char *nptr = buf, *endptr = buf;
         while (*nptr != '\0' && *endptr != '\0') {
            long fcount = strtol(nptr, &endptr, 10);
            if (nptr == endptr) {
               config.enabled = false;
               fprintf(stderr, "INTEL_MEASURE invalid frame count on "
                       "control fifo.\n");
               lseek(config.control_fh, 0, SEEK_END);
               break;
            } else if (fcount == 0) {
               config.enabled = false;
            } else {
               config.enabled = true;
               config.end_frame = frame + fcount;
            }

            nptr = endptr + 1;
         }
      }
   }
}
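/* Illustrative control-fifo usage from a shell, assuming the application was
 * started with INTEL_MEASURE=...,control=/tmp/measure_ctl (the path is an
 * example). The counts are parsed by the strtol() loop above:
 *
 *   echo 5 > /tmp/measure_ctl   # capture the next 5 frames
 *   echo 0 > /tmp/measure_ctl   # stop capturing immediately
 */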
" 431 "Increase setting with INTEL_MEASURE=buffer_size={count}\n", 432 config.buffer_size); 433 warned = true; 434 } 435 break; 436 } 437 438 struct intel_measure_buffered_result *buffered_result = 439 &rb->results[rb->head]; 440 441 memset(buffered_result, 0, sizeof(*buffered_result)); 442 memcpy(&buffered_result->snapshot, begin, 443 sizeof(struct intel_measure_snapshot)); 444 buffered_result->start_ts = timestamps[i]; 445 buffered_result->end_ts = timestamps[i+1]; 446 buffered_result->idle_duration = 447 raw_timestamp_delta(prev_end_ts, buffered_result->start_ts); 448 buffered_result->frame = batch->frame; 449 buffered_result->batch_count = batch->batch_count; 450 buffered_result->event_index = i / 2; 451 buffered_result->snapshot.event_count = end->event_count; 452 } 453} 454 455static unsigned 456ringbuffer_size(const struct intel_measure_ringbuffer *rb) 457{ 458 unsigned head = rb->head; 459 if (head < rb->tail) 460 head += config.buffer_size; 461 return head - rb->tail; 462} 463 464static const struct intel_measure_buffered_result * 465ringbuffer_pop(struct intel_measure_ringbuffer *rb) 466{ 467 if (rb->tail == rb->head) { 468 /* encountered ringbuffer overflow while processing events */ 469 return NULL; 470 } 471 472 if (++rb->tail == config.buffer_size) 473 rb->tail = 0; 474 return &rb->results[rb->tail]; 475} 476 477static const struct intel_measure_buffered_result * 478ringbuffer_peek(const struct intel_measure_ringbuffer *rb, unsigned index) 479{ 480 int result_offset = rb->tail + index + 1; 481 if (result_offset >= config.buffer_size) 482 result_offset -= config.buffer_size; 483 return &rb->results[result_offset]; 484} 485 486 487/** 488 * Determine the number of buffered events that must be combined for the next 489 * line of csv output. Returns 0 if more events are needed. 490 */ 491static unsigned 492buffered_event_count(struct intel_measure_device *device) 493{ 494 const struct intel_measure_ringbuffer *rb = device->ringbuffer; 495 const unsigned buffered_event_count = ringbuffer_size(rb); 496 if (buffered_event_count == 0) { 497 /* no events to collect */ 498 return 0; 499 } 500 501 /* count the number of buffered events required to meet the configuration */ 502 if (config.flags & (INTEL_MEASURE_DRAW | 503 INTEL_MEASURE_RENDERPASS | 504 INTEL_MEASURE_SHADER)) { 505 /* For these flags, every buffered event represents a line in the 506 * output. None of these events span batches. If the event interval 507 * crosses a batch boundary, then the next interval starts with the new 508 * batch. 509 */ 510 return 1; 511 } 512 513 const unsigned start_frame = ringbuffer_peek(rb, 0)->frame; 514 if (config.flags & INTEL_MEASURE_BATCH) { 515 /* each buffered event is a command buffer. The number of events to 516 * process is the same as the interval, unless the interval crosses a 517 * frame boundary 518 */ 519 if (buffered_event_count < config.event_interval) { 520 /* not enough events */ 521 return 0; 522 } 523 524 /* Imperfect frame tracking requires us to allow for *older* frames */ 525 if (ringbuffer_peek(rb, config.event_interval - 1)->frame <= start_frame) { 526 /* No frame transition. The next {interval} events should be combined. */ 527 return config.event_interval; 528 } 529 530 /* Else a frame transition occurs within the interval. Find the 531 * transition, so the following line of output begins with the batch 532 * that starts the new frame. 
/**
 * Verify that rendering has completed for the batch.
 *
 * Rendering is complete when the last timestamp has been written.
 */
bool
intel_measure_ready(struct intel_measure_batch *batch)
{
   assert(batch->timestamps);
   assert(batch->index > 1);
   return (batch->timestamps[batch->index - 1] != 0);
}

/**
 * Submit completed snapshots for buffering.
 *
 * Snapshot data becomes available when asynchronous rendering completes.
 * Depending on configuration, snapshot data may need to be collated before
 * writing to the output file.
 */
static void
intel_measure_push_result(struct intel_measure_device *device,
                          struct intel_measure_batch *batch)
{
   struct intel_measure_ringbuffer *rb = device->ringbuffer;

   uint64_t *timestamps = batch->timestamps;
   assert(timestamps != NULL);
   assert(timestamps[0] != 0);

   for (int i = 0; i < batch->index; i += 2) {
      const struct intel_measure_snapshot *begin = &batch->snapshots[i];
      const struct intel_measure_snapshot *end = &batch->snapshots[i + 1];

      assert(end->type == INTEL_SNAPSHOT_END);

      if (begin->type == INTEL_SNAPSHOT_SECONDARY_BATCH) {
         assert(begin->secondary != NULL);
         begin->secondary->batch_count = batch->batch_count;
         intel_measure_push_result(device, begin->secondary);
         continue;
      }

      const uint64_t prev_end_ts = rb->results[rb->head].end_ts;

      /* advance ring buffer */
      if (++rb->head == config.buffer_size)
         rb->head = 0;
      if (rb->head == rb->tail) {
         static bool warned = false;
         if (unlikely(!warned)) {
            fprintf(config.file,
                    "WARNING: Buffered data exceeds INTEL_MEASURE limit: %d. "
                    "Data has been dropped. "
                    "Increase setting with INTEL_MEASURE=buffer_size={count}\n",
                    config.buffer_size);
            warned = true;
         }
         break;
      }

      struct intel_measure_buffered_result *buffered_result =
         &rb->results[rb->head];

      memset(buffered_result, 0, sizeof(*buffered_result));
      memcpy(&buffered_result->snapshot, begin,
             sizeof(struct intel_measure_snapshot));
      buffered_result->start_ts = timestamps[i];
      buffered_result->end_ts = timestamps[i + 1];
      buffered_result->idle_duration =
         raw_timestamp_delta(prev_end_ts, buffered_result->start_ts);
      buffered_result->frame = batch->frame;
      buffered_result->batch_count = batch->batch_count;
      buffered_result->event_index = i / 2;
      buffered_result->snapshot.event_count = end->event_count;
   }
}

static unsigned
ringbuffer_size(const struct intel_measure_ringbuffer *rb)
{
   unsigned head = rb->head;
   if (head < rb->tail)
      head += config.buffer_size;
   return head - rb->tail;
}

static const struct intel_measure_buffered_result *
ringbuffer_pop(struct intel_measure_ringbuffer *rb)
{
   if (rb->tail == rb->head) {
      /* encountered ringbuffer overflow while processing events */
      return NULL;
   }

   if (++rb->tail == config.buffer_size)
      rb->tail = 0;
   return &rb->results[rb->tail];
}

static const struct intel_measure_buffered_result *
ringbuffer_peek(const struct intel_measure_ringbuffer *rb, unsigned index)
{
   int result_offset = rb->tail + index + 1;
   if (result_offset >= config.buffer_size)
      result_offset -= config.buffer_size;
   return &rb->results[result_offset];
}
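/* Note on the ring buffer above: head and tail are both advanced before the
 * slot is used, so head == tail means "empty" in ringbuffer_pop() but "full"
 * in intel_measure_push_result(). One slot therefore always stays unused,
 * giving an effective capacity of config.buffer_size - 1 buffered results.
 */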
/**
 * Determine the number of buffered events that must be combined for the next
 * line of csv output. Returns 0 if more events are needed.
 */
static unsigned
buffered_event_count(struct intel_measure_device *device)
{
   const struct intel_measure_ringbuffer *rb = device->ringbuffer;
   const unsigned buffered_event_count = ringbuffer_size(rb);
   if (buffered_event_count == 0) {
      /* no events to collect */
      return 0;
   }

   /* count the number of buffered events required to meet the configuration */
   if (config.flags & (INTEL_MEASURE_DRAW |
                       INTEL_MEASURE_RENDERPASS |
                       INTEL_MEASURE_SHADER)) {
      /* For these flags, every buffered event represents a line in the
       * output. None of these events span batches. If the event interval
       * crosses a batch boundary, then the next interval starts with the new
       * batch.
       */
      return 1;
   }

   const unsigned start_frame = ringbuffer_peek(rb, 0)->frame;
   if (config.flags & INTEL_MEASURE_BATCH) {
      /* Each buffered event is a command buffer. The number of events to
       * process is the same as the interval, unless the interval crosses a
       * frame boundary.
       */
      if (buffered_event_count < config.event_interval) {
         /* not enough events */
         return 0;
      }

      /* Imperfect frame tracking requires us to allow for *older* frames */
      if (ringbuffer_peek(rb, config.event_interval - 1)->frame <= start_frame) {
         /* No frame transition. The next {interval} events should be combined. */
         return config.event_interval;
      }

      /* Else a frame transition occurs within the interval. Find the
       * transition, so the following line of output begins with the batch
       * that starts the new frame.
       */
      for (int event_index = 1;
           event_index <= config.event_interval;
           ++event_index) {
         if (ringbuffer_peek(rb, event_index)->frame > start_frame)
            return event_index;
      }

      assert(false);
   }

   /* Else we need to search buffered events to find the matching frame
    * transition for our interval.
    */
   assert(config.flags & INTEL_MEASURE_FRAME);
   for (int event_index = 1;
        event_index < buffered_event_count;
        ++event_index) {
      const int latest_frame = ringbuffer_peek(rb, event_index)->frame;
      if (latest_frame - start_frame >= config.event_interval)
         return event_index;
   }

   return 0;
}

/**
 * Take result_count events from the ringbuffer and output them as a single
 * line.
 */
static void
print_combined_results(struct intel_measure_device *measure_device,
                       int result_count,
                       struct intel_device_info *info)
{
   if (result_count == 0)
      return;

   struct intel_measure_ringbuffer *result_rb = measure_device->ringbuffer;
   assert(ringbuffer_size(result_rb) >= result_count);
   const struct intel_measure_buffered_result *start_result =
      ringbuffer_pop(result_rb);
   const struct intel_measure_buffered_result *current_result = start_result;

   if (start_result == NULL)
      return;
   --result_count;

   uint64_t duration_ts = raw_timestamp_delta(start_result->start_ts,
                                              current_result->end_ts);
   unsigned event_count = start_result->snapshot.event_count;
   while (result_count-- > 0) {
      assert(ringbuffer_size(result_rb) > 0);
      current_result = ringbuffer_pop(result_rb);
      if (current_result == NULL)
         return;
      duration_ts += raw_timestamp_delta(current_result->start_ts,
                                         current_result->end_ts);
      event_count += current_result->snapshot.event_count;
   }

   const struct intel_measure_snapshot *begin = &start_result->snapshot;
   fprintf(config.file, "%"PRIu64",%"PRIu64",%u,%u,%u,%u,%s,%u,"
           "0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR","
           "0x%"PRIxPTR",0x%"PRIxPTR",%"PRIu64",%"PRIu64"\n",
           start_result->start_ts, current_result->end_ts,
           start_result->frame, start_result->batch_count,
           start_result->event_index, event_count,
           begin->event_name, begin->count,
           begin->vs, begin->tcs, begin->tes, begin->gs, begin->fs, begin->cs,
           begin->framebuffer,
           intel_device_info_timebase_scale(info, start_result->idle_duration),
           intel_device_info_timebase_scale(info, duration_ts));
}

/**
 * Empty the ringbuffer of events that can be printed.
 */
static void
intel_measure_print(struct intel_measure_device *device,
                    struct intel_device_info *info)
{
   while (true) {
      const int events_to_combine = buffered_event_count(device);
      if (events_to_combine == 0)
         break;
      print_combined_results(device, events_to_combine, info);
   }
}

/**
 * Collect snapshots from completed command buffers and submit them to
 * intel_measure for printing.
 */
void
intel_measure_gather(struct intel_measure_device *measure_device,
                     struct intel_device_info *info)
{
   pthread_mutex_lock(&measure_device->mutex);

   /* Iterate snapshots and collect if ready. Each snapshot queue will be
    * in-order, but we must determine which queue has the oldest batch.
    */
   while (!list_is_empty(&measure_device->queued_snapshots)) {
      struct intel_measure_batch *batch =
         list_first_entry(&measure_device->queued_snapshots,
                          struct intel_measure_batch, link);

      if (!intel_measure_ready(batch)) {
         /* command buffer has begun execution on the gpu, but has not
          * completed
          */
         break;
      }

      list_del(&batch->link);
      assert(batch->index % 2 == 0);

      intel_measure_push_result(measure_device, batch);

      batch->index = 0;
      batch->frame = 0;
   }

   intel_measure_print(measure_device, info);
   pthread_mutex_unlock(&measure_device->mutex);
}
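/* Overall flow, as a sketch (driver specifics vary): intel_measure_init() is
 * called once per device; while recording, the driver consults
 * intel_measure_state_changed() and emits GPU timestamp writes that land in
 * batch->timestamps; submitted batches are queued on
 * device->queued_snapshots; and periodic calls to intel_measure_gather()
 * drain completed batches and print the csv rows.
 */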