intel_measure.c revision 7ec681f3
/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file intel_measure.c
 */
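
/*
 * Illustrative usage (examples only; the parsing in intel_measure_init()
 * below is authoritative).  Measurement is configured entirely through the
 * INTEL_MEASURE environment variable, with comma-separated options:
 *
 *    INTEL_MEASURE=draw                      # one csv line per draw call
 *    INTEL_MEASURE=rt,file=/tmp/measure.csv  # per-renderpass, to a file
 *    INTEL_MEASURE=frame,start=100,count=10  # frames 100..109 only
 *
 * If no recognized collection flag is given, INTEL_MEASURE_DRAW is the
 * default.
 */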

#include "intel_measure.h"

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#define __STDC_FORMAT_MACROS 1
#include <inttypes.h>

#include "dev/intel_device_info.h"
#include "util/debug.h"
#include "util/macros.h"
#include "util/u_debug.h"


static const struct debug_control debug_control[] = {
   { "draw",            INTEL_MEASURE_DRAW       },
   { "rt",              INTEL_MEASURE_RENDERPASS },
   { "shader",          INTEL_MEASURE_SHADER     },
   { "batch",           INTEL_MEASURE_BATCH      },
   { "frame",           INTEL_MEASURE_FRAME      },
   { NULL, 0 }
};
static struct intel_measure_config config;

void
intel_measure_init(struct intel_measure_device *device)
{
   static bool once = false;
   const char *env = getenv("INTEL_MEASURE");
   if (unlikely(!once)) {
      once = true;
      memset(&config, 0, sizeof(struct intel_measure_config));
      if (!env)
         return;

      config.file = stderr;
      config.flags = parse_debug_string(env, debug_control);
      if (!config.flags)
         config.flags = INTEL_MEASURE_DRAW;
      config.enabled = true;
      config.event_interval = 1;
      config.control_fh = -1;

      /* Overflows of the following defaults will drop data and generate a
       * warning on the output filehandle.
       */

      /* default batch_size allows for 8k renders in a single batch */
      const int DEFAULT_BATCH_SIZE = 16 * 1024;
      config.batch_size = DEFAULT_BATCH_SIZE;

      /* Default buffer_size allows for 16k batches per line of output in the
       * csv.  Overflow may occur for offscreen workloads or large 'interval'
       * settings.
       */
      const int DEFAULT_BUFFER_SIZE = 16 * 1024;
      config.buffer_size = DEFAULT_BUFFER_SIZE;

      const char *filename = strstr(env, "file=");
      const char *start_frame_s = strstr(env, "start=");
      const char *count_frame_s = strstr(env, "count=");
      const char *control_path = strstr(env, "control=");
      const char *interval_s = strstr(env, "interval=");
      const char *batch_size_s = strstr(env, "batch_size=");
      const char *buffer_size_s = strstr(env, "buffer_size=");
      while (true) {
         char *sep = strrchr(env, ',');
         if (sep == NULL)
            break;
         *sep = '\0';
      }

      if (filename && !__check_suid()) {
         filename += 5;
         config.file = fopen(filename, "w");
         if (!config.file) {
            fprintf(stderr, "INTEL_MEASURE failed to open output file %s: %s\n",
                    filename, strerror(errno));
            abort();
         }
      }

      if (start_frame_s) {
         start_frame_s += 6;
         const int start_frame = atoi(start_frame_s);
         if (start_frame < 0) {
            fprintf(stderr, "INTEL_MEASURE start frame may "
                    "not be negative: %d\n", start_frame);
            abort();
         }

         config.start_frame = start_frame;
         config.enabled = false;
      }

      if (count_frame_s) {
         count_frame_s += 6;
         const int count_frame = atoi(count_frame_s);
         if (count_frame <= 0) {
            fprintf(stderr, "INTEL_MEASURE count frame must be positive: %d\n",
                    count_frame);
            abort();
         }

         config.end_frame = config.start_frame + count_frame;
      }

      if (control_path) {
         control_path += 8;
         /* mkfifoat() takes a file mode, not open() flags */
         if (mkfifoat(AT_FDCWD, control_path, S_IRUSR | S_IWUSR)) {
            if (errno != EEXIST) {
               fprintf(stderr, "INTEL_MEASURE failed to create control "
                       "fifo %s: %s\n", control_path, strerror(errno));
               abort();
            }
         }

         config.control_fh = openat(AT_FDCWD, control_path,
                                    O_RDONLY | O_NONBLOCK);
         if (config.control_fh == -1) {
            fprintf(stderr, "INTEL_MEASURE failed to open control fifo "
                    "%s: %s\n", control_path, strerror(errno));
            abort();
         }

         /* when using a control fifo, do not start until the user triggers
          * capture
          */
         config.enabled = false;
      }

      if (interval_s) {
         interval_s += 9;
         const int event_interval = atoi(interval_s);
         if (event_interval < 1) {
            fprintf(stderr, "INTEL_MEASURE event_interval must be positive: "
                    "%d\n", event_interval);
            abort();
         }
         config.event_interval = event_interval;
      }

      if (batch_size_s) {
         batch_size_s += 11;
         const int batch_size = atoi(batch_size_s);
         if (batch_size < DEFAULT_BATCH_SIZE) {
            fprintf(stderr, "INTEL_MEASURE minimum batch_size is 16k: "
                    "%d\n", batch_size);
            abort();
         }
         if (batch_size > DEFAULT_BATCH_SIZE * 1024) {
            fprintf(stderr, "INTEL_MEASURE batch_size limited to 16M: "
                    "%d\n", batch_size);
            abort();
         }

         config.batch_size = batch_size;
      }

      if (buffer_size_s) {
         buffer_size_s += 12;
         const int buffer_size = atoi(buffer_size_s);
         if (buffer_size < DEFAULT_BUFFER_SIZE) {
            fprintf(stderr, "INTEL_MEASURE minimum buffer_size is 16k: "
                    "%d\n", buffer_size);
         }
         if (buffer_size > DEFAULT_BUFFER_SIZE * 1024) {
            fprintf(stderr, "INTEL_MEASURE buffer_size limited to 16M: "
                    "%d\n", buffer_size);
         }

         config.buffer_size = buffer_size;
      }

      fputs("draw_start,draw_end,frame,batch,"
            "event_index,event_count,type,count,vs,tcs,tes,"
            "gs,fs,cs,framebuffer,idle_ns,time_ns\n",
            config.file);
   }

   device->config = NULL;
   device->frame = 0;
   pthread_mutex_init(&device->mutex, NULL);
   list_inithead(&device->queued_snapshots);

   if (env)
      device->config = &config;
}

const char *
intel_measure_snapshot_string(enum intel_measure_snapshot_type type)
{
   const char *names[] = {
      [INTEL_SNAPSHOT_UNDEFINED]           = "undefined",
      [INTEL_SNAPSHOT_BLIT]                = "blit",
      [INTEL_SNAPSHOT_CCS_AMBIGUATE]       = "ccs ambiguate",
      [INTEL_SNAPSHOT_CCS_COLOR_CLEAR]     = "ccs color clear",
      [INTEL_SNAPSHOT_CCS_PARTIAL_RESOLVE] = "ccs partial resolve",
      [INTEL_SNAPSHOT_CCS_RESOLVE]         = "ccs resolve",
      [INTEL_SNAPSHOT_COMPUTE]             = "compute",
      [INTEL_SNAPSHOT_COPY]                = "copy",
      [INTEL_SNAPSHOT_DRAW]                = "draw",
      [INTEL_SNAPSHOT_HIZ_AMBIGUATE]       = "hiz ambiguate",
      [INTEL_SNAPSHOT_HIZ_CLEAR]           = "hiz clear",
      [INTEL_SNAPSHOT_HIZ_RESOLVE]         = "hiz resolve",
      [INTEL_SNAPSHOT_MCS_COLOR_CLEAR]     = "mcs color clear",
      [INTEL_SNAPSHOT_MCS_PARTIAL_RESOLVE] = "mcs partial resolve",
      [INTEL_SNAPSHOT_SLOW_COLOR_CLEAR]    = "slow color clear",
      [INTEL_SNAPSHOT_SLOW_DEPTH_CLEAR]    = "slow depth clear",
      [INTEL_SNAPSHOT_SECONDARY_BATCH]     = "secondary command buffer",
      [INTEL_SNAPSHOT_END]                 = "end",
   };
   assert(type < ARRAY_SIZE(names));
   assert(names[type] != NULL);
   assert(type != INTEL_SNAPSHOT_UNDEFINED);
   return names[type];
}

/**
 * Indicate to the caller whether a new snapshot should be started.
 *
 * Callers provide rendering state to this method to determine whether the
 * current start event should be skipped. Depending on the configuration
 * flags, a new snapshot may start:
 *  - at every event
 *  - when the program changes
 *  - after a batch is submitted
 *  - at frame boundaries
 *
 * Returns true if a snapshot should be started.
 */
bool
intel_measure_state_changed(const struct intel_measure_batch *batch,
                            uintptr_t vs, uintptr_t tcs, uintptr_t tes,
                            uintptr_t gs, uintptr_t fs, uintptr_t cs)
{
   if (batch->index == 0) {
      /* always record the first event */
      return true;
   }

   const struct intel_measure_snapshot *last_snap =
      &batch->snapshots[batch->index - 1];

   if (config.flags & INTEL_MEASURE_DRAW)
      return true;

   if (batch->index % 2 == 0) {
      /* no snapshot is running, but we have a start event */
      return true;
   }

   if (config.flags & (INTEL_MEASURE_FRAME | INTEL_MEASURE_BATCH)) {
      /* only start collection when index == 0, at the beginning of a batch */
      return false;
   }

   if (config.flags & INTEL_MEASURE_RENDERPASS) {
      return ((last_snap->framebuffer != batch->framebuffer) ||
              /* compute workloads are always in their own renderpass */
              (cs != 0));
   }

   /* remaining comparisons check the state of the render pipeline for
    * INTEL_MEASURE_SHADER
    */
   assert(config.flags & INTEL_MEASURE_SHADER);

   if (!vs && !tcs && !tes && !gs && !fs && !cs) {
      /* blorp always changes program */
      return true;
   }

   return (last_snap->vs  != vs ||
           last_snap->tcs != tcs ||
           last_snap->tes != tes ||
           last_snap->gs  != gs ||
           last_snap->fs  != fs ||
           last_snap->cs  != cs);
}
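
/*
 * Illustrative call site (a sketch only; hook points and variable names are
 * driver-specific): a driver consults this predicate before emitting the
 * timestamp write that opens a new snapshot:
 *
 *    if (intel_measure_state_changed(measure_batch,
 *                                    vs_id, tcs_id, tes_id,
 *                                    gs_id, fs_id, cs_id)) {
 *       // begin a new snapshot and record a start timestamp
 *    }
 */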

/**
 * Notify intel_measure that a frame is about to begin.
 *
 * Depending on the configuration and on any commands received over the
 * control fifo, measurement may start or stop at frame boundaries.
 */
void
intel_measure_frame_transition(unsigned frame)
{
   if (frame == config.start_frame)
      config.enabled = true;
   else if (frame == config.end_frame)
      config.enabled = false;

   /* user commands to the control fifo will override any start/count
    * environment settings
    */
   if (config.control_fh != -1) {
      while (true) {
         const unsigned BUF_SIZE = 128;
         char buf[BUF_SIZE];
         ssize_t bytes = read(config.control_fh, buf, BUF_SIZE - 1);
         if (bytes == 0)
            break;
         if (bytes == -1) {
            fprintf(stderr, "INTEL_MEASURE failed to read control fifo: %s\n",
                    strerror(errno));
            abort();
         }

         buf[bytes] = '\0';
         char *nptr = buf, *endptr = buf;
         while (*nptr != '\0' && *endptr != '\0') {
            long fcount = strtol(nptr, &endptr, 10);
            if (nptr == endptr) {
               config.enabled = false;
               fprintf(stderr, "INTEL_MEASURE invalid frame count on "
                       "control fifo.\n");
               lseek(config.control_fh, 0, SEEK_END);
               break;
            } else if (fcount == 0) {
               config.enabled = false;
            } else {
               config.enabled = true;
               config.end_frame = frame + fcount;
            }

            nptr = endptr + 1;
         }
      }
   }
}
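
/*
 * Illustrative control-fifo session (paths are examples only).  With
 * INTEL_MEASURE=draw,control=/tmp/measure_ctrl set for the target process:
 *
 *    echo 5 > /tmp/measure_ctrl   # capture the next 5 frames
 *    echo 0 > /tmp/measure_ctrl   # stop capturing immediately
 *
 * Each integer written to the fifo is parsed by strtol() above: a positive
 * count enables capture until 'frame + count', and zero disables it.
 */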

#define TIMESTAMP_BITS 36
static uint64_t
raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   if (time0 > time1) {
      return (1ULL << TIMESTAMP_BITS) + time1 - time0;
   } else {
      return time1 - time0;
   }
}
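
/*
 * The raw GPU timestamp counter is 36 bits wide and wraps silently, so a
 * later sample can read smaller than an earlier one.  A worked example
 * (values chosen for illustration): with time0 = 2^36 - 100 and
 * time1 = 50, the delta is 2^36 + 50 - (2^36 - 100) = 150 ticks, rather
 * than a huge bogus value from unsigned subtraction.
 */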

/**
 * Verify that rendering has completed for the batch.
 *
 * Rendering is complete when the last timestamp has been written.
 */
bool
intel_measure_ready(struct intel_measure_batch *batch)
{
   assert(batch->timestamps);
   assert(batch->index > 1);
   return (batch->timestamps[batch->index - 1] != 0);
}

/**
 * Submit completed snapshots for buffering.
 *
 * Snapshot data becomes available when asynchronous rendering completes.
 * Depending on configuration, snapshot data may need to be collated before
 * writing to the output file.
 */
static void
intel_measure_push_result(struct intel_measure_device *device,
                          struct intel_measure_batch *batch)
{
   struct intel_measure_ringbuffer *rb = device->ringbuffer;

   uint64_t *timestamps = batch->timestamps;
   assert(timestamps != NULL);
   assert(timestamps[0] != 0);

   for (int i = 0; i < batch->index; i += 2) {
      const struct intel_measure_snapshot *begin = &batch->snapshots[i];
      const struct intel_measure_snapshot *end = &batch->snapshots[i+1];

      assert(end->type == INTEL_SNAPSHOT_END);

      if (begin->type == INTEL_SNAPSHOT_SECONDARY_BATCH) {
         assert(begin->secondary != NULL);
         begin->secondary->batch_count = batch->batch_count;
         intel_measure_push_result(device, begin->secondary);
         continue;
      }

      const uint64_t prev_end_ts = rb->results[rb->head].end_ts;

      /* advance ring buffer */
      if (++rb->head == config.buffer_size)
         rb->head = 0;
      if (rb->head == rb->tail) {
         static bool warned = false;
         if (unlikely(!warned)) {
            fprintf(config.file,
                    "WARNING: Buffered data exceeds INTEL_MEASURE limit: %d. "
                    "Data has been dropped. "
                    "Increase setting with INTEL_MEASURE=buffer_size={count}\n",
                    config.buffer_size);
            warned = true;
         }
         break;
      }

      struct intel_measure_buffered_result *buffered_result =
         &rb->results[rb->head];

      memset(buffered_result, 0, sizeof(*buffered_result));
      memcpy(&buffered_result->snapshot, begin,
             sizeof(struct intel_measure_snapshot));
      buffered_result->start_ts = timestamps[i];
      buffered_result->end_ts = timestamps[i+1];
      buffered_result->idle_duration =
         raw_timestamp_delta(prev_end_ts, buffered_result->start_ts);
      buffered_result->frame = batch->frame;
      buffered_result->batch_count = batch->batch_count;
      buffered_result->event_index = i / 2;
      buffered_result->snapshot.event_count = end->event_count;
   }
}

static unsigned
ringbuffer_size(const struct intel_measure_ringbuffer *rb)
{
   unsigned head = rb->head;
   if (head < rb->tail)
      head += config.buffer_size;
   return head - rb->tail;
}

static const struct intel_measure_buffered_result *
ringbuffer_pop(struct intel_measure_ringbuffer *rb)
{
   if (rb->tail == rb->head) {
      /* encountered ringbuffer overflow while processing events */
      return NULL;
   }

   if (++rb->tail == config.buffer_size)
      rb->tail = 0;
   return &rb->results[rb->tail];
}

static const struct intel_measure_buffered_result *
ringbuffer_peek(const struct intel_measure_ringbuffer *rb, unsigned index)
{
   int result_offset = rb->tail + index + 1;
   if (result_offset >= config.buffer_size)
      result_offset -= config.buffer_size;
   return &rb->results[result_offset];
}
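
/*
 * A note on the ring convention used above (inferred from the code, not a
 * separate spec): head and tail index the most recently *written* and most
 * recently *read* slots, so slot tail+1 is the oldest unread result and the
 * ring is empty when head == tail.  One slot is therefore always
 * sacrificed; intel_measure_push_result() detects overflow when advancing
 * head would land on tail, and ringbuffer_pop() reports that overflow by
 * returning NULL.
 */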


/**
 * Determine the number of buffered events that must be combined for the next
 * line of csv output.  Returns 0 if more events are needed.
 */
static unsigned
buffered_event_count(struct intel_measure_device *device)
{
   const struct intel_measure_ringbuffer *rb = device->ringbuffer;
   const unsigned buffered_event_count = ringbuffer_size(rb);
   if (buffered_event_count == 0) {
      /* no events to collect */
      return 0;
   }

   /* count the number of buffered events required to meet the configuration */
   if (config.flags & (INTEL_MEASURE_DRAW |
                       INTEL_MEASURE_RENDERPASS |
                       INTEL_MEASURE_SHADER)) {
      /* For these flags, every buffered event represents a line in the
       * output.  None of these events span batches.  If the event interval
       * crosses a batch boundary, then the next interval starts with the new
       * batch.
       */
      return 1;
   }

   const unsigned start_frame = ringbuffer_peek(rb, 0)->frame;
   if (config.flags & INTEL_MEASURE_BATCH) {
      /* each buffered event is a command buffer.  The number of events to
       * process is the same as the interval, unless the interval crosses a
       * frame boundary
       */
      if (buffered_event_count < config.event_interval) {
         /* not enough events */
         return 0;
      }

      /* Imperfect frame tracking requires us to allow for *older* frames */
      if (ringbuffer_peek(rb, config.event_interval - 1)->frame <= start_frame) {
         /* No frame transition.  The next {interval} events should be combined. */
         return config.event_interval;
      }

      /* Else a frame transition occurs within the interval.  Find the
       * transition, so the following line of output begins with the batch
       * that starts the new frame.
       */
      for (int event_index = 1;
           event_index <= config.event_interval;
           ++event_index) {
         if (ringbuffer_peek(rb, event_index)->frame > start_frame)
            return event_index;
      }

      assert(false);
   }

   /* Else we need to search buffered events to find the matching frame
    * transition for our interval.
    */
   assert(config.flags & INTEL_MEASURE_FRAME);
   for (int event_index = 1;
        event_index < buffered_event_count;
        ++event_index) {
      const int latest_frame = ringbuffer_peek(rb, event_index)->frame;
      if (latest_frame - start_frame >= config.event_interval)
         return event_index;
   }

   return 0;
}
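
/*
 * Example of the interval semantics implemented above (illustrative): with
 * INTEL_MEASURE=batch,interval=10, ten command buffers are combined into
 * each csv line, except that a line is cut short when a frame boundary
 * falls inside the interval.  With INTEL_MEASURE=frame,interval=5, each
 * line combines all batches spanning five frames.
 */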

/**
 * Take result_count events from the ringbuffer and output them as a single
 * line.
 */
static void
print_combined_results(struct intel_measure_device *measure_device,
                       int result_count,
                       struct intel_device_info *info)
{
   if (result_count == 0)
      return;

   struct intel_measure_ringbuffer *result_rb = measure_device->ringbuffer;
   assert(ringbuffer_size(result_rb) >= result_count);
   const struct intel_measure_buffered_result *start_result =
      ringbuffer_pop(result_rb);
   const struct intel_measure_buffered_result *current_result = start_result;

   if (start_result == NULL)
      return;
   --result_count;

   uint64_t duration_ts = raw_timestamp_delta(start_result->start_ts,
                                              current_result->end_ts);
   unsigned event_count = start_result->snapshot.event_count;
   while (result_count-- > 0) {
      assert(ringbuffer_size(result_rb) > 0);
      current_result = ringbuffer_pop(result_rb);
      if (current_result == NULL)
         return;
      duration_ts += raw_timestamp_delta(current_result->start_ts,
                                         current_result->end_ts);
      event_count += current_result->snapshot.event_count;
   }

   const struct intel_measure_snapshot *begin = &start_result->snapshot;
   fprintf(config.file, "%"PRIu64",%"PRIu64",%u,%u,%u,%u,%s,%u,"
           "0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR","
           "0x%"PRIxPTR",0x%"PRIxPTR",%"PRIu64",%"PRIu64"\n",
           start_result->start_ts, current_result->end_ts,
           start_result->frame, start_result->batch_count,
           start_result->event_index, event_count,
           begin->event_name, begin->count,
           begin->vs, begin->tcs, begin->tes, begin->gs, begin->fs, begin->cs,
           begin->framebuffer,
           intel_device_info_timebase_scale(info, start_result->idle_duration),
           intel_device_info_timebase_scale(info, duration_ts));
}
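
/*
 * The csv columns match the header written in intel_measure_init().  An
 * illustrative line (all values invented for the example):
 *
 *    1092427318,1092437483,2,15,0,1,draw,3,
 *    0xaa00,0x0,0x0,0x0,0xab40,0x0,0x120,9500,203300
 *
 * i.e. draw_start/draw_end raw timestamps, frame 2, batch 15, event index 0
 * combining one event, then the per-stage program identifiers and
 * framebuffer handle in hex, then idle and busy durations scaled to
 * nanoseconds.
 */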

/**
 * Empty the ringbuffer of events that can be printed.
 */
static void
intel_measure_print(struct intel_measure_device *device,
                    struct intel_device_info *info)
{
   while (true) {
      const int events_to_combine = buffered_event_count(device);
      if (events_to_combine == 0)
         break;
      print_combined_results(device, events_to_combine, info);
   }
}

/**
 * Collect snapshots from completed command buffers and submit them to
 * intel_measure for printing.
 */
void
intel_measure_gather(struct intel_measure_device *measure_device,
                     struct intel_device_info *info)
{
   pthread_mutex_lock(&measure_device->mutex);

   /* Iterate snapshots and collect if ready.  The queue is in submission
    * order, so stop at the first batch that has not completed.
    */
   while (!list_is_empty(&measure_device->queued_snapshots)) {
      struct intel_measure_batch *batch =
         list_first_entry(&measure_device->queued_snapshots,
                          struct intel_measure_batch, link);

      if (!intel_measure_ready(batch)) {
         /* command buffer has begun execution on the gpu, but has not
          * completed.
          */
         break;
      }

      list_del(&batch->link);
      assert(batch->index % 2 == 0);

      intel_measure_push_result(measure_device, batch);

      batch->index = 0;
      batch->frame = 0;
   }

   intel_measure_print(measure_device, info);
   pthread_mutex_unlock(&measure_device->mutex);
}
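
/*
 * Illustrative integration (a sketch; the exact hook points are
 * driver-specific): a driver advances the frame counter and notifies
 * intel_measure at present time, then gathers results after command
 * buffers retire:
 *
 *    intel_measure_frame_transition(++measure_device->frame);
 *    intel_measure_gather(measure_device, devinfo);
 */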