/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file intel_measure.c
 */
267ec681f3Smrg
277ec681f3Smrg#include "intel_measure.h"
287ec681f3Smrg
297ec681f3Smrg#include <errno.h>
307ec681f3Smrg#include <fcntl.h>
317ec681f3Smrg#include <stdlib.h>
327ec681f3Smrg#include <string.h>
337ec681f3Smrg#include <sys/stat.h>
347ec681f3Smrg#include <sys/types.h>
357ec681f3Smrg#include <unistd.h>
367ec681f3Smrg
377ec681f3Smrg#define __STDC_FORMAT_MACROS 1
387ec681f3Smrg#include <inttypes.h>
397ec681f3Smrg
407ec681f3Smrg#include "dev/intel_device_info.h"
417ec681f3Smrg#include "util/debug.h"
427ec681f3Smrg#include "util/macros.h"
437ec681f3Smrg#include "util/u_debug.h"
447ec681f3Smrg
457ec681f3Smrg
/* Maps INTEL_MEASURE= option names to collection-granularity flags; consumed
 * by parse_debug_string() in intel_measure_init().  The list is terminated by
 * the NULL sentinel entry.
 */
static const struct debug_control debug_control[] = {
   { "draw",            INTEL_MEASURE_DRAW       },
   { "rt",              INTEL_MEASURE_RENDERPASS },
   { "shader",          INTEL_MEASURE_SHADER     },
   { "batch",           INTEL_MEASURE_BATCH      },
   { "frame",           INTEL_MEASURE_FRAME      },
   { NULL, 0 }
};
/* Process-wide measurement configuration, populated once on the first call to
 * intel_measure_init() and shared by every device.
 */
static struct intel_measure_config config;
557ec681f3Smrg
567ec681f3Smrgvoid
577ec681f3Smrgintel_measure_init(struct intel_measure_device *device)
587ec681f3Smrg{
597ec681f3Smrg   static bool once = false;
607ec681f3Smrg   const char *env = getenv("INTEL_MEASURE");
617ec681f3Smrg   if (unlikely(!once)) {
627ec681f3Smrg      once = true;
637ec681f3Smrg      memset(&config, 0, sizeof(struct intel_measure_config));
647ec681f3Smrg      if (!env)
657ec681f3Smrg         return;
667ec681f3Smrg
677ec681f3Smrg      config.file = stderr;
687ec681f3Smrg      config.flags = parse_debug_string(env, debug_control);
697ec681f3Smrg      if (!config.flags)
707ec681f3Smrg         config.flags = INTEL_MEASURE_DRAW;
717ec681f3Smrg      config.enabled = true;
727ec681f3Smrg      config.event_interval = 1;
737ec681f3Smrg      config.control_fh = -1;
747ec681f3Smrg
757ec681f3Smrg      /* Overflows of the following defaults will drop data and generate a
767ec681f3Smrg       * warning on the output filehandle.
777ec681f3Smrg       */
787ec681f3Smrg
797ec681f3Smrg      /* default batch_size allows for 8k renders in a single batch */
807ec681f3Smrg      const int DEFAULT_BATCH_SIZE = 16 * 1024;
817ec681f3Smrg      config.batch_size = DEFAULT_BATCH_SIZE;
827ec681f3Smrg
837ec681f3Smrg      /* Default buffer_size allows for 16k batches per line of output in the
847ec681f3Smrg       * csv.  Overflow may occur for offscreen workloads or large 'interval'
857ec681f3Smrg       * settings.
867ec681f3Smrg       */
877ec681f3Smrg      const int DEFAULT_BUFFER_SIZE = 16 * 1024;
887ec681f3Smrg      config.buffer_size = DEFAULT_BUFFER_SIZE;
897ec681f3Smrg
907ec681f3Smrg      const char *filename = strstr(env, "file=");
917ec681f3Smrg      const char *start_frame_s = strstr(env, "start=");
927ec681f3Smrg      const char *count_frame_s = strstr(env, "count=");
937ec681f3Smrg      const char *control_path = strstr(env, "control=");
947ec681f3Smrg      const char *interval_s = strstr(env, "interval=");
957ec681f3Smrg      const char *batch_size_s = strstr(env, "batch_size=");
967ec681f3Smrg      const char *buffer_size_s = strstr(env, "buffer_size=");
977ec681f3Smrg      while (true) {
987ec681f3Smrg         char *sep = strrchr(env, ',');
997ec681f3Smrg         if (sep == NULL)
1007ec681f3Smrg            break;
1017ec681f3Smrg         *sep = '\0';
1027ec681f3Smrg      }
1037ec681f3Smrg
1047ec681f3Smrg      if (filename && !__check_suid()) {
1057ec681f3Smrg         filename += 5;
1067ec681f3Smrg         config.file = fopen(filename, "w");
1077ec681f3Smrg         if (!config.file) {
1087ec681f3Smrg            fprintf(stderr, "INTEL_MEASURE failed to open output file %s: %s\n",
1097ec681f3Smrg                    filename, strerror (errno));
1107ec681f3Smrg            abort();
1117ec681f3Smrg         }
1127ec681f3Smrg      }
1137ec681f3Smrg
1147ec681f3Smrg      if (start_frame_s) {
1157ec681f3Smrg         start_frame_s += 6;
1167ec681f3Smrg         const int start_frame = atoi(start_frame_s);
1177ec681f3Smrg         if (start_frame < 0) {
1187ec681f3Smrg            fprintf(stderr, "INTEL_MEASURE start frame may "
1197ec681f3Smrg                    "not be negative: %d\n", start_frame);
1207ec681f3Smrg            abort();
1217ec681f3Smrg         }
1227ec681f3Smrg
1237ec681f3Smrg         config.start_frame = start_frame;
1247ec681f3Smrg         config.enabled = false;
1257ec681f3Smrg      }
1267ec681f3Smrg
1277ec681f3Smrg      if (count_frame_s) {
1287ec681f3Smrg         count_frame_s += 6;
1297ec681f3Smrg         const int count_frame = atoi(count_frame_s);
1307ec681f3Smrg         if (count_frame <= 0) {
1317ec681f3Smrg            fprintf(stderr, "INTEL_MEASURE count frame must be positive: %d\n",
1327ec681f3Smrg                    count_frame);
1337ec681f3Smrg            abort();
1347ec681f3Smrg         }
1357ec681f3Smrg
1367ec681f3Smrg         config.end_frame = config.start_frame + count_frame;
1377ec681f3Smrg      }
1387ec681f3Smrg
1397ec681f3Smrg      if (control_path) {
1407ec681f3Smrg         control_path += 8;
1417ec681f3Smrg         if (mkfifoat(AT_FDCWD, control_path, O_CREAT | S_IRUSR | S_IWUSR)) {
1427ec681f3Smrg            if (errno != EEXIST) {
1437ec681f3Smrg               fprintf(stderr, "INTEL_MEASURE failed to create control "
1447ec681f3Smrg                       "fifo %s: %s\n", control_path, strerror (errno));
1457ec681f3Smrg               abort();
1467ec681f3Smrg            }
1477ec681f3Smrg         }
1487ec681f3Smrg
1497ec681f3Smrg         config.control_fh = openat(AT_FDCWD, control_path,
1507ec681f3Smrg                                    O_RDONLY | O_NONBLOCK);
1517ec681f3Smrg         if (config.control_fh == -1) {
1527ec681f3Smrg            fprintf(stderr, "INTEL_MEASURE failed to open control fifo "
1537ec681f3Smrg                    "%s: %s\n", control_path, strerror (errno));
1547ec681f3Smrg            abort();
1557ec681f3Smrg         }
1567ec681f3Smrg
1577ec681f3Smrg         /* when using a control fifo, do not start until the user triggers
1587ec681f3Smrg          * capture
1597ec681f3Smrg          */
1607ec681f3Smrg         config.enabled = false;
1617ec681f3Smrg      }
1627ec681f3Smrg
1637ec681f3Smrg      if (interval_s) {
1647ec681f3Smrg         interval_s += 9;
1657ec681f3Smrg         const int event_interval = atoi(interval_s);
1667ec681f3Smrg         if (event_interval < 1) {
1677ec681f3Smrg            fprintf(stderr, "INTEL_MEASURE event_interval must be positive: "
1687ec681f3Smrg                    "%d\n", event_interval);
1697ec681f3Smrg            abort();
1707ec681f3Smrg         }
1717ec681f3Smrg         config.event_interval = event_interval;
1727ec681f3Smrg      }
1737ec681f3Smrg
1747ec681f3Smrg      if (batch_size_s) {
1757ec681f3Smrg         batch_size_s += 11;
1767ec681f3Smrg         const int batch_size = atoi(batch_size_s);
1777ec681f3Smrg         if (batch_size < DEFAULT_BATCH_SIZE) {
1787ec681f3Smrg            fprintf(stderr, "INTEL_MEASURE minimum batch_size is 4k: "
1797ec681f3Smrg                    "%d\n", batch_size);
1807ec681f3Smrg            abort();
1817ec681f3Smrg         }
1827ec681f3Smrg         if (batch_size > DEFAULT_BATCH_SIZE * 1024) {
1837ec681f3Smrg            fprintf(stderr, "INTEL_MEASURE batch_size limited to 4M: "
1847ec681f3Smrg                    "%d\n", batch_size);
1857ec681f3Smrg            abort();
1867ec681f3Smrg         }
1877ec681f3Smrg
1887ec681f3Smrg         config.batch_size = batch_size;
1897ec681f3Smrg      }
1907ec681f3Smrg
1917ec681f3Smrg      if (buffer_size_s) {
1927ec681f3Smrg         buffer_size_s += 12;
1937ec681f3Smrg         const int buffer_size = atoi(buffer_size_s);
1947ec681f3Smrg         if (buffer_size < DEFAULT_BUFFER_SIZE) {
1957ec681f3Smrg            fprintf(stderr, "INTEL_MEASURE minimum buffer_size is 1k: "
1967ec681f3Smrg                    "%d\n", DEFAULT_BUFFER_SIZE);
1977ec681f3Smrg         }
1987ec681f3Smrg         if (buffer_size > DEFAULT_BUFFER_SIZE * 1024) {
1997ec681f3Smrg            fprintf(stderr, "INTEL_MEASURE buffer_size limited to 1M: "
2007ec681f3Smrg                    "%d\n", buffer_size);
2017ec681f3Smrg         }
2027ec681f3Smrg
2037ec681f3Smrg         config.buffer_size = buffer_size;
2047ec681f3Smrg      }
2057ec681f3Smrg
2067ec681f3Smrg      fputs("draw_start,draw_end,frame,batch,"
2077ec681f3Smrg            "event_index,event_count,type,count,vs,tcs,tes,"
2087ec681f3Smrg            "gs,fs,cs,framebuffer,idle_ns,time_ns\n",
2097ec681f3Smrg            config.file);
2107ec681f3Smrg   }
2117ec681f3Smrg
2127ec681f3Smrg   device->config = NULL;
2137ec681f3Smrg   device->frame = 0;
2147ec681f3Smrg   pthread_mutex_init(&device->mutex, NULL);
2157ec681f3Smrg   list_inithead(&device->queued_snapshots);
2167ec681f3Smrg
2177ec681f3Smrg   if (env)
2187ec681f3Smrg      device->config = &config;
2197ec681f3Smrg}
2207ec681f3Smrg
2217ec681f3Smrgconst char *
2227ec681f3Smrgintel_measure_snapshot_string(enum intel_measure_snapshot_type type)
2237ec681f3Smrg{
2247ec681f3Smrg   const char *names[] = {
2257ec681f3Smrg      [INTEL_SNAPSHOT_UNDEFINED]           = "undefined",
2267ec681f3Smrg      [INTEL_SNAPSHOT_BLIT]                = "blit",
2277ec681f3Smrg      [INTEL_SNAPSHOT_CCS_AMBIGUATE]       = "ccs ambiguate",
2287ec681f3Smrg      [INTEL_SNAPSHOT_CCS_COLOR_CLEAR]     = "ccs color clear",
2297ec681f3Smrg      [INTEL_SNAPSHOT_CCS_PARTIAL_RESOLVE] = "ccs partial resolve",
2307ec681f3Smrg      [INTEL_SNAPSHOT_CCS_RESOLVE]         = "ccs resolve",
2317ec681f3Smrg      [INTEL_SNAPSHOT_COMPUTE]             = "compute",
2327ec681f3Smrg      [INTEL_SNAPSHOT_COPY]                = "copy",
2337ec681f3Smrg      [INTEL_SNAPSHOT_DRAW]                = "draw",
2347ec681f3Smrg      [INTEL_SNAPSHOT_HIZ_AMBIGUATE]       = "hiz ambiguate",
2357ec681f3Smrg      [INTEL_SNAPSHOT_HIZ_CLEAR]           = "hiz clear",
2367ec681f3Smrg      [INTEL_SNAPSHOT_HIZ_RESOLVE]         = "hiz resolve",
2377ec681f3Smrg      [INTEL_SNAPSHOT_MCS_COLOR_CLEAR]     = "mcs color clear",
2387ec681f3Smrg      [INTEL_SNAPSHOT_MCS_PARTIAL_RESOLVE] = "mcs partial resolve",
2397ec681f3Smrg      [INTEL_SNAPSHOT_SLOW_COLOR_CLEAR]    = "slow color clear",
2407ec681f3Smrg      [INTEL_SNAPSHOT_SLOW_DEPTH_CLEAR]    = "slow depth clear",
2417ec681f3Smrg      [INTEL_SNAPSHOT_SECONDARY_BATCH]     = "secondary command buffer",
2427ec681f3Smrg      [INTEL_SNAPSHOT_END]                 = "end",
2437ec681f3Smrg   };
2447ec681f3Smrg   assert(type < ARRAY_SIZE(names));
2457ec681f3Smrg   assert(names[type] != NULL);
2467ec681f3Smrg   assert(type != INTEL_SNAPSHOT_UNDEFINED);
2477ec681f3Smrg   return names[type];
2487ec681f3Smrg}
2497ec681f3Smrg
2507ec681f3Smrg/**
2517ec681f3Smrg * Indicate to the caller whether a new snapshot should be started.
2527ec681f3Smrg *
2537ec681f3Smrg * Callers provide rendering state to this method to determine whether the
2547ec681f3Smrg * current start event should be skipped. Depending on the configuration
2557ec681f3Smrg * flags, a new snapshot may start:
2567ec681f3Smrg *  - at every event
2577ec681f3Smrg *  - when the program changes
2587ec681f3Smrg *  - after a batch is submitted
2597ec681f3Smrg *  - at frame boundaries
2607ec681f3Smrg *
2617ec681f3Smrg * Returns true if a snapshot should be started.
2627ec681f3Smrg */
2637ec681f3Smrgbool
2647ec681f3Smrgintel_measure_state_changed(const struct intel_measure_batch *batch,
2657ec681f3Smrg                            uintptr_t vs, uintptr_t tcs, uintptr_t tes,
2667ec681f3Smrg                            uintptr_t gs, uintptr_t fs, uintptr_t cs)
2677ec681f3Smrg{
2687ec681f3Smrg   if (batch->index == 0) {
2697ec681f3Smrg      /* always record the first event */
2707ec681f3Smrg      return true;
2717ec681f3Smrg   }
2727ec681f3Smrg
2737ec681f3Smrg   const struct intel_measure_snapshot *last_snap =
2747ec681f3Smrg      &batch->snapshots[batch->index - 1];
2757ec681f3Smrg
2767ec681f3Smrg   if (config.flags & INTEL_MEASURE_DRAW)
2777ec681f3Smrg      return true;
2787ec681f3Smrg
2797ec681f3Smrg   if (batch->index % 2 == 0) {
2807ec681f3Smrg      /* no snapshot is running, but we have a start event */
2817ec681f3Smrg      return true;
2827ec681f3Smrg   }
2837ec681f3Smrg
2847ec681f3Smrg   if (config.flags & (INTEL_MEASURE_FRAME | INTEL_MEASURE_BATCH)) {
2857ec681f3Smrg      /* only start collection when index == 0, at the beginning of a batch */
2867ec681f3Smrg      return false;
2877ec681f3Smrg   }
2887ec681f3Smrg
2897ec681f3Smrg   if (config.flags & INTEL_MEASURE_RENDERPASS) {
2907ec681f3Smrg      return ((last_snap->framebuffer != batch->framebuffer) ||
2917ec681f3Smrg              /* compute workloads are always in their own renderpass */
2927ec681f3Smrg              (cs != 0));
2937ec681f3Smrg   }
2947ec681f3Smrg
2957ec681f3Smrg   /* remaining comparisons check the state of the render pipeline for
2967ec681f3Smrg    * INTEL_MEASURE_PROGRAM
2977ec681f3Smrg    */
2987ec681f3Smrg   assert(config.flags & INTEL_MEASURE_SHADER);
2997ec681f3Smrg
3007ec681f3Smrg   if (!vs && !tcs && !tes && !gs && !fs && !cs) {
3017ec681f3Smrg      /* blorp always changes program */
3027ec681f3Smrg      return true;
3037ec681f3Smrg   }
3047ec681f3Smrg
3057ec681f3Smrg   return (last_snap->vs  != (uintptr_t) vs ||
3067ec681f3Smrg           last_snap->tcs != (uintptr_t) tcs ||
3077ec681f3Smrg           last_snap->tes != (uintptr_t) tes ||
3087ec681f3Smrg           last_snap->gs  != (uintptr_t) gs ||
3097ec681f3Smrg           last_snap->fs  != (uintptr_t) fs ||
3107ec681f3Smrg           last_snap->cs  != (uintptr_t) cs);
3117ec681f3Smrg}
3127ec681f3Smrg
3137ec681f3Smrg/**
3147ec681f3Smrg * Notify intel_measure that a frame is about to begin.
3157ec681f3Smrg *
3167ec681f3Smrg * Configuration values and the control fifo may commence measurement at frame
3177ec681f3Smrg * boundaries.
3187ec681f3Smrg */
3197ec681f3Smrgvoid
3207ec681f3Smrgintel_measure_frame_transition(unsigned frame)
3217ec681f3Smrg{
3227ec681f3Smrg   if (frame == config.start_frame)
3237ec681f3Smrg      config.enabled = true;
3247ec681f3Smrg   else if (frame == config.end_frame)
3257ec681f3Smrg      config.enabled = false;
3267ec681f3Smrg
3277ec681f3Smrg   /* user commands to the control fifo will override any start/count
3287ec681f3Smrg    * environment settings
3297ec681f3Smrg    */
3307ec681f3Smrg   if (config.control_fh != -1) {
3317ec681f3Smrg      while (true) {
3327ec681f3Smrg         const unsigned BUF_SIZE = 128;
3337ec681f3Smrg         char buf[BUF_SIZE];
3347ec681f3Smrg         ssize_t bytes = read(config.control_fh, buf, BUF_SIZE - 1);
3357ec681f3Smrg         if (bytes == 0)
3367ec681f3Smrg            break;
3377ec681f3Smrg         if (bytes == -1) {
3387ec681f3Smrg            fprintf(stderr, "INTEL_MEASURE failed to read control fifo: %s\n",
3397ec681f3Smrg                    strerror(errno));
3407ec681f3Smrg            abort();
3417ec681f3Smrg         }
3427ec681f3Smrg
3437ec681f3Smrg         buf[bytes] = '\0';
3447ec681f3Smrg         char *nptr = buf, *endptr = buf;
3457ec681f3Smrg         while (*nptr != '\0' && *endptr != '\0') {
3467ec681f3Smrg            long fcount = strtol(nptr, &endptr, 10);
3477ec681f3Smrg            if (nptr == endptr) {
3487ec681f3Smrg               config.enabled = false;
3497ec681f3Smrg               fprintf(stderr, "INTEL_MEASURE invalid frame count on "
3507ec681f3Smrg                       "control fifo.\n");
3517ec681f3Smrg               lseek(config.control_fh, 0, SEEK_END);
3527ec681f3Smrg               break;
3537ec681f3Smrg            } else if (fcount == 0) {
3547ec681f3Smrg               config.enabled = false;
3557ec681f3Smrg            } else {
3567ec681f3Smrg               config.enabled = true;
3577ec681f3Smrg               config.end_frame = frame + fcount;
3587ec681f3Smrg            }
3597ec681f3Smrg
3607ec681f3Smrg            nptr = endptr + 1;
3617ec681f3Smrg         }
3627ec681f3Smrg      }
3637ec681f3Smrg   }
3647ec681f3Smrg}
3657ec681f3Smrg
/* Width of the raw GPU timestamp counter before it wraps. */
#define TIMESTAMP_BITS 36
/**
 * Elapsed raw ticks between two timestamps, compensating for a single
 * wrap of the TIMESTAMP_BITS-wide counter when time0 > time1.
 */
static uint64_t
raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   const uint64_t rollover =
      (time0 > time1) ? (1ULL << TIMESTAMP_BITS) : 0;
   return rollover + time1 - time0;
}
3767ec681f3Smrg
3777ec681f3Smrg/**
3787ec681f3Smrg * Verify that rendering has completed for the batch
3797ec681f3Smrg *
3807ec681f3Smrg * Rendering is complete when the last timestamp has been written.
3817ec681f3Smrg*/
3827ec681f3Smrgbool
3837ec681f3Smrgintel_measure_ready(struct intel_measure_batch *batch)
3847ec681f3Smrg{
3857ec681f3Smrg   assert(batch->timestamps);
3867ec681f3Smrg   assert(batch->index > 1);
3877ec681f3Smrg   return (batch->timestamps[batch->index - 1] != 0);
3887ec681f3Smrg}
3897ec681f3Smrg
/**
 * Submit completed snapshots for buffering.
 *
 * Snapshot data becomes available when asynchronous rendering completes.
 * Depending on configuration, snapshot data may need to be collated before
 * writing to the output file.
 *
 * Snapshots are stored as begin/end pairs at even/odd indices.  Secondary
 * command buffers are pushed recursively.  On ringbuffer overflow the
 * remaining pairs of this batch are dropped with a one-time warning.
 */
static void
intel_measure_push_result(struct intel_measure_device *device,
                          struct intel_measure_batch *batch)
{
   struct intel_measure_ringbuffer *rb = device->ringbuffer;

   uint64_t *timestamps = batch->timestamps;
   assert(timestamps != NULL);
   assert(timestamps[0] != 0);

   /* iterate begin/end snapshot pairs */
   for (int i = 0; i < batch->index; i += 2) {
      const struct intel_measure_snapshot *begin = &batch->snapshots[i];
      const struct intel_measure_snapshot *end = &batch->snapshots[i+1];

      assert (end->type == INTEL_SNAPSHOT_END);

      /* A secondary command buffer inherits this batch's count and has its
       * own snapshots pushed recursively.
       */
      if (begin->type == INTEL_SNAPSHOT_SECONDARY_BATCH) {
         assert(begin->secondary != NULL);
         begin->secondary->batch_count = batch->batch_count;
         intel_measure_push_result(device, begin->secondary);
         continue;
      }

      /* end timestamp of the previously-pushed result, used below to
       * compute how long the gpu sat idle before this event
       */
      const uint64_t prev_end_ts = rb->results[rb->head].end_ts;

      /* advance ring buffer */
      if (++rb->head == config.buffer_size)
         rb->head = 0;
      if (rb->head == rb->tail) {
         /* ring is full: drop this and the remaining events of the batch */
         static bool warned = false;
         if (unlikely(!warned)) {
            fprintf(config.file,
                    "WARNING: Buffered data exceeds INTEL_MEASURE limit: %d. "
                    "Data has been dropped. "
                    "Increase setting with INTEL_MEASURE=buffer_size={count}\n",
                    config.buffer_size);
            warned = true;
         }
         break;
      }

      struct intel_measure_buffered_result *buffered_result =
         &rb->results[rb->head];

      memset(buffered_result, 0, sizeof(*buffered_result));
      memcpy(&buffered_result->snapshot, begin,
             sizeof(struct intel_measure_snapshot));
      buffered_result->start_ts = timestamps[i];
      buffered_result->end_ts = timestamps[i+1];
      buffered_result->idle_duration =
         raw_timestamp_delta(prev_end_ts, buffered_result->start_ts);
      buffered_result->frame = batch->frame;
      buffered_result->batch_count = batch->batch_count;
      /* event_index counts begin/end pairs, not raw snapshot slots */
      buffered_result->event_index = i / 2;
      buffered_result->snapshot.event_count = end->event_count;
   }
}
4547ec681f3Smrg
4557ec681f3Smrgstatic unsigned
4567ec681f3Smrgringbuffer_size(const struct intel_measure_ringbuffer *rb)
4577ec681f3Smrg{
4587ec681f3Smrg   unsigned head = rb->head;
4597ec681f3Smrg   if (head < rb->tail)
4607ec681f3Smrg      head += config.buffer_size;
4617ec681f3Smrg   return head - rb->tail;
4627ec681f3Smrg}
4637ec681f3Smrg
4647ec681f3Smrgstatic const struct intel_measure_buffered_result *
4657ec681f3Smrgringbuffer_pop(struct intel_measure_ringbuffer *rb)
4667ec681f3Smrg{
4677ec681f3Smrg   if (rb->tail == rb->head) {
4687ec681f3Smrg      /* encountered ringbuffer overflow while processing events */
4697ec681f3Smrg      return NULL;
4707ec681f3Smrg   }
4717ec681f3Smrg
4727ec681f3Smrg   if (++rb->tail == config.buffer_size)
4737ec681f3Smrg      rb->tail = 0;
4747ec681f3Smrg   return &rb->results[rb->tail];
4757ec681f3Smrg}
4767ec681f3Smrg
4777ec681f3Smrgstatic const struct intel_measure_buffered_result *
4787ec681f3Smrgringbuffer_peek(const struct intel_measure_ringbuffer *rb, unsigned index)
4797ec681f3Smrg{
4807ec681f3Smrg   int result_offset = rb->tail + index + 1;
4817ec681f3Smrg   if (result_offset >= config.buffer_size)
4827ec681f3Smrg      result_offset -= config.buffer_size;
4837ec681f3Smrg   return &rb->results[result_offset];
4847ec681f3Smrg}
4857ec681f3Smrg
4867ec681f3Smrg
/**
 * Determine the number of buffered events that must be combined for the next
 * line of csv output. Returns 0 if more events are needed.
 */
static unsigned
buffered_event_count(struct intel_measure_device *device)
{
   const struct intel_measure_ringbuffer *rb = device->ringbuffer;
   /* this local deliberately shadows the function name; it is the number of
    * results currently buffered in the ring
    */
   const unsigned buffered_event_count = ringbuffer_size(rb);
   if (buffered_event_count == 0) {
      /* no events to collect */
      return 0;
   }

   /* count the number of buffered events required to meet the configuration */
   if (config.flags & (INTEL_MEASURE_DRAW |
                       INTEL_MEASURE_RENDERPASS |
                       INTEL_MEASURE_SHADER)) {
      /* For these flags, every buffered event represents a line in the
       * output.  None of these events span batches.  If the event interval
       * crosses a batch boundary, then the next interval starts with the new
       * batch.
       */
      return 1;
   }

   const unsigned start_frame = ringbuffer_peek(rb, 0)->frame;
   if (config.flags & INTEL_MEASURE_BATCH) {
      /* each buffered event is a command buffer.  The number of events to
       * process is the same as the interval, unless the interval crosses a
       * frame boundary
       */
      if (buffered_event_count < config.event_interval) {
         /* not enough events */
         return 0;
      }

      /* Imperfect frame tracking requires us to allow for *older* frames */
      if (ringbuffer_peek(rb, config.event_interval - 1)->frame <= start_frame) {
         /* No frame transition.  The next {interval} events should be combined. */
         return config.event_interval;
      }

      /* Else a frame transition occurs within the interval.  Find the
       * transition, so the following line of output begins with the batch
       * that starts the new frame.
       */
      for (int event_index = 1;
           event_index <= config.event_interval;
           ++event_index) {
         /* NOTE(review): at event_index == config.event_interval this peeks
          * one slot past the {interval} events verified above — confirm the
          * ring always holds a valid entry there.
          */
         if (ringbuffer_peek(rb, event_index)->frame > start_frame)
            return event_index;
      }

      /* unreachable: the peek above already found a frame > start_frame */
      assert(false);
   }

   /* Else we need to search buffered events to find the matching frame
    * transition for our interval.
    */
   assert(config.flags & INTEL_MEASURE_FRAME);
   for (int event_index = 1;
        event_index < buffered_event_count;
        ++event_index) {
      const int latest_frame = ringbuffer_peek(rb, event_index)->frame;
      if (latest_frame - start_frame >= config.event_interval)
         return event_index;
   }

   /* the interval's frame transition has not been buffered yet */
   return 0;
}
5587ec681f3Smrg
5597ec681f3Smrg/**
5607ec681f3Smrg * Take result_count events from the ringbuffer and output them as a single
5617ec681f3Smrg * line.
5627ec681f3Smrg */
5637ec681f3Smrgstatic void
5647ec681f3Smrgprint_combined_results(struct intel_measure_device *measure_device,
5657ec681f3Smrg                       int result_count,
5667ec681f3Smrg                       struct intel_device_info *info)
5677ec681f3Smrg{
5687ec681f3Smrg   if (result_count == 0)
5697ec681f3Smrg      return;
5707ec681f3Smrg
5717ec681f3Smrg   struct intel_measure_ringbuffer *result_rb = measure_device->ringbuffer;
5727ec681f3Smrg   assert(ringbuffer_size(result_rb) >= result_count);
5737ec681f3Smrg   const struct intel_measure_buffered_result* start_result =
5747ec681f3Smrg      ringbuffer_pop(result_rb);
5757ec681f3Smrg   const struct intel_measure_buffered_result* current_result = start_result;
5767ec681f3Smrg
5777ec681f3Smrg   if (start_result == NULL)
5787ec681f3Smrg      return;
5797ec681f3Smrg   --result_count;
5807ec681f3Smrg
5817ec681f3Smrg   uint64_t duration_ts = raw_timestamp_delta(start_result->start_ts,
5827ec681f3Smrg                                              current_result->end_ts);
5837ec681f3Smrg   unsigned event_count = start_result->snapshot.event_count;
5847ec681f3Smrg   while (result_count-- > 0) {
5857ec681f3Smrg      assert(ringbuffer_size(result_rb) > 0);
5867ec681f3Smrg      current_result = ringbuffer_pop(result_rb);
5877ec681f3Smrg      if (current_result == NULL)
5887ec681f3Smrg         return;
5897ec681f3Smrg      duration_ts += raw_timestamp_delta(current_result->start_ts,
5907ec681f3Smrg                                         current_result->end_ts);
5917ec681f3Smrg      event_count += current_result->snapshot.event_count;
5927ec681f3Smrg   }
5937ec681f3Smrg
5947ec681f3Smrg   const struct intel_measure_snapshot *begin = &start_result->snapshot;
5957ec681f3Smrg   fprintf(config.file, "%"PRIu64",%"PRIu64",%u,%u,%u,%u,%s,%u,"
5967ec681f3Smrg           "0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR","
5977ec681f3Smrg           "0x%"PRIxPTR",0x%"PRIxPTR",%"PRIu64",%"PRIu64"\n",
5987ec681f3Smrg           start_result->start_ts, current_result->end_ts,
5997ec681f3Smrg           start_result->frame, start_result->batch_count,
6007ec681f3Smrg           start_result->event_index, event_count,
6017ec681f3Smrg           begin->event_name, begin->count,
6027ec681f3Smrg           begin->vs, begin->tcs, begin->tes, begin->gs, begin->fs, begin->cs,
6037ec681f3Smrg           begin->framebuffer,
6047ec681f3Smrg           intel_device_info_timebase_scale(info, start_result->idle_duration),
6057ec681f3Smrg           intel_device_info_timebase_scale(info, duration_ts));
6067ec681f3Smrg}
6077ec681f3Smrg
/**
 * Empty the ringbuffer of events that can be printed.
 *
 * Repeatedly asks buffered_event_count() how many results make up the next
 * csv line, stopping when more events are still needed.
 */
static void
intel_measure_print(struct intel_measure_device *device,
                    struct intel_device_info *info)
{
   for (;;) {
      const int events_to_combine = buffered_event_count(device);
      if (events_to_combine == 0)
         break;
      print_combined_results(device, events_to_combine, info);
   }
}
6227ec681f3Smrg
/**
 * Collect snapshots from completed command buffers and submit them to
 * intel_measure for printing.
 *
 * Holds the device mutex for the duration; safe to call from multiple
 * threads.  Batches are drained in queue order and processing stops at the
 * first batch still executing on the gpu.
 */
void
intel_measure_gather(struct intel_measure_device *measure_device,
                     struct intel_device_info *info)
{
   pthread_mutex_lock(&measure_device->mutex);

   /* Iterate snapshots and collect if ready.  Each snapshot queue will be
    * in-order, but we must determine which queue has the oldest batch.
    */
   /* iterate snapshots and collect if ready */
   while (!list_is_empty(&measure_device->queued_snapshots)) {
      struct intel_measure_batch *batch =
         list_first_entry(&measure_device->queued_snapshots,
                          struct intel_measure_batch, link);

      if (!intel_measure_ready(batch)) {
         /* command buffer has begun execution on the gpu, but has not
          * completed.
          */
         break;
      }

      list_del(&batch->link);
      /* snapshots are stored in begin/end pairs, so a completed batch must
       * hold an even number of them
       */
      assert(batch->index % 2 == 0);

      intel_measure_push_result(measure_device, batch);

      /* reset the batch for reuse by the caller */
      batch->index = 0;
      batch->frame = 0;
   }

   intel_measure_print(measure_device, info);
   pthread_mutex_unlock(&measure_device->mutex);
}
6617ec681f3Smrg
662