1/* 2 * Copyright © 2020 Google, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#ifndef _U_TRACE_H 25#define _U_TRACE_H 26 27#include <stdbool.h> 28#include <stdint.h> 29#include <stdio.h> 30 31#include "util/u_queue.h" 32 33#ifdef __cplusplus 34extern "C" { 35#endif 36 37/* A trace mechanism (very) loosely inspired by the linux kernel tracepoint 38 * mechanism, in that it allows for defining driver specific (or common) 39 * tracepoints, which generate 'trace_$name()' functions that can be 40 * called at various points in commandstream emit. 41 * 42 * Currently a printf backend is implemented, but the expectation is to 43 * also implement a perfetto backend for shipping out traces to a tool like 44 * AGI. 45 * 46 * Notable differences: 47 * 48 * - GPU timestamps! A driver provided callback is used to emit timestamps 49 * to a buffer. At a later point in time (when stalling to wait for the 50 * GPU is not required), the timestamps are re-united with the trace 51 * payload. This makes the trace mechanism suitable for profiling. 52 * 53 * - Instead of a systemwide trace ringbuffer, buffering of un-retired 54 * tracepoints is split into two stages. Traces are emitted to a 55 * 'u_trace' instance, and at a later time flushed to a 'u_trace_context' 56 * instance. This avoids the requirement that commandstream containing 57 * tracepoints is emitted in the same order as it is generated. 58 * 59 * If the hw has multiple parallel "engines" (for example, 3d/blit/compute) 60 * then a `u_trace_context` per-engine should be used. 61 * 62 * - Unlike kernel tracepoints, u_trace tracepoints are defined in py 63 * from which header and src files are generated. Since we already have 64 * a build dependency on python+mako, this gives more flexibility than 65 * clunky preprocessor macro magic. 66 * 67 */ 68 69struct u_trace_context; 70struct u_trace; 71struct u_trace_chunk; 72 73/** 74 * Special reserved value to indicate that no timestamp was captured, 75 * and that the timestamp of the previous trace should be reused. 76 */ 77#define U_TRACE_NO_TIMESTAMP ((uint64_t)0) 78 79/** 80 * Driver provided callback to create a timestamp buffer which will be 81 * read by u_trace_read_ts function. 82 */ 83typedef void* (*u_trace_create_ts_buffer)(struct u_trace_context *utctx, 84 uint32_t timestamps_count); 85 86/** 87 * Driver provided callback to delete a timestamp buffer. 88 */ 89typedef void (*u_trace_delete_ts_buffer)(struct u_trace_context *utctx, 90 void *timestamps); 91 92/** 93 * Driver provided callback to emit commands into the soecified command 94 * stream to capture a 64b timestamp into the specified timestamps buffer, 95 * at the specified index. 96 * 97 * The hw counter that the driver records should be something that runs at 98 * a fixed rate, even as the GPU freq changes. The same source used for 99 * GL_TIMESTAMP queries should be appropriate. 100 */ 101typedef void (*u_trace_record_ts)(struct u_trace *ut, void *cs, 102 void *timestamps, unsigned idx); 103 104/** 105 * Driver provided callback to read back a previously recorded timestamp. 106 * If necessary, this should block until the GPU has finished writing back 107 * the timestamps. (The timestamps will be read back in order, so it is 108 * safe to only synchronize on idx==0.) 109 * 110 * flush_data is data provided by the driver via u_trace_flush. 111 * 112 * The returned timestamp should be in units of nanoseconds. The same 113 * timebase as GL_TIMESTAMP queries should be used. 114 * 115 * The driver can return the special U_TRACE_NO_TIMESTAMP value to indicate 116 * that no timestamp was captured and the timestamp from the previous trace 117 * will be re-used. (The first trace in the u_trace buf may not do this.) 118 * This allows the driver to detect cases where multiple tracepoints are 119 * emitted with no other intervening cmdstream, to avoid pointlessly 120 * capturing the same timestamp multiple times in a row. 121 */ 122typedef uint64_t (*u_trace_read_ts)(struct u_trace_context *utctx, 123 void *timestamps, unsigned idx, void *flush_data); 124 125/** 126 * Driver provided callback to delete flush data. 127 */ 128typedef void (*u_trace_delete_flush_data)(struct u_trace_context *utctx, 129 void *flush_data); 130 131/** 132 * The trace context provides tracking for "in-flight" traces, once the 133 * cmdstream that records timestamps has been flushed. 134 */ 135struct u_trace_context { 136 void *pctx; 137 138 u_trace_create_ts_buffer create_timestamp_buffer; 139 u_trace_delete_ts_buffer delete_timestamp_buffer; 140 u_trace_record_ts record_timestamp; 141 u_trace_read_ts read_timestamp; 142 u_trace_delete_flush_data delete_flush_data; 143 144 FILE *out; 145 146 /* Once u_trace_flush() is called u_trace_chunk's are queued up to 147 * render tracepoints on a queue. The per-chunk queue jobs block until 148 * timestamps are available. 149 */ 150 struct util_queue queue; 151 152#ifdef HAVE_PERFETTO 153 /* node in global list of trace contexts. */ 154 struct list_head node; 155#endif 156 157 /* State to accumulate time across N chunks associated with a single 158 * batch (u_trace). 159 */ 160 uint64_t last_time_ns; 161 uint64_t first_time_ns; 162 163 uint32_t frame_nr; 164 165 /* list of unprocessed trace chunks in fifo order: */ 166 struct list_head flushed_trace_chunks; 167}; 168 169/** 170 * The u_trace ptr is passed as the first arg to generated tracepoints. 171 * It provides buffering for tracepoint payload until the corresponding 172 * driver cmdstream containing the emitted commands to capture is 173 * flushed. 174 * 175 * Individual tracepoints emitted to u_trace are expected to be "executed" 176 * (ie. timestamp captured) in FIFO order with respect to other tracepoints 177 * emitted to the same u_trace. But the order WRT other u_trace instances 178 * is undefined util u_trace_flush(). 179 */ 180struct u_trace { 181 struct u_trace_context *utctx; 182 183 struct list_head trace_chunks; /* list of unflushed trace chunks in fifo order */ 184 185 bool enabled; 186}; 187 188void u_trace_context_init(struct u_trace_context *utctx, 189 void *pctx, 190 u_trace_create_ts_buffer create_timestamp_buffer, 191 u_trace_delete_ts_buffer delete_timestamp_buffer, 192 u_trace_record_ts record_timestamp, 193 u_trace_read_ts read_timestamp, 194 u_trace_delete_flush_data delete_flush_data); 195void u_trace_context_fini(struct u_trace_context *utctx); 196 197/** 198 * Flush (trigger processing) of traces previously flushed to the trace-context 199 * by u_trace_flush(). 200 * 201 * This should typically be called in the driver's pctx->flush(). 202 */ 203void u_trace_context_process(struct u_trace_context *utctx, bool eof); 204 205void u_trace_init(struct u_trace *ut, struct u_trace_context *utctx); 206void u_trace_fini(struct u_trace *ut); 207 208bool u_trace_has_points(struct u_trace *ut); 209 210struct u_trace_iterator 211{ 212 struct u_trace *ut; 213 struct u_trace_chunk *chunk; 214 uint32_t event_idx; 215}; 216 217struct u_trace_iterator 218u_trace_begin_iterator(struct u_trace *ut); 219 220struct u_trace_iterator 221u_trace_end_iterator(struct u_trace *ut); 222 223bool 224u_trace_iterator_equal(struct u_trace_iterator a, 225 struct u_trace_iterator b); 226 227typedef void (*u_trace_copy_ts_buffer)(struct u_trace_context *utctx, 228 void *cmdstream, 229 void *ts_from, uint32_t from_offset, 230 void *ts_to, uint32_t to_offset, 231 uint32_t count); 232 233/** 234 * Clones tracepoints range into target u_trace. 235 * Provides callback for driver to copy timestamps on GPU from 236 * one buffer to another. 237 * 238 * It allows: 239 * - Tracing re-usable command buffer in Vulkan, by copying tracepoints 240 * each time it is submitted. 241 * - Per-tile tracing for tiling GPUs, by copying a range of tracepoints 242 * corresponding to a tile. 243 */ 244void u_trace_clone_append(struct u_trace_iterator begin_it, 245 struct u_trace_iterator end_it, 246 struct u_trace *into, 247 void *cmdstream, 248 u_trace_copy_ts_buffer copy_ts_buffer); 249 250void u_trace_disable_event_range(struct u_trace_iterator begin_it, 251 struct u_trace_iterator end_it); 252 253/** 254 * Flush traces to the parent trace-context. At this point, the expectation 255 * is that all the tracepoints are "executed" by the GPU following any previously 256 * flushed u_trace batch. 257 * 258 * flush_data is a way for driver to pass additional data, which becomes available 259 * only at the point of flush, to the u_trace_read_ts callback and perfetto. 260 * The typical example of such data would be a fence to wait on in u_trace_read_ts, 261 * and a submission_id to pass into perfetto. 262 * The destruction of the data is done via u_trace_delete_flush_data. 263 * 264 * This should typically be called when the corresponding cmdstream (containing 265 * the timestamp reads) is flushed to the kernel. 266 */ 267void u_trace_flush(struct u_trace *ut, void *flush_data, bool free_data); 268 269#ifdef HAVE_PERFETTO 270extern int ut_perfetto_enabled; 271 272void u_trace_perfetto_start(void); 273void u_trace_perfetto_stop(void); 274#else 275# define ut_perfetto_enabled 0 276#endif 277 278static inline bool 279u_trace_context_tracing(struct u_trace_context *utctx) 280{ 281 return !!utctx->out || (ut_perfetto_enabled > 0); 282} 283 284#ifdef __cplusplus 285} 286#endif 287 288#endif /* _U_TRACE_H */ 289