freedreno_perfetto.cc revision 7ec681f3
1/* 2 * Copyright © 2021 Google, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include <perfetto.h> 25 26#include "util/u_perfetto.h" 27 28#include "freedreno_tracepoints.h" 29 30static uint32_t gpu_clock_id; 31static uint64_t next_clock_sync_ns; /* cpu time of next clk sync */ 32 33/** 34 * The timestamp at the point where we first emitted the clock_sync.. 35 * this will be a *later* timestamp that the first GPU traces (since 36 * we capture the first clock_sync from the CPU *after* the first GPU 37 * tracepoints happen). To avoid confusing perfetto we need to drop 38 * the GPU traces with timestamps before this. 39 */ 40static uint64_t sync_gpu_ts; 41 42struct FdRenderpassIncrementalState { 43 bool was_cleared = true; 44}; 45 46struct FdRenderpassTraits : public perfetto::DefaultDataSourceTraits { 47 using IncrementalStateType = FdRenderpassIncrementalState; 48}; 49 50class FdRenderpassDataSource : public perfetto::DataSource<FdRenderpassDataSource, FdRenderpassTraits> { 51public: 52 void OnSetup(const SetupArgs &) override 53 { 54 // Use this callback to apply any custom configuration to your data source 55 // based on the TraceConfig in SetupArgs. 56 } 57 58 void OnStart(const StartArgs &) override 59 { 60 // This notification can be used to initialize the GPU driver, enable 61 // counters, etc. StartArgs will contains the DataSourceDescriptor, 62 // which can be extended. 63 u_trace_perfetto_start(); 64 PERFETTO_LOG("Tracing started"); 65 66 /* Note: clock_id's below 128 are reserved.. for custom clock sources, 67 * using the hash of a namespaced string is the recommended approach. 68 * See: https://perfetto.dev/docs/concepts/clock-sync 69 */ 70 gpu_clock_id = 71 _mesa_hash_string("org.freedesktop.mesa.freedreno") | 0x80000000; 72 } 73 74 void OnStop(const StopArgs &) override 75 { 76 PERFETTO_LOG("Tracing stopped"); 77 78 // Undo any initialization done in OnStart. 79 u_trace_perfetto_stop(); 80 // TODO we should perhaps block until queued traces are flushed? 81 82 Trace([](FdRenderpassDataSource::TraceContext ctx) { 83 auto packet = ctx.NewTracePacket(); 84 packet->Finalize(); 85 ctx.Flush(); 86 }); 87 } 88}; 89 90PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource); 91PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource); 92 93static void 94send_descriptors(FdRenderpassDataSource::TraceContext &ctx, uint64_t ts_ns) 95{ 96 PERFETTO_LOG("Sending renderstage descriptors"); 97 98 auto packet = ctx.NewTracePacket(); 99 100 packet->set_timestamp(0); 101// packet->set_timestamp(ts_ns); 102// packet->set_timestamp_clock_id(gpu_clock_id); 103 104 auto event = packet->set_gpu_render_stage_event(); 105 event->set_gpu_id(0); 106 107 auto spec = event->set_specifications(); 108 109 for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) { 110 auto desc = spec->add_hw_queue(); 111 112 desc->set_name(queues[i].name); 113 desc->set_description(queues[i].desc); 114 } 115 116 for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) { 117 auto desc = spec->add_stage(); 118 119 desc->set_name(stages[i].name); 120 if (stages[i].desc) 121 desc->set_description(stages[i].desc); 122 } 123} 124 125static void 126stage_start(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage) 127{ 128 struct fd_context *ctx = fd_context(pctx); 129 struct fd_perfetto_state *p = &ctx->perfetto; 130 131 p->start_ts[stage] = ts_ns; 132} 133 134static void 135stage_end(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage) 136{ 137 struct fd_context *ctx = fd_context(pctx); 138 struct fd_perfetto_state *p = &ctx->perfetto; 139 140 /* If we haven't managed to calibrate the alignment between GPU and CPU 141 * timestamps yet, then skip this trace, otherwise perfetto won't know 142 * what to do with it. 143 */ 144 if (!sync_gpu_ts) 145 return; 146 147 FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) { 148 if (auto state = tctx.GetIncrementalState(); state->was_cleared) { 149 send_descriptors(tctx, p->start_ts[stage]); 150 state->was_cleared = false; 151 } 152 153 auto packet = tctx.NewTracePacket(); 154 155 packet->set_timestamp(p->start_ts[stage]); 156 packet->set_timestamp_clock_id(gpu_clock_id); 157 158 auto event = packet->set_gpu_render_stage_event(); 159 event->set_event_id(0); // ??? 160 event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID); 161 event->set_duration(ts_ns - p->start_ts[stage]); 162 event->set_stage_id(stage); 163 event->set_context((uintptr_t)pctx); 164 165 /* The "surface" meta-stage has extra info about render target: */ 166 if (stage == SURFACE_STAGE_ID) { 167 168 event->set_submission_id(p->submit_id); 169 170 if (p->cbuf0_format) { 171 auto data = event->add_extra_data(); 172 173 data->set_name("color0 format"); 174 data->set_value(util_format_short_name(p->cbuf0_format)); 175 } 176 177 if (p->zs_format) { 178 auto data = event->add_extra_data(); 179 180 data->set_name("zs format"); 181 data->set_value(util_format_short_name(p->zs_format)); 182 } 183 184 { 185 auto data = event->add_extra_data(); 186 187 data->set_name("width"); 188 data->set_value(std::to_string(p->width)); 189 } 190 191 { 192 auto data = event->add_extra_data(); 193 194 data->set_name("height"); 195 data->set_value(std::to_string(p->height)); 196 } 197 198 { 199 auto data = event->add_extra_data(); 200 201 data->set_name("MSAA"); 202 data->set_value(std::to_string(p->samples)); 203 } 204 205 { 206 auto data = event->add_extra_data(); 207 208 data->set_name("MRTs"); 209 data->set_value(std::to_string(p->mrts)); 210 } 211 212 // "renderMode" 213 // "surfaceID" 214 215 if (p->nbins) { 216 auto data = event->add_extra_data(); 217 218 data->set_name("numberOfBins"); 219 data->set_value(std::to_string(p->nbins)); 220 } 221 222 if (p->binw) { 223 auto data = event->add_extra_data(); 224 225 data->set_name("binWidth"); 226 data->set_value(std::to_string(p->binw)); 227 } 228 229 if (p->binh) { 230 auto data = event->add_extra_data(); 231 232 data->set_name("binHeight"); 233 data->set_value(std::to_string(p->binh)); 234 } 235 } 236 }); 237} 238 239#ifdef __cplusplus 240extern "C" { 241#endif 242 243void 244fd_perfetto_init(void) 245{ 246 util_perfetto_init(); 247 248 perfetto::DataSourceDescriptor dsd; 249 dsd.set_name("gpu.renderstages.msm"); 250 FdRenderpassDataSource::Register(dsd); 251} 252 253static void 254sync_timestamp(struct fd_context *ctx) 255{ 256 uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count(); 257 uint64_t gpu_ts; 258 259 if (cpu_ts < next_clock_sync_ns) 260 return; 261 262 if (fd_pipe_get_param(ctx->pipe, FD_TIMESTAMP, &gpu_ts)) { 263 PERFETTO_ELOG("Could not sync CPU and GPU clocks"); 264 return; 265 } 266 267 /* convert GPU ts into ns: */ 268 gpu_ts = ctx->ts_to_ns(gpu_ts); 269 270 FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) { 271 auto packet = tctx.NewTracePacket(); 272 273 packet->set_timestamp(cpu_ts); 274 275 auto event = packet->set_clock_snapshot(); 276 277 { 278 auto clock = event->add_clocks(); 279 280 clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME); 281 clock->set_timestamp(cpu_ts); 282 } 283 284 { 285 auto clock = event->add_clocks(); 286 287 clock->set_clock_id(gpu_clock_id); 288 clock->set_timestamp(gpu_ts); 289 } 290 291 sync_gpu_ts = gpu_ts; 292 next_clock_sync_ns = cpu_ts + 30000000; 293 }); 294} 295 296static void 297emit_submit_id(struct fd_context *ctx) 298{ 299 FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) { 300 auto packet = tctx.NewTracePacket(); 301 302 packet->set_timestamp(perfetto::base::GetBootTimeNs().count()); 303 304 auto event = packet->set_vulkan_api_event(); 305 auto submit = event->set_vk_queue_submit(); 306 307 submit->set_submission_id(ctx->submit_count); 308 }); 309} 310 311void 312fd_perfetto_submit(struct fd_context *ctx) 313{ 314 sync_timestamp(ctx); 315 emit_submit_id(ctx); 316} 317 318/* 319 * Trace callbacks, called from u_trace once the timestamps from GPU have been 320 * collected. 321 */ 322 323void 324fd_start_render_pass(struct pipe_context *pctx, uint64_t ts_ns, 325 const void *flush_data, 326 const struct trace_start_render_pass *payload) 327{ 328 stage_start(pctx, ts_ns, SURFACE_STAGE_ID); 329 330 struct fd_perfetto_state *p = &fd_context(pctx)->perfetto; 331 332 p->submit_id = payload->submit_id; 333 p->cbuf0_format = payload->cbuf0_format; 334 p->zs_format = payload->zs_format; 335 p->width = payload->width; 336 p->height = payload->height; 337 p->mrts = payload->mrts; 338 p->samples = payload->samples; 339 p->nbins = payload->nbins; 340 p->binw = payload->binw; 341 p->binh = payload->binh; 342} 343 344void 345fd_end_render_pass(struct pipe_context *pctx, uint64_t ts_ns, 346 const void *flush_data, 347 const struct trace_end_render_pass *payload) 348{ 349 stage_end(pctx, ts_ns, SURFACE_STAGE_ID); 350} 351 352void 353fd_start_binning_ib(struct pipe_context *pctx, uint64_t ts_ns, 354 const void *flush_data, 355 const struct trace_start_binning_ib *payload) 356{ 357 stage_start(pctx, ts_ns, BINNING_STAGE_ID); 358} 359 360void 361fd_end_binning_ib(struct pipe_context *pctx, uint64_t ts_ns, 362 const void *flush_data, 363 const struct trace_end_binning_ib *payload) 364{ 365 stage_end(pctx, ts_ns, BINNING_STAGE_ID); 366} 367 368void 369fd_start_draw_ib(struct pipe_context *pctx, uint64_t ts_ns, 370 const void *flush_data, 371 const struct trace_start_draw_ib *payload) 372{ 373 stage_start( 374 pctx, ts_ns, 375 fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID); 376} 377 378void 379fd_end_draw_ib(struct pipe_context *pctx, uint64_t ts_ns, 380 const void *flush_data, 381 const struct trace_end_draw_ib *payload) 382{ 383 stage_end( 384 pctx, ts_ns, 385 fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID); 386} 387 388void 389fd_start_blit(struct pipe_context *pctx, uint64_t ts_ns, 390 const void *flush_data, 391 const struct trace_start_blit *payload) 392{ 393 stage_start(pctx, ts_ns, BLIT_STAGE_ID); 394} 395 396void 397fd_end_blit(struct pipe_context *pctx, uint64_t ts_ns, 398 const void *flush_data, 399 const struct trace_end_blit *payload) 400{ 401 stage_end(pctx, ts_ns, BLIT_STAGE_ID); 402} 403 404void 405fd_start_compute(struct pipe_context *pctx, uint64_t ts_ns, 406 const void *flush_data, 407 const struct trace_start_compute *payload) 408{ 409 stage_start(pctx, ts_ns, COMPUTE_STAGE_ID); 410} 411 412void 413fd_end_compute(struct pipe_context *pctx, uint64_t ts_ns, 414 const void *flush_data, 415 const struct trace_end_compute *payload) 416{ 417 stage_end(pctx, ts_ns, COMPUTE_STAGE_ID); 418} 419 420void 421fd_start_clear_restore(struct pipe_context *pctx, uint64_t ts_ns, 422 const void *flush_data, 423 const struct trace_start_clear_restore *payload) 424{ 425 stage_start(pctx, ts_ns, CLEAR_RESTORE_STAGE_ID); 426} 427 428void 429fd_end_clear_restore(struct pipe_context *pctx, uint64_t ts_ns, 430 const void *flush_data, 431 const struct trace_end_clear_restore *payload) 432{ 433 stage_end(pctx, ts_ns, CLEAR_RESTORE_STAGE_ID); 434} 435 436void 437fd_start_resolve(struct pipe_context *pctx, uint64_t ts_ns, 438 const void *flush_data, 439 const struct trace_start_resolve *payload) 440{ 441 stage_start(pctx, ts_ns, RESOLVE_STAGE_ID); 442} 443 444void 445fd_end_resolve(struct pipe_context *pctx, uint64_t ts_ns, 446 const void *flush_data, 447 const struct trace_end_resolve *payload) 448{ 449 stage_end(pctx, ts_ns, RESOLVE_STAGE_ID); 450} 451 452#ifdef __cplusplus 453} 454#endif 455