tu_perfetto.cc revision 7ec681f3
1/* 2 * Copyright © 2021 Google, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include <perfetto.h> 25 26#include "tu_perfetto.h" 27 28#include "util/u_perfetto.h" 29#include "util/hash_table.h" 30 31#include "tu_tracepoints.h" 32#include "tu_tracepoints_perfetto.h" 33 34static uint32_t gpu_clock_id; 35static uint64_t next_clock_sync_ns; /* cpu time of next clk sync */ 36 37/** 38 * The timestamp at the point where we first emitted the clock_sync.. 39 * this will be a *later* timestamp that the first GPU traces (since 40 * we capture the first clock_sync from the CPU *after* the first GPU 41 * tracepoints happen). To avoid confusing perfetto we need to drop 42 * the GPU traces with timestamps before this. 43 */ 44static uint64_t sync_gpu_ts; 45 46struct TuRenderpassIncrementalState { 47 bool was_cleared = true; 48}; 49 50struct TuRenderpassTraits : public perfetto::DefaultDataSourceTraits { 51 using IncrementalStateType = TuRenderpassIncrementalState; 52}; 53 54class TuRenderpassDataSource : public perfetto::DataSource<TuRenderpassDataSource, TuRenderpassTraits> { 55public: 56 void OnSetup(const SetupArgs &) override 57 { 58 // Use this callback to apply any custom configuration to your data source 59 // based on the TraceConfig in SetupArgs. 60 } 61 62 void OnStart(const StartArgs &) override 63 { 64 // This notification can be used to initialize the GPU driver, enable 65 // counters, etc. StartArgs will contains the DataSourceDescriptor, 66 // which can be extended. 67 u_trace_perfetto_start(); 68 PERFETTO_LOG("Tracing started"); 69 70 /* Note: clock_id's below 128 are reserved.. for custom clock sources, 71 * using the hash of a namespaced string is the recommended approach. 72 * See: https://perfetto.dev/docs/concepts/clock-sync 73 */ 74 gpu_clock_id = 75 _mesa_hash_string("org.freedesktop.mesa.freedreno") | 0x80000000; 76 } 77 78 void OnStop(const StopArgs &) override 79 { 80 PERFETTO_LOG("Tracing stopped"); 81 82 // Undo any initialization done in OnStart. 83 u_trace_perfetto_stop(); 84 // TODO we should perhaps block until queued traces are flushed? 85 86 Trace([](TuRenderpassDataSource::TraceContext ctx) { 87 auto packet = ctx.NewTracePacket(); 88 packet->Finalize(); 89 ctx.Flush(); 90 }); 91 } 92}; 93 94PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource); 95PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource); 96 97static void 98send_descriptors(TuRenderpassDataSource::TraceContext &ctx, uint64_t ts_ns) 99{ 100 PERFETTO_LOG("Sending renderstage descriptors"); 101 102 auto packet = ctx.NewTracePacket(); 103 104 packet->set_timestamp(0); 105 106 auto event = packet->set_gpu_render_stage_event(); 107 event->set_gpu_id(0); 108 109 auto spec = event->set_specifications(); 110 111 for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) { 112 auto desc = spec->add_hw_queue(); 113 114 desc->set_name(queues[i].name); 115 desc->set_description(queues[i].desc); 116 } 117 118 for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) { 119 auto desc = spec->add_stage(); 120 121 desc->set_name(stages[i].name); 122 if (stages[i].desc) 123 desc->set_description(stages[i].desc); 124 } 125} 126 127static void 128stage_start(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage) 129{ 130 struct tu_perfetto_state *p = tu_device_get_perfetto_state(dev); 131 132 p->start_ts[stage] = ts_ns; 133} 134 135typedef void (*trace_payload_as_extra_func)(perfetto::protos::pbzero::GpuRenderStageEvent *, const void*); 136 137static void 138stage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage, 139 uint32_t submission_id, const void* payload = nullptr, 140 trace_payload_as_extra_func payload_as_extra = nullptr) 141{ 142 struct tu_perfetto_state *p = tu_device_get_perfetto_state(dev); 143 144 /* If we haven't managed to calibrate the alignment between GPU and CPU 145 * timestamps yet, then skip this trace, otherwise perfetto won't know 146 * what to do with it. 147 */ 148 if (!sync_gpu_ts) 149 return; 150 151 TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) { 152 if (auto state = tctx.GetIncrementalState(); state->was_cleared) { 153 send_descriptors(tctx, p->start_ts[stage]); 154 state->was_cleared = false; 155 } 156 157 auto packet = tctx.NewTracePacket(); 158 159 packet->set_timestamp(p->start_ts[stage]); 160 packet->set_timestamp_clock_id(gpu_clock_id); 161 162 auto event = packet->set_gpu_render_stage_event(); 163 event->set_event_id(0); // ??? 164 event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID); 165 event->set_duration(ts_ns - p->start_ts[stage]); 166 event->set_stage_id(stage); 167 event->set_context((uintptr_t)dev); 168 event->set_submission_id(submission_id); 169 170 if (payload && payload_as_extra) { 171 payload_as_extra(event, payload); 172 } 173 }); 174} 175 176#ifdef __cplusplus 177extern "C" { 178#endif 179 180void 181tu_perfetto_init(void) 182{ 183 util_perfetto_init(); 184 185 perfetto::DataSourceDescriptor dsd; 186 dsd.set_name("gpu.renderstages.msm"); 187 TuRenderpassDataSource::Register(dsd); 188} 189 190static void 191sync_timestamp(struct tu_device *dev) 192{ 193 uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count(); 194 uint64_t gpu_ts = 0; 195 196 if (cpu_ts < next_clock_sync_ns) 197 return; 198 199 if (tu_device_get_timestamp(dev, &gpu_ts)) { 200 PERFETTO_ELOG("Could not sync CPU and GPU clocks"); 201 return; 202 } 203 204 /* convert GPU ts into ns: */ 205 gpu_ts = tu_device_ticks_to_ns(dev, gpu_ts); 206 207 TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) { 208 auto packet = tctx.NewTracePacket(); 209 210 packet->set_timestamp(cpu_ts); 211 212 auto event = packet->set_clock_snapshot(); 213 214 { 215 auto clock = event->add_clocks(); 216 217 clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME); 218 clock->set_timestamp(cpu_ts); 219 } 220 221 { 222 auto clock = event->add_clocks(); 223 224 clock->set_clock_id(gpu_clock_id); 225 clock->set_timestamp(gpu_ts); 226 } 227 228 sync_gpu_ts = gpu_ts; 229 next_clock_sync_ns = cpu_ts + 30000000; 230 }); 231} 232 233static void 234emit_submit_id(uint32_t submission_id) 235{ 236 TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) { 237 auto packet = tctx.NewTracePacket(); 238 239 packet->set_timestamp(perfetto::base::GetBootTimeNs().count()); 240 241 auto event = packet->set_vulkan_api_event(); 242 auto submit = event->set_vk_queue_submit(); 243 244 submit->set_submission_id(submission_id); 245 }); 246} 247 248void 249tu_perfetto_submit(struct tu_device *dev, uint32_t submission_id) 250{ 251 sync_timestamp(dev); 252 emit_submit_id(submission_id); 253} 254 255/* 256 * Trace callbacks, called from u_trace once the timestamps from GPU have been 257 * collected. 258 */ 259 260#define CREATE_EVENT_CALLBACK(event_name, stage) \ 261void \ 262tu_start_##event_name(struct tu_device *dev, uint64_t ts_ns, \ 263 const void *flush_data, \ 264 const struct trace_start_##event_name *payload) \ 265{ \ 266 stage_start(dev, ts_ns, stage); \ 267} \ 268 \ 269void \ 270tu_end_##event_name(struct tu_device *dev, uint64_t ts_ns, \ 271 const void *flush_data, \ 272 const struct trace_end_##event_name *payload) \ 273{ \ 274 auto trace_flush_data = (const struct tu_u_trace_flush_data *) flush_data; \ 275 uint32_t submission_id = \ 276 tu_u_trace_flush_data_get_submit_id(trace_flush_data); \ 277 stage_end(dev, ts_ns, stage, submission_id, payload, \ 278 (trace_payload_as_extra_func) &trace_payload_as_extra_end_##event_name);\ 279} 280 281CREATE_EVENT_CALLBACK(render_pass, SURFACE_STAGE_ID) 282CREATE_EVENT_CALLBACK(binning_ib, BINNING_STAGE_ID) 283CREATE_EVENT_CALLBACK(draw_ib_gmem, GMEM_STAGE_ID) 284CREATE_EVENT_CALLBACK(draw_ib_sysmem, BYPASS_STAGE_ID) 285CREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID) 286CREATE_EVENT_CALLBACK(compute, COMPUTE_STAGE_ID) 287CREATE_EVENT_CALLBACK(gmem_clear, CLEAR_GMEM_STAGE_ID) 288CREATE_EVENT_CALLBACK(sysmem_clear, CLEAR_SYSMEM_STAGE_ID) 289CREATE_EVENT_CALLBACK(sysmem_clear_all, CLEAR_SYSMEM_STAGE_ID) 290CREATE_EVENT_CALLBACK(gmem_load, GMEM_LOAD_STAGE_ID) 291CREATE_EVENT_CALLBACK(gmem_store, GMEM_STORE_STAGE_ID) 292CREATE_EVENT_CALLBACK(sysmem_resolve, SYSMEM_RESOLVE_STAGE_ID) 293 294#ifdef __cplusplus 295} 296#endif 297