freedreno_perfetto.cc revision 7ec681f3
1/*
2 * Copyright © 2021 Google, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include <perfetto.h>
25
26#include "util/u_perfetto.h"
27
28#include "freedreno_tracepoints.h"
29
/* Perfetto clock id used to tag GPU timestamps; assigned in
 * FdRenderpassDataSource::OnStart().
 */
static uint32_t gpu_clock_id = 0;

/* CPU time (ns) at or after which the next clock snapshot is emitted. */
static uint64_t next_clock_sync_ns = 0;

/**
 * GPU timestamp (ns) recorded when a clock snapshot was last emitted; it
 * stays zero until the first snapshot has been sent.
 *
 * The first snapshot is captured from the CPU *after* the first GPU
 * tracepoints have happened, so it carries a *later* timestamp than the
 * earliest GPU traces.  To avoid confusing perfetto, stage_end() skips
 * GPU traces for as long as this value is still zero.
 */
static uint64_t sync_gpu_ts = 0;
41
/* Incremental state attached to the renderpass data source.
 *
 * `was_cleared` starts out true; stage_end() sends the render-stage
 * descriptors when it sees the flag set and then resets it to false.
 */
struct FdRenderpassIncrementalState {
   bool was_cleared{true};
};
45
46struct FdRenderpassTraits : public perfetto::DefaultDataSourceTraits {
47   using IncrementalStateType = FdRenderpassIncrementalState;
48};
49
50class FdRenderpassDataSource : public perfetto::DataSource<FdRenderpassDataSource, FdRenderpassTraits> {
51public:
52   void OnSetup(const SetupArgs &) override
53   {
54      // Use this callback to apply any custom configuration to your data source
55      // based on the TraceConfig in SetupArgs.
56   }
57
58   void OnStart(const StartArgs &) override
59   {
60      // This notification can be used to initialize the GPU driver, enable
61      // counters, etc. StartArgs will contains the DataSourceDescriptor,
62      // which can be extended.
63      u_trace_perfetto_start();
64      PERFETTO_LOG("Tracing started");
65
66      /* Note: clock_id's below 128 are reserved.. for custom clock sources,
67       * using the hash of a namespaced string is the recommended approach.
68       * See: https://perfetto.dev/docs/concepts/clock-sync
69       */
70      gpu_clock_id =
71         _mesa_hash_string("org.freedesktop.mesa.freedreno") | 0x80000000;
72   }
73
74   void OnStop(const StopArgs &) override
75   {
76      PERFETTO_LOG("Tracing stopped");
77
78      // Undo any initialization done in OnStart.
79      u_trace_perfetto_stop();
80      // TODO we should perhaps block until queued traces are flushed?
81
82      Trace([](FdRenderpassDataSource::TraceContext ctx) {
83         auto packet = ctx.NewTracePacket();
84         packet->Finalize();
85         ctx.Flush();
86      });
87   }
88};
89
90PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);
91PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);
92
93static void
94send_descriptors(FdRenderpassDataSource::TraceContext &ctx, uint64_t ts_ns)
95{
96   PERFETTO_LOG("Sending renderstage descriptors");
97
98   auto packet = ctx.NewTracePacket();
99
100   packet->set_timestamp(0);
101//   packet->set_timestamp(ts_ns);
102//   packet->set_timestamp_clock_id(gpu_clock_id);
103
104   auto event = packet->set_gpu_render_stage_event();
105   event->set_gpu_id(0);
106
107   auto spec = event->set_specifications();
108
109   for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {
110      auto desc = spec->add_hw_queue();
111
112      desc->set_name(queues[i].name);
113      desc->set_description(queues[i].desc);
114   }
115
116   for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {
117      auto desc = spec->add_stage();
118
119      desc->set_name(stages[i].name);
120      if (stages[i].desc)
121         desc->set_description(stages[i].desc);
122   }
123}
124
125static void
126stage_start(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)
127{
128   struct fd_context *ctx = fd_context(pctx);
129   struct fd_perfetto_state *p = &ctx->perfetto;
130
131   p->start_ts[stage] = ts_ns;
132}
133
134static void
135stage_end(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)
136{
137   struct fd_context *ctx = fd_context(pctx);
138   struct fd_perfetto_state *p = &ctx->perfetto;
139
140   /* If we haven't managed to calibrate the alignment between GPU and CPU
141    * timestamps yet, then skip this trace, otherwise perfetto won't know
142    * what to do with it.
143    */
144   if (!sync_gpu_ts)
145      return;
146
147   FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
148      if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
149         send_descriptors(tctx, p->start_ts[stage]);
150         state->was_cleared = false;
151      }
152
153      auto packet = tctx.NewTracePacket();
154
155      packet->set_timestamp(p->start_ts[stage]);
156      packet->set_timestamp_clock_id(gpu_clock_id);
157
158      auto event = packet->set_gpu_render_stage_event();
159      event->set_event_id(0); // ???
160      event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
161      event->set_duration(ts_ns - p->start_ts[stage]);
162      event->set_stage_id(stage);
163      event->set_context((uintptr_t)pctx);
164
165      /* The "surface" meta-stage has extra info about render target: */
166      if (stage == SURFACE_STAGE_ID) {
167
168         event->set_submission_id(p->submit_id);
169
170         if (p->cbuf0_format) {
171            auto data = event->add_extra_data();
172
173            data->set_name("color0 format");
174            data->set_value(util_format_short_name(p->cbuf0_format));
175         }
176
177         if (p->zs_format) {
178            auto data = event->add_extra_data();
179
180            data->set_name("zs format");
181            data->set_value(util_format_short_name(p->zs_format));
182         }
183
184         {
185            auto data = event->add_extra_data();
186
187            data->set_name("width");
188            data->set_value(std::to_string(p->width));
189         }
190
191         {
192            auto data = event->add_extra_data();
193
194            data->set_name("height");
195            data->set_value(std::to_string(p->height));
196         }
197
198         {
199            auto data = event->add_extra_data();
200
201            data->set_name("MSAA");
202            data->set_value(std::to_string(p->samples));
203         }
204
205         {
206            auto data = event->add_extra_data();
207
208            data->set_name("MRTs");
209            data->set_value(std::to_string(p->mrts));
210         }
211
212         // "renderMode"
213         // "surfaceID"
214
215         if (p->nbins) {
216            auto data = event->add_extra_data();
217
218            data->set_name("numberOfBins");
219            data->set_value(std::to_string(p->nbins));
220         }
221
222         if (p->binw) {
223            auto data = event->add_extra_data();
224
225            data->set_name("binWidth");
226            data->set_value(std::to_string(p->binw));
227         }
228
229         if (p->binh) {
230            auto data = event->add_extra_data();
231
232            data->set_name("binHeight");
233            data->set_value(std::to_string(p->binh));
234         }
235      }
236   });
237}
238
239#ifdef __cplusplus
240extern "C" {
241#endif
242
243void
244fd_perfetto_init(void)
245{
246   util_perfetto_init();
247
248   perfetto::DataSourceDescriptor dsd;
249   dsd.set_name("gpu.renderstages.msm");
250   FdRenderpassDataSource::Register(dsd);
251}
252
253static void
254sync_timestamp(struct fd_context *ctx)
255{
256   uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
257   uint64_t gpu_ts;
258
259   if (cpu_ts < next_clock_sync_ns)
260      return;
261
262   if (fd_pipe_get_param(ctx->pipe, FD_TIMESTAMP, &gpu_ts)) {
263      PERFETTO_ELOG("Could not sync CPU and GPU clocks");
264      return;
265   }
266
267   /* convert GPU ts into ns: */
268   gpu_ts = ctx->ts_to_ns(gpu_ts);
269
270   FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
271      auto packet = tctx.NewTracePacket();
272
273      packet->set_timestamp(cpu_ts);
274
275      auto event = packet->set_clock_snapshot();
276
277      {
278         auto clock = event->add_clocks();
279
280         clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
281         clock->set_timestamp(cpu_ts);
282      }
283
284      {
285         auto clock = event->add_clocks();
286
287         clock->set_clock_id(gpu_clock_id);
288         clock->set_timestamp(gpu_ts);
289      }
290
291      sync_gpu_ts = gpu_ts;
292      next_clock_sync_ns = cpu_ts + 30000000;
293   });
294}
295
296static void
297emit_submit_id(struct fd_context *ctx)
298{
299   FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
300      auto packet = tctx.NewTracePacket();
301
302      packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
303
304      auto event = packet->set_vulkan_api_event();
305      auto submit = event->set_vk_queue_submit();
306
307      submit->set_submission_id(ctx->submit_count);
308   });
309}
310
311void
312fd_perfetto_submit(struct fd_context *ctx)
313{
314   sync_timestamp(ctx);
315   emit_submit_id(ctx);
316}
317
318/*
319 * Trace callbacks, called from u_trace once the timestamps from GPU have been
320 * collected.
321 */
322
323void
324fd_start_render_pass(struct pipe_context *pctx, uint64_t ts_ns,
325                     const void *flush_data,
326                     const struct trace_start_render_pass *payload)
327{
328   stage_start(pctx, ts_ns, SURFACE_STAGE_ID);
329
330   struct fd_perfetto_state *p = &fd_context(pctx)->perfetto;
331
332   p->submit_id = payload->submit_id;
333   p->cbuf0_format = payload->cbuf0_format;
334   p->zs_format = payload->zs_format;
335   p->width = payload->width;
336   p->height = payload->height;
337   p->mrts = payload->mrts;
338   p->samples = payload->samples;
339   p->nbins = payload->nbins;
340   p->binw = payload->binw;
341   p->binh = payload->binh;
342}
343
344void
345fd_end_render_pass(struct pipe_context *pctx, uint64_t ts_ns,
346                   const void *flush_data,
347                   const struct trace_end_render_pass *payload)
348{
349   stage_end(pctx, ts_ns, SURFACE_STAGE_ID);
350}
351
352void
353fd_start_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,
354                    const void *flush_data,
355                    const struct trace_start_binning_ib *payload)
356{
357   stage_start(pctx, ts_ns, BINNING_STAGE_ID);
358}
359
360void
361fd_end_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,
362                  const void *flush_data,
363                  const struct trace_end_binning_ib *payload)
364{
365   stage_end(pctx, ts_ns, BINNING_STAGE_ID);
366}
367
368void
369fd_start_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,
370                 const void *flush_data,
371                 const struct trace_start_draw_ib *payload)
372{
373   stage_start(
374      pctx, ts_ns,
375      fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);
376}
377
378void
379fd_end_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,
380               const void *flush_data,
381               const struct trace_end_draw_ib *payload)
382{
383   stage_end(
384      pctx, ts_ns,
385      fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);
386}
387
388void
389fd_start_blit(struct pipe_context *pctx, uint64_t ts_ns,
390              const void *flush_data,
391              const struct trace_start_blit *payload)
392{
393   stage_start(pctx, ts_ns, BLIT_STAGE_ID);
394}
395
396void
397fd_end_blit(struct pipe_context *pctx, uint64_t ts_ns,
398            const void *flush_data,
399            const struct trace_end_blit *payload)
400{
401   stage_end(pctx, ts_ns, BLIT_STAGE_ID);
402}
403
404void
405fd_start_compute(struct pipe_context *pctx, uint64_t ts_ns,
406                 const void *flush_data,
407                 const struct trace_start_compute *payload)
408{
409   stage_start(pctx, ts_ns, COMPUTE_STAGE_ID);
410}
411
412void
413fd_end_compute(struct pipe_context *pctx, uint64_t ts_ns,
414               const void *flush_data,
415               const struct trace_end_compute *payload)
416{
417   stage_end(pctx, ts_ns, COMPUTE_STAGE_ID);
418}
419
420void
421fd_start_clear_restore(struct pipe_context *pctx, uint64_t ts_ns,
422                       const void *flush_data,
423                       const struct trace_start_clear_restore *payload)
424{
425   stage_start(pctx, ts_ns, CLEAR_RESTORE_STAGE_ID);
426}
427
428void
429fd_end_clear_restore(struct pipe_context *pctx, uint64_t ts_ns,
430                     const void *flush_data,
431                     const struct trace_end_clear_restore *payload)
432{
433   stage_end(pctx, ts_ns, CLEAR_RESTORE_STAGE_ID);
434}
435
436void
437fd_start_resolve(struct pipe_context *pctx, uint64_t ts_ns,
438                 const void *flush_data,
439                 const struct trace_start_resolve *payload)
440{
441   stage_start(pctx, ts_ns, RESOLVE_STAGE_ID);
442}
443
444void
445fd_end_resolve(struct pipe_context *pctx, uint64_t ts_ns,
446               const void *flush_data,
447               const struct trace_end_resolve *payload)
448{
449   stage_end(pctx, ts_ns, RESOLVE_STAGE_ID);
450}
451
452#ifdef __cplusplus
453}
454#endif
455