fd_pps_driver.cc revision 7ec681f3
1/*
2 * Copyright © 2021 Google, Inc.
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7#include "fd_pps_driver.h"
8
9#include <cstring>
10#include <iostream>
11#include <perfetto.h>
12
13#include "pps/pps.h"
14#include "pps/pps_algorithm.h"
15
16namespace pps
17{
18
19uint64_t
20FreedrenoDriver::get_min_sampling_period_ns()
21{
22   return 100000;
23}
24
25/*
26TODO this sees like it would be largely the same for a5xx as well
27(ie. same countable names)..
28 */
29void
30FreedrenoDriver::setup_a6xx_counters()
31{
32   /* TODO is there a reason to want more than one group? */
33   CounterGroup group = {};
34   group.name = "counters";
35   groups.clear();
36   counters.clear();
37   countables.clear();
38   enabled_counters.clear();
39   groups.emplace_back(std::move(group));
40
41   /*
42    * Create the countables that we'll be using.
43    */
44
45   auto PERF_CP_ALWAYS_COUNT = countable("PERF_CP_ALWAYS_COUNT");
46   auto PERF_CP_BUSY_CYCLES  = countable("PERF_CP_BUSY_CYCLES");
47   auto PERF_RB_3D_PIXELS    = countable("PERF_RB_3D_PIXELS");
48   auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS");
49   auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS");
50   auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES");
51   auto PERF_SP_BUSY_CYCLES  = countable("PERF_SP_BUSY_CYCLES");
52
53   /*
54    * And then setup the derived counters that we are exporting to
55    * pps based on the captured countable values
56    */
57
58   counter("GPU Frequency", Counter::Units::Hertz, [=]() {
59         return PERF_CP_ALWAYS_COUNT / time;
60      }
61   );
62
63   counter("GPU % Utilization", Counter::Units::Percent, [=]() {
64         return 100.0 * (PERF_CP_BUSY_CYCLES / time) / max_freq;
65      }
66   );
67
68   // This one is a bit of a guess, but seems plausible..
69   counter("ALU / Fragment", Counter::Units::None, [=]() {
70         return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
71               PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / PERF_RB_3D_PIXELS;
72      }
73   );
74
75   counter("TP L1 Cache Misses", Counter::Units::None, [=]() {
76         return PERF_TP_L1_CACHELINE_MISSES / time;
77      }
78   );
79
80   counter("Shader Core Utilization", Counter::Units::Percent, [=]() {
81         return 100.0 * (PERF_SP_BUSY_CYCLES / time) / (max_freq * info->num_sp_cores);
82      }
83   );
84
85   // TODO add more.. see https://gpuinspector.dev/docs/gpu-counters/qualcomm
86   // for what blob exposes
87}
88
89/**
90 * Generate an submit the cmdstream to configure the counter/countable
91 * muxing
92 */
93void
94FreedrenoDriver::configure_counters(bool reset, bool wait)
95{
96   struct fd_submit *submit = fd_submit_new(pipe);
97   enum fd_ringbuffer_flags flags =
98      (enum fd_ringbuffer_flags)(FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
99   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000, flags);
100
101   for (auto countable : countables)
102      countable.configure(ring, reset);
103
104   struct fd_submit_fence fence = {};
105   util_queue_fence_init(&fence.ready);
106
107   fd_submit_flush(submit, -1, &fence);
108
109   util_queue_fence_wait(&fence.ready);
110
111   fd_ringbuffer_del(ring);
112   fd_submit_del(submit);
113
114   if (wait)
115      fd_pipe_wait(pipe, &fence.fence);
116}
117
118/**
119 * Read the current counter values and record the time.
120 */
121void
122FreedrenoDriver::collect_countables()
123{
124   last_dump_ts = perfetto::base::GetBootTimeNs().count();
125
126   for (auto countable : countables)
127      countable.collect();
128}
129
130bool
131FreedrenoDriver::init_perfcnt()
132{
133   uint64_t val;
134
135   dev = fd_device_new(drm_device.fd);
136   pipe = fd_pipe_new(dev, FD_PIPE_3D);
137   dev_id = fd_pipe_dev_id(pipe);
138
139   if (fd_pipe_get_param(pipe, FD_MAX_FREQ, &val)) {
140      PERFETTO_FATAL("Could not get MAX_FREQ");
141      return false;
142   }
143   max_freq = val;
144
145   if (fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val)) {
146      PERFETTO_ILOG("Could not get SUSPEND_COUNT");
147   } else {
148      suspend_count = val;
149      has_suspend_count = true;
150   }
151
152   perfcntrs = fd_perfcntrs(fd_pipe_dev_id(pipe), &num_perfcntrs);
153   if (num_perfcntrs == 0) {
154      PERFETTO_FATAL("No hw counters available");
155      return false;
156   }
157
158   assigned_counters.resize(num_perfcntrs);
159   assigned_counters.assign(assigned_counters.size(), 0);
160
161   switch (fd_dev_gen(dev_id)) {
162   case 6:
163      setup_a6xx_counters();
164      break;
165   default:
166      PERFETTO_FATAL("Unsupported GPU: a%03u", fd_dev_gpu_id(dev_id));
167      return false;
168   }
169
170   state.resize(next_countable_id);
171
172   for (auto countable : countables)
173      countable.resolve();
174
175   info = fd_dev_info(dev_id);
176
177   io = fd_dt_find_io();
178   if (!io) {
179      PERFETTO_FATAL("Could not map GPU I/O space");
180      return false;
181   }
182
183   configure_counters(true, true);
184   collect_countables();
185
186   return true;
187}
188
189void
190FreedrenoDriver::enable_counter(const uint32_t counter_id)
191{
192   enabled_counters.push_back(counters[counter_id]);
193}
194
195void
196FreedrenoDriver::enable_all_counters()
197{
198   enabled_counters.reserve(counters.size());
199   for (auto &counter : counters) {
200      enabled_counters.push_back(counter);
201   }
202}
203
204void
205FreedrenoDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */)
206{
207}
208
209bool
210FreedrenoDriver::dump_perfcnt()
211{
212   if (has_suspend_count) {
213      uint64_t val;
214
215      fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val);
216
217      if (suspend_count != val) {
218         PERFETTO_ILOG("Device had suspended!");
219
220         suspend_count = val;
221
222         configure_counters(true, true);
223         collect_countables();
224
225         /* We aren't going to have anything sensible by comparing
226          * current values to values from prior to the suspend, so
227          * just skip this sampling period.
228          */
229         return false;
230      }
231   }
232
233   auto last_ts = last_dump_ts;
234
235   /* Capture the timestamp from the *start* of the sampling period: */
236   last_capture_ts = last_dump_ts;
237
238   collect_countables();
239
240   auto elapsed_time_ns = last_dump_ts - last_ts;
241
242   time = (float)elapsed_time_ns / 1000000000.0;
243
244   /* On older kernels that dont' support querying the suspend-
245    * count, just send configuration cmdstream regularly to keep
246    * the GPU alive and correctly configured for the countables
247    * we want
248    */
249   if (!has_suspend_count) {
250      configure_counters(false, false);
251   }
252
253   return true;
254}
255
256uint64_t FreedrenoDriver::next()
257{
258   auto ret = last_capture_ts;
259   last_capture_ts = 0;
260   return ret;
261}
262
263void FreedrenoDriver::disable_perfcnt()
264{
265   /* There isn't really any disable, only reconfiguring which countables
266    * get muxed to which counters
267    */
268}
269
270/*
271 * Countable
272 */
273
274FreedrenoDriver::Countable
275FreedrenoDriver::countable(std::string name)
276{
277   auto countable = Countable(this, name);
278   countables.emplace_back(countable);
279   return countable;
280}
281
282FreedrenoDriver::Countable::Countable(FreedrenoDriver *d, std::string name)
283   : id {d->next_countable_id++}, d {d}, name {name}
284{
285}
286
287/* Emit register writes on ring to configure counter/countable muxing: */
288void
289FreedrenoDriver::Countable::configure(struct fd_ringbuffer *ring, bool reset)
290{
291   const struct fd_perfcntr_countable *countable = d->state[id].countable;
292   const struct fd_perfcntr_counter   *counter   = d->state[id].counter;
293
294   OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
295
296   if (counter->enable && reset) {
297      OUT_PKT4(ring, counter->enable, 1);
298      OUT_RING(ring, 0);
299   }
300
301   if (counter->clear && reset) {
302      OUT_PKT4(ring, counter->clear, 1);
303      OUT_RING(ring, 1);
304
305      OUT_PKT4(ring, counter->clear, 1);
306      OUT_RING(ring, 0);
307   }
308
309   OUT_PKT4(ring, counter->select_reg, 1);
310   OUT_RING(ring, countable->selector);
311
312   if (counter->enable && reset) {
313      OUT_PKT4(ring, counter->enable, 1);
314      OUT_RING(ring, 1);
315   }
316}
317
318/* Collect current counter value and calculate delta since last sample: */
319void
320FreedrenoDriver::Countable::collect()
321{
322   const struct fd_perfcntr_counter *counter = d->state[id].counter;
323
324   d->state[id].last_value = d->state[id].value;
325
326   uint32_t *reg_lo = (uint32_t *)d->io + counter->counter_reg_lo;
327   uint32_t *reg_hi = (uint32_t *)d->io + counter->counter_reg_hi;
328
329   uint32_t lo = *reg_lo;
330   uint32_t hi = *reg_hi;
331
332   d->state[id].value = lo | ((uint64_t)hi << 32);
333}
334
335/* Resolve the countable and assign next counter from it's group: */
336void
337FreedrenoDriver::Countable::resolve()
338{
339   for (unsigned i = 0; i < d->num_perfcntrs; i++) {
340      const struct fd_perfcntr_group *g = &d->perfcntrs[i];
341      for (unsigned j = 0; j < g->num_countables; j++) {
342         const struct fd_perfcntr_countable *c = &g->countables[j];
343         if (name == c->name) {
344            d->state[id].countable = c;
345
346            /* Assign a counter from the same group: */
347            assert(d->assigned_counters[i] < g->num_counters);
348            d->state[id].counter = &g->counters[d->assigned_counters[i]++];
349
350            std::cout << "Countable: " << name << ", group=" << g->name <<
351                  ", counter=" << d->assigned_counters[i] - 1 << "\n";
352
353            return;
354         }
355      }
356   }
357   unreachable("no such countable!");
358}
359
360uint64_t
361FreedrenoDriver::Countable::get_value() const
362{
363   return d->state[id].value - d->state[id].last_value;
364}
365
366/*
367 * DerivedCounter
368 */
369
370FreedrenoDriver::DerivedCounter::DerivedCounter(FreedrenoDriver *d, std::string name,
371                                                Counter::Units units,
372                                                std::function<int64_t()> derive)
373   : Counter(d->next_counter_id++, name, 0)
374{
375   std::cout << "DerivedCounter: " << name << ", id=" << id << "\n";
376   this->units = units;
377   set_getter([=](const Counter &c, const Driver &d) {
378         return derive();
379      }
380   );
381}
382
383FreedrenoDriver::DerivedCounter
384FreedrenoDriver::counter(std::string name, Counter::Units units,
385                         std::function<int64_t()> derive)
386{
387   auto counter = DerivedCounter(this, name, units, derive);
388   counters.emplace_back(counter);
389   return counter;
390}
391
392} // namespace pps
393