1/*
2 * Copyright © 2019-2021 Collabora, Ltd.
3 * Author: Antonio Caggiano <antonio.caggiano@collabora.com>
4 * Author: Rohan Garg <rohan.garg@collabora.com>
5 * Author: Robert Beckett <bob.beckett@collabora.com>
6 * Author: Corentin Noël <corentin.noel@collabora.com>
7 *
8 * SPDX-License-Identifier: MIT
9 */
10
11#include "pps_datasource.h"
12#include "pps_driver.h"
13
14#include <condition_variable>
15#include <thread>
16#include <variant>
17
// Minimum supported sampling period in nanoseconds.
// Used as the lower bound for the sampling period when setting up the data
// source; a device reporting a larger minimum raises this bound.
#define MIN_SAMPLING_PERIOD_NS 50000
20
21namespace pps
22{
/// Name of the driver this data source has been registered for
static std::string driver_name;

/// Protects the started flag and is the mutex used to wait on started_cv
static std::mutex started_m;
/// Notified once the data source has been started
static std::condition_variable started_cv;
/// True between a start and the following stop of the data source
static bool started{false};
29
/// @brief Convert a duration in nanoseconds to fractional milliseconds.
/// @param t Duration to convert
/// @return The same duration expressed in milliseconds as a float
float ms(const std::chrono::nanoseconds &t)
{
   // A float-based millisecond duration performs the division by 1e6 in float
   using float_milliseconds = std::chrono::duration<float, std::milli>;
   return std::chrono::duration_cast<float_milliseconds>(t).count();
}
34
35void GpuDataSource::OnSetup(const SetupArgs &args)
36{
37   // Create drivers for all supported devices
38   auto drm_devices = DrmDevice::create_all();
39   for (auto &drm_device : drm_devices) {
40      if (drm_device.name != driver_name)
41         continue;
42
43      if (auto driver = Driver::get_driver(std::move(drm_device))) {
44         if (!driver->init_perfcnt()) {
45            // Skip failing driver
46            PPS_LOG_ERROR("Failed to initialize %s driver", driver->drm_device.name.c_str());
47            continue;
48         }
49
50         this->driver = driver;
51      }
52   }
53   if (driver == nullptr) {
54      PPS_LOG_FATAL("No DRM devices supported");
55   }
56
57   // Parse perfetto config
58   const std::string &config_raw = args.config->gpu_counter_config_raw();
59   perfetto::protos::pbzero::GpuCounterConfig::Decoder config(config_raw);
60
61   if (config.has_counter_ids()) {
62      // Get enabled counters
63      PPS_LOG_IMPORTANT("Selecting counters");
64      for (auto it = config.counter_ids(); it; ++it) {
65         uint32_t counter_id = it->as_uint32();
66         driver->enable_counter(counter_id);
67      }
68   } else {
69      // Enable all counters
70      driver->enable_all_counters();
71   }
72
73   // Get sampling period
74   auto min_sampling_period = std::chrono::nanoseconds(MIN_SAMPLING_PERIOD_NS);
75
76   auto dev_supported = std::chrono::nanoseconds(driver->get_min_sampling_period_ns());
77   if (dev_supported > min_sampling_period) {
78      min_sampling_period = dev_supported;
79   }
80
81   time_to_sleep = std::max(time_to_sleep, min_sampling_period);
82
83   if (config.has_counter_period_ns()) {
84      auto requested_sampling_period = std::chrono::nanoseconds(config.counter_period_ns());
85      if (requested_sampling_period < min_sampling_period) {
86         PPS_LOG_ERROR("Sampling period should be greater than %" PRIu64 " ns (%.2f ms)",
87            uint64_t(min_sampling_period.count()),
88            ms(min_sampling_period));
89      } else {
90         time_to_sleep = requested_sampling_period;
91      }
92   }
93   PPS_LOG("Sampling period set to %" PRIu64 " ns", uint64_t(time_to_sleep.count()));
94}
95
96void GpuDataSource::OnStart(const StartArgs &args)
97{
98   driver->enable_perfcnt(time_to_sleep.count());
99
100   state = State::Start;
101
102   {
103      std::lock_guard<std::mutex> lock(started_m);
104      started = true;
105   }
106   started_cv.notify_all();
107}
108
109void close_callback(GpuDataSource::TraceContext ctx)
110{
111   auto packet = ctx.NewTracePacket();
112   packet->Finalize();
113   ctx.Flush();
114   PPS_LOG("Context flushed");
115}
116
117void GpuDataSource::OnStop(const StopArgs &args)
118{
119   state = State::Stop;
120   auto stop_closure = args.HandleStopAsynchronously();
121   Trace(close_callback);
122   stop_closure();
123
124   driver->disable_perfcnt();
125   driver = nullptr;
126
127   std::lock_guard<std::mutex> lock(started_m);
128   started = false;
129}
130
131void GpuDataSource::wait_started()
132{
133   std::unique_lock<std::mutex> lock(started_m);
134   if (!started) {
135      PPS_LOG("Waiting for start");
136      started_cv.wait(lock, [] { return started; });
137   }
138}
139
140void GpuDataSource::register_data_source(const std::string &_driver_name)
141{
142   driver_name = _driver_name;
143   static perfetto::DataSourceDescriptor dsd;
144   dsd.set_name("gpu.counters." + driver_name);
145   Register(dsd);
146}
147
148void add_group(perfetto::protos::pbzero::GpuCounterDescriptor *desc,
149   const CounterGroup &group,
150   const std::string &prefix,
151   int32_t gpu_num)
152{
153   if (!group.counters.empty()) {
154      // Define a block for each group containing counters
155      auto block_desc = desc->add_blocks();
156      block_desc->set_name(prefix + "." + group.name);
157      block_desc->set_block_id(group.id);
158
159      // Associate counters to blocks
160      for (auto id : group.counters) {
161         block_desc->add_counter_ids(id);
162      }
163   }
164
165   for (auto const &sub : group.subgroups) {
166      // Perfetto doesnt currently support nested groups.
167      // Flatten group hierarchy, using dot separator
168      add_group(desc, sub, prefix + "." + group.name, gpu_num);
169   }
170}
171
172void add_descriptors(perfetto::protos::pbzero::GpuCounterEvent *event,
173   std::vector<CounterGroup> const &groups,
174   std::vector<Counter> const &counters,
175   Driver &driver)
176{
177   // Start a counter descriptor
178   auto desc = event->set_counter_descriptor();
179
180   // Add the groups
181   for (auto const &group : groups) {
182      add_group(desc, group, driver.drm_device.name, driver.drm_device.gpu_num);
183   }
184
185   // Add the counters
186   for (auto const &counter : counters) {
187      auto spec = desc->add_specs();
188      spec->set_counter_id(counter.id);
189      spec->set_name(counter.name);
190
191      auto units = perfetto::protos::pbzero::GpuCounterDescriptor::NONE;
192      switch (counter.units) {
193      case Counter::Units::Percent:
194         units = perfetto::protos::pbzero::GpuCounterDescriptor::PERCENT;
195         break;
196      case Counter::Units::Byte:
197         units = perfetto::protos::pbzero::GpuCounterDescriptor::BYTE;
198         break;
199      case Counter::Units::Hertz:
200         units = perfetto::protos::pbzero::GpuCounterDescriptor::HERTZ;
201         break;
202      case Counter::Units::None:
203         units = perfetto::protos::pbzero::GpuCounterDescriptor::NONE;
204         break;
205      default:
206         assert(false && "Missing counter units type!");
207         break;
208      }
209      spec->add_numerator_units(units);
210   }
211}
212
213void add_samples(perfetto::protos::pbzero::GpuCounterEvent &event, const Driver &driver)
214{
215   if (driver.enabled_counters.size() == 0) {
216      PPS_LOG_FATAL("There are no counters enabled");
217   }
218
219   for (const auto &counter : driver.enabled_counters) {
220      auto counter_event = event.add_counters();
221
222      counter_event->set_counter_id(counter.id);
223
224      auto value = counter.get_value(driver);
225      if (auto d_value = std::get_if<double>(&value)) {
226         counter_event->set_double_value(*d_value);
227      } else if (auto i_value = std::get_if<int64_t>(&value)) {
228         counter_event->set_int_value(*i_value);
229      } else {
230         PPS_LOG_ERROR("Failed to get value for counter %s", counter.name.c_str());
231      }
232   }
233}
234
235void GpuDataSource::trace(TraceContext &ctx)
236{
237   using namespace perfetto::protos::pbzero;
238
239   if (auto state = ctx.GetIncrementalState(); state->was_cleared) {
240      // Mark any incremental state before this point invalid
241      {
242         auto packet = ctx.NewTracePacket();
243         packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
244         packet->set_sequence_flags(TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
245      }
246
247      auto packet = ctx.NewTracePacket();
248      descriptor_timestamp = perfetto::base::GetBootTimeNs().count();
249      packet->set_timestamp(descriptor_timestamp);
250
251      auto event = packet->set_gpu_counter_event();
252      event->set_gpu_id(driver->drm_device.gpu_num);
253
254      auto &groups = driver->groups;
255      auto &counters = driver->enabled_counters;
256      PPS_LOG("Sending counter descriptors");
257      add_descriptors(event, groups, counters, *driver);
258
259      state->was_cleared = false;
260   }
261
262   // Save current scheduler for restoring later
263   int prev_sched_policy = sched_getscheduler(0);
264   sched_param prev_priority_param;
265   sched_getparam(0, &prev_priority_param);
266
267   // Use FIFO policy to avoid preemption while collecting counters
268   int sched_policy = SCHED_FIFO;
269   // Do not use max priority to avoid starving migration and watchdog threads
270   int priority_value = sched_get_priority_max(sched_policy) - 1;
271   sched_param priority_param { priority_value };
272   sched_setscheduler(0, sched_policy, &priority_param);
273
274   if (driver->dump_perfcnt()) {
275      while (auto timestamp = driver->next()) {
276         if (timestamp <= descriptor_timestamp) {
277            // Do not send counter values before counter descriptors
278            PPS_LOG_ERROR("Skipping counter values coming before descriptors");
279            continue;
280         }
281
282         auto packet = ctx.NewTracePacket();
283         packet->set_timestamp(timestamp);
284
285         auto event = packet->set_gpu_counter_event();
286         event->set_gpu_id(driver->drm_device.gpu_num);
287
288         add_samples(*event, *driver);
289      }
290   }
291
292   // Reset normal scheduler
293   sched_setscheduler(0, prev_sched_policy, &prev_priority_param);
294}
295
296void GpuDataSource::trace_callback(TraceContext ctx)
297{
298   using namespace std::chrono;
299
300   nanoseconds sleep_time = nanoseconds(0);
301
302   if (auto data_source = ctx.GetDataSourceLocked()) {
303      if (data_source->time_to_sleep > data_source->time_to_trace) {
304         sleep_time = data_source->time_to_sleep - data_source->time_to_trace;
305      }
306   }
307
308   // Wait sampling period before tracing
309   std::this_thread::sleep_for(sleep_time);
310
311   auto time_zero = perfetto::base::GetBootTimeNs();
312   if (auto data_source = ctx.GetDataSourceLocked()) {
313      // Check data source is still running
314      if (data_source->state == pps::State::Start) {
315         data_source->trace(ctx);
316         data_source->time_to_trace = perfetto::base::GetBootTimeNs() - time_zero;
317      }
318   } else {
319      PPS_LOG("Tracing finished");
320   }
321}
322
323} // namespace pps
324