1/* 2 * Copyright © 2019-2021 Collabora, Ltd. 3 * Author: Antonio Caggiano <antonio.caggiano@collabora.com> 4 * Author: Rohan Garg <rohan.garg@collabora.com> 5 * Author: Robert Beckett <bob.beckett@collabora.com> 6 * Author: Corentin Noël <corentin.noel@collabora.com> 7 * 8 * SPDX-License-Identifier: MIT 9 */ 10 11#include "pps_datasource.h" 12#include "pps_driver.h" 13 14#include <condition_variable> 15#include <thread> 16#include <variant> 17 18// Minimum supported sampling period in nanoseconds 19#define MIN_SAMPLING_PERIOD_NS 50000 20 21namespace pps 22{ 23static std::string driver_name; 24 25/// Synchronize access to started_cv and started 26static std::mutex started_m; 27static std::condition_variable started_cv; 28static bool started = false; 29 30float ms(const std::chrono::nanoseconds &t) 31{ 32 return t.count() / 1000000.0f; 33} 34 35void GpuDataSource::OnSetup(const SetupArgs &args) 36{ 37 // Create drivers for all supported devices 38 auto drm_devices = DrmDevice::create_all(); 39 for (auto &drm_device : drm_devices) { 40 if (drm_device.name != driver_name) 41 continue; 42 43 if (auto driver = Driver::get_driver(std::move(drm_device))) { 44 if (!driver->init_perfcnt()) { 45 // Skip failing driver 46 PPS_LOG_ERROR("Failed to initialize %s driver", driver->drm_device.name.c_str()); 47 continue; 48 } 49 50 this->driver = driver; 51 } 52 } 53 if (driver == nullptr) { 54 PPS_LOG_FATAL("No DRM devices supported"); 55 } 56 57 // Parse perfetto config 58 const std::string &config_raw = args.config->gpu_counter_config_raw(); 59 perfetto::protos::pbzero::GpuCounterConfig::Decoder config(config_raw); 60 61 if (config.has_counter_ids()) { 62 // Get enabled counters 63 PPS_LOG_IMPORTANT("Selecting counters"); 64 for (auto it = config.counter_ids(); it; ++it) { 65 uint32_t counter_id = it->as_uint32(); 66 driver->enable_counter(counter_id); 67 } 68 } else { 69 // Enable all counters 70 driver->enable_all_counters(); 71 } 72 73 // Get sampling period 74 auto min_sampling_period = std::chrono::nanoseconds(MIN_SAMPLING_PERIOD_NS); 75 76 auto dev_supported = std::chrono::nanoseconds(driver->get_min_sampling_period_ns()); 77 if (dev_supported > min_sampling_period) { 78 min_sampling_period = dev_supported; 79 } 80 81 time_to_sleep = std::max(time_to_sleep, min_sampling_period); 82 83 if (config.has_counter_period_ns()) { 84 auto requested_sampling_period = std::chrono::nanoseconds(config.counter_period_ns()); 85 if (requested_sampling_period < min_sampling_period) { 86 PPS_LOG_ERROR("Sampling period should be greater than %" PRIu64 " ns (%.2f ms)", 87 uint64_t(min_sampling_period.count()), 88 ms(min_sampling_period)); 89 } else { 90 time_to_sleep = requested_sampling_period; 91 } 92 } 93 PPS_LOG("Sampling period set to %" PRIu64 " ns", uint64_t(time_to_sleep.count())); 94} 95 96void GpuDataSource::OnStart(const StartArgs &args) 97{ 98 driver->enable_perfcnt(time_to_sleep.count()); 99 100 state = State::Start; 101 102 { 103 std::lock_guard<std::mutex> lock(started_m); 104 started = true; 105 } 106 started_cv.notify_all(); 107} 108 109void close_callback(GpuDataSource::TraceContext ctx) 110{ 111 auto packet = ctx.NewTracePacket(); 112 packet->Finalize(); 113 ctx.Flush(); 114 PPS_LOG("Context flushed"); 115} 116 117void GpuDataSource::OnStop(const StopArgs &args) 118{ 119 state = State::Stop; 120 auto stop_closure = args.HandleStopAsynchronously(); 121 Trace(close_callback); 122 stop_closure(); 123 124 driver->disable_perfcnt(); 125 driver = nullptr; 126 127 std::lock_guard<std::mutex> lock(started_m); 128 started = false; 129} 130 131void GpuDataSource::wait_started() 132{ 133 std::unique_lock<std::mutex> lock(started_m); 134 if (!started) { 135 PPS_LOG("Waiting for start"); 136 started_cv.wait(lock, [] { return started; }); 137 } 138} 139 140void GpuDataSource::register_data_source(const std::string &_driver_name) 141{ 142 driver_name = _driver_name; 143 static perfetto::DataSourceDescriptor dsd; 144 dsd.set_name("gpu.counters." + driver_name); 145 Register(dsd); 146} 147 148void add_group(perfetto::protos::pbzero::GpuCounterDescriptor *desc, 149 const CounterGroup &group, 150 const std::string &prefix, 151 int32_t gpu_num) 152{ 153 if (!group.counters.empty()) { 154 // Define a block for each group containing counters 155 auto block_desc = desc->add_blocks(); 156 block_desc->set_name(prefix + "." + group.name); 157 block_desc->set_block_id(group.id); 158 159 // Associate counters to blocks 160 for (auto id : group.counters) { 161 block_desc->add_counter_ids(id); 162 } 163 } 164 165 for (auto const &sub : group.subgroups) { 166 // Perfetto doesnt currently support nested groups. 167 // Flatten group hierarchy, using dot separator 168 add_group(desc, sub, prefix + "." + group.name, gpu_num); 169 } 170} 171 172void add_descriptors(perfetto::protos::pbzero::GpuCounterEvent *event, 173 std::vector<CounterGroup> const &groups, 174 std::vector<Counter> const &counters, 175 Driver &driver) 176{ 177 // Start a counter descriptor 178 auto desc = event->set_counter_descriptor(); 179 180 // Add the groups 181 for (auto const &group : groups) { 182 add_group(desc, group, driver.drm_device.name, driver.drm_device.gpu_num); 183 } 184 185 // Add the counters 186 for (auto const &counter : counters) { 187 auto spec = desc->add_specs(); 188 spec->set_counter_id(counter.id); 189 spec->set_name(counter.name); 190 191 auto units = perfetto::protos::pbzero::GpuCounterDescriptor::NONE; 192 switch (counter.units) { 193 case Counter::Units::Percent: 194 units = perfetto::protos::pbzero::GpuCounterDescriptor::PERCENT; 195 break; 196 case Counter::Units::Byte: 197 units = perfetto::protos::pbzero::GpuCounterDescriptor::BYTE; 198 break; 199 case Counter::Units::Hertz: 200 units = perfetto::protos::pbzero::GpuCounterDescriptor::HERTZ; 201 break; 202 case Counter::Units::None: 203 units = perfetto::protos::pbzero::GpuCounterDescriptor::NONE; 204 break; 205 default: 206 assert(false && "Missing counter units type!"); 207 break; 208 } 209 spec->add_numerator_units(units); 210 } 211} 212 213void add_samples(perfetto::protos::pbzero::GpuCounterEvent &event, const Driver &driver) 214{ 215 if (driver.enabled_counters.size() == 0) { 216 PPS_LOG_FATAL("There are no counters enabled"); 217 } 218 219 for (const auto &counter : driver.enabled_counters) { 220 auto counter_event = event.add_counters(); 221 222 counter_event->set_counter_id(counter.id); 223 224 auto value = counter.get_value(driver); 225 if (auto d_value = std::get_if<double>(&value)) { 226 counter_event->set_double_value(*d_value); 227 } else if (auto i_value = std::get_if<int64_t>(&value)) { 228 counter_event->set_int_value(*i_value); 229 } else { 230 PPS_LOG_ERROR("Failed to get value for counter %s", counter.name.c_str()); 231 } 232 } 233} 234 235void GpuDataSource::trace(TraceContext &ctx) 236{ 237 using namespace perfetto::protos::pbzero; 238 239 if (auto state = ctx.GetIncrementalState(); state->was_cleared) { 240 // Mark any incremental state before this point invalid 241 { 242 auto packet = ctx.NewTracePacket(); 243 packet->set_timestamp(perfetto::base::GetBootTimeNs().count()); 244 packet->set_sequence_flags(TracePacket::SEQ_INCREMENTAL_STATE_CLEARED); 245 } 246 247 auto packet = ctx.NewTracePacket(); 248 descriptor_timestamp = perfetto::base::GetBootTimeNs().count(); 249 packet->set_timestamp(descriptor_timestamp); 250 251 auto event = packet->set_gpu_counter_event(); 252 event->set_gpu_id(driver->drm_device.gpu_num); 253 254 auto &groups = driver->groups; 255 auto &counters = driver->enabled_counters; 256 PPS_LOG("Sending counter descriptors"); 257 add_descriptors(event, groups, counters, *driver); 258 259 state->was_cleared = false; 260 } 261 262 // Save current scheduler for restoring later 263 int prev_sched_policy = sched_getscheduler(0); 264 sched_param prev_priority_param; 265 sched_getparam(0, &prev_priority_param); 266 267 // Use FIFO policy to avoid preemption while collecting counters 268 int sched_policy = SCHED_FIFO; 269 // Do not use max priority to avoid starving migration and watchdog threads 270 int priority_value = sched_get_priority_max(sched_policy) - 1; 271 sched_param priority_param { priority_value }; 272 sched_setscheduler(0, sched_policy, &priority_param); 273 274 if (driver->dump_perfcnt()) { 275 while (auto timestamp = driver->next()) { 276 if (timestamp <= descriptor_timestamp) { 277 // Do not send counter values before counter descriptors 278 PPS_LOG_ERROR("Skipping counter values coming before descriptors"); 279 continue; 280 } 281 282 auto packet = ctx.NewTracePacket(); 283 packet->set_timestamp(timestamp); 284 285 auto event = packet->set_gpu_counter_event(); 286 event->set_gpu_id(driver->drm_device.gpu_num); 287 288 add_samples(*event, *driver); 289 } 290 } 291 292 // Reset normal scheduler 293 sched_setscheduler(0, prev_sched_policy, &prev_priority_param); 294} 295 296void GpuDataSource::trace_callback(TraceContext ctx) 297{ 298 using namespace std::chrono; 299 300 nanoseconds sleep_time = nanoseconds(0); 301 302 if (auto data_source = ctx.GetDataSourceLocked()) { 303 if (data_source->time_to_sleep > data_source->time_to_trace) { 304 sleep_time = data_source->time_to_sleep - data_source->time_to_trace; 305 } 306 } 307 308 // Wait sampling period before tracing 309 std::this_thread::sleep_for(sleep_time); 310 311 auto time_zero = perfetto::base::GetBootTimeNs(); 312 if (auto data_source = ctx.GetDataSourceLocked()) { 313 // Check data source is still running 314 if (data_source->state == pps::State::Start) { 315 data_source->trace(ctx); 316 data_source->time_to_trace = perfetto::base::GetBootTimeNs() - time_zero; 317 } 318 } else { 319 PPS_LOG("Tracing finished"); 320 } 321} 322 323} // namespace pps 324