fd_pps_driver.cc revision 7ec681f3
1/* 2 * Copyright © 2021 Google, Inc. 3 * 4 * SPDX-License-Identifier: MIT 5 */ 6 7#include "fd_pps_driver.h" 8 9#include <cstring> 10#include <iostream> 11#include <perfetto.h> 12 13#include "pps/pps.h" 14#include "pps/pps_algorithm.h" 15 16namespace pps 17{ 18 19uint64_t 20FreedrenoDriver::get_min_sampling_period_ns() 21{ 22 return 100000; 23} 24 25/* 26TODO this sees like it would be largely the same for a5xx as well 27(ie. same countable names).. 28 */ 29void 30FreedrenoDriver::setup_a6xx_counters() 31{ 32 /* TODO is there a reason to want more than one group? */ 33 CounterGroup group = {}; 34 group.name = "counters"; 35 groups.clear(); 36 counters.clear(); 37 countables.clear(); 38 enabled_counters.clear(); 39 groups.emplace_back(std::move(group)); 40 41 /* 42 * Create the countables that we'll be using. 43 */ 44 45 auto PERF_CP_ALWAYS_COUNT = countable("PERF_CP_ALWAYS_COUNT"); 46 auto PERF_CP_BUSY_CYCLES = countable("PERF_CP_BUSY_CYCLES"); 47 auto PERF_RB_3D_PIXELS = countable("PERF_RB_3D_PIXELS"); 48 auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS"); 49 auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS"); 50 auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES"); 51 auto PERF_SP_BUSY_CYCLES = countable("PERF_SP_BUSY_CYCLES"); 52 53 /* 54 * And then setup the derived counters that we are exporting to 55 * pps based on the captured countable values 56 */ 57 58 counter("GPU Frequency", Counter::Units::Hertz, [=]() { 59 return PERF_CP_ALWAYS_COUNT / time; 60 } 61 ); 62 63 counter("GPU % Utilization", Counter::Units::Percent, [=]() { 64 return 100.0 * (PERF_CP_BUSY_CYCLES / time) / max_freq; 65 } 66 ); 67 68 // This one is a bit of a guess, but seems plausible.. 69 counter("ALU / Fragment", Counter::Units::None, [=]() { 70 return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS + 71 PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / PERF_RB_3D_PIXELS; 72 } 73 ); 74 75 counter("TP L1 Cache Misses", Counter::Units::None, [=]() { 76 return PERF_TP_L1_CACHELINE_MISSES / time; 77 } 78 ); 79 80 counter("Shader Core Utilization", Counter::Units::Percent, [=]() { 81 return 100.0 * (PERF_SP_BUSY_CYCLES / time) / (max_freq * info->num_sp_cores); 82 } 83 ); 84 85 // TODO add more.. see https://gpuinspector.dev/docs/gpu-counters/qualcomm 86 // for what blob exposes 87} 88 89/** 90 * Generate an submit the cmdstream to configure the counter/countable 91 * muxing 92 */ 93void 94FreedrenoDriver::configure_counters(bool reset, bool wait) 95{ 96 struct fd_submit *submit = fd_submit_new(pipe); 97 enum fd_ringbuffer_flags flags = 98 (enum fd_ringbuffer_flags)(FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE); 99 struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000, flags); 100 101 for (auto countable : countables) 102 countable.configure(ring, reset); 103 104 struct fd_submit_fence fence = {}; 105 util_queue_fence_init(&fence.ready); 106 107 fd_submit_flush(submit, -1, &fence); 108 109 util_queue_fence_wait(&fence.ready); 110 111 fd_ringbuffer_del(ring); 112 fd_submit_del(submit); 113 114 if (wait) 115 fd_pipe_wait(pipe, &fence.fence); 116} 117 118/** 119 * Read the current counter values and record the time. 120 */ 121void 122FreedrenoDriver::collect_countables() 123{ 124 last_dump_ts = perfetto::base::GetBootTimeNs().count(); 125 126 for (auto countable : countables) 127 countable.collect(); 128} 129 130bool 131FreedrenoDriver::init_perfcnt() 132{ 133 uint64_t val; 134 135 dev = fd_device_new(drm_device.fd); 136 pipe = fd_pipe_new(dev, FD_PIPE_3D); 137 dev_id = fd_pipe_dev_id(pipe); 138 139 if (fd_pipe_get_param(pipe, FD_MAX_FREQ, &val)) { 140 PERFETTO_FATAL("Could not get MAX_FREQ"); 141 return false; 142 } 143 max_freq = val; 144 145 if (fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val)) { 146 PERFETTO_ILOG("Could not get SUSPEND_COUNT"); 147 } else { 148 suspend_count = val; 149 has_suspend_count = true; 150 } 151 152 perfcntrs = fd_perfcntrs(fd_pipe_dev_id(pipe), &num_perfcntrs); 153 if (num_perfcntrs == 0) { 154 PERFETTO_FATAL("No hw counters available"); 155 return false; 156 } 157 158 assigned_counters.resize(num_perfcntrs); 159 assigned_counters.assign(assigned_counters.size(), 0); 160 161 switch (fd_dev_gen(dev_id)) { 162 case 6: 163 setup_a6xx_counters(); 164 break; 165 default: 166 PERFETTO_FATAL("Unsupported GPU: a%03u", fd_dev_gpu_id(dev_id)); 167 return false; 168 } 169 170 state.resize(next_countable_id); 171 172 for (auto countable : countables) 173 countable.resolve(); 174 175 info = fd_dev_info(dev_id); 176 177 io = fd_dt_find_io(); 178 if (!io) { 179 PERFETTO_FATAL("Could not map GPU I/O space"); 180 return false; 181 } 182 183 configure_counters(true, true); 184 collect_countables(); 185 186 return true; 187} 188 189void 190FreedrenoDriver::enable_counter(const uint32_t counter_id) 191{ 192 enabled_counters.push_back(counters[counter_id]); 193} 194 195void 196FreedrenoDriver::enable_all_counters() 197{ 198 enabled_counters.reserve(counters.size()); 199 for (auto &counter : counters) { 200 enabled_counters.push_back(counter); 201 } 202} 203 204void 205FreedrenoDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */) 206{ 207} 208 209bool 210FreedrenoDriver::dump_perfcnt() 211{ 212 if (has_suspend_count) { 213 uint64_t val; 214 215 fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val); 216 217 if (suspend_count != val) { 218 PERFETTO_ILOG("Device had suspended!"); 219 220 suspend_count = val; 221 222 configure_counters(true, true); 223 collect_countables(); 224 225 /* We aren't going to have anything sensible by comparing 226 * current values to values from prior to the suspend, so 227 * just skip this sampling period. 228 */ 229 return false; 230 } 231 } 232 233 auto last_ts = last_dump_ts; 234 235 /* Capture the timestamp from the *start* of the sampling period: */ 236 last_capture_ts = last_dump_ts; 237 238 collect_countables(); 239 240 auto elapsed_time_ns = last_dump_ts - last_ts; 241 242 time = (float)elapsed_time_ns / 1000000000.0; 243 244 /* On older kernels that dont' support querying the suspend- 245 * count, just send configuration cmdstream regularly to keep 246 * the GPU alive and correctly configured for the countables 247 * we want 248 */ 249 if (!has_suspend_count) { 250 configure_counters(false, false); 251 } 252 253 return true; 254} 255 256uint64_t FreedrenoDriver::next() 257{ 258 auto ret = last_capture_ts; 259 last_capture_ts = 0; 260 return ret; 261} 262 263void FreedrenoDriver::disable_perfcnt() 264{ 265 /* There isn't really any disable, only reconfiguring which countables 266 * get muxed to which counters 267 */ 268} 269 270/* 271 * Countable 272 */ 273 274FreedrenoDriver::Countable 275FreedrenoDriver::countable(std::string name) 276{ 277 auto countable = Countable(this, name); 278 countables.emplace_back(countable); 279 return countable; 280} 281 282FreedrenoDriver::Countable::Countable(FreedrenoDriver *d, std::string name) 283 : id {d->next_countable_id++}, d {d}, name {name} 284{ 285} 286 287/* Emit register writes on ring to configure counter/countable muxing: */ 288void 289FreedrenoDriver::Countable::configure(struct fd_ringbuffer *ring, bool reset) 290{ 291 const struct fd_perfcntr_countable *countable = d->state[id].countable; 292 const struct fd_perfcntr_counter *counter = d->state[id].counter; 293 294 OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0); 295 296 if (counter->enable && reset) { 297 OUT_PKT4(ring, counter->enable, 1); 298 OUT_RING(ring, 0); 299 } 300 301 if (counter->clear && reset) { 302 OUT_PKT4(ring, counter->clear, 1); 303 OUT_RING(ring, 1); 304 305 OUT_PKT4(ring, counter->clear, 1); 306 OUT_RING(ring, 0); 307 } 308 309 OUT_PKT4(ring, counter->select_reg, 1); 310 OUT_RING(ring, countable->selector); 311 312 if (counter->enable && reset) { 313 OUT_PKT4(ring, counter->enable, 1); 314 OUT_RING(ring, 1); 315 } 316} 317 318/* Collect current counter value and calculate delta since last sample: */ 319void 320FreedrenoDriver::Countable::collect() 321{ 322 const struct fd_perfcntr_counter *counter = d->state[id].counter; 323 324 d->state[id].last_value = d->state[id].value; 325 326 uint32_t *reg_lo = (uint32_t *)d->io + counter->counter_reg_lo; 327 uint32_t *reg_hi = (uint32_t *)d->io + counter->counter_reg_hi; 328 329 uint32_t lo = *reg_lo; 330 uint32_t hi = *reg_hi; 331 332 d->state[id].value = lo | ((uint64_t)hi << 32); 333} 334 335/* Resolve the countable and assign next counter from it's group: */ 336void 337FreedrenoDriver::Countable::resolve() 338{ 339 for (unsigned i = 0; i < d->num_perfcntrs; i++) { 340 const struct fd_perfcntr_group *g = &d->perfcntrs[i]; 341 for (unsigned j = 0; j < g->num_countables; j++) { 342 const struct fd_perfcntr_countable *c = &g->countables[j]; 343 if (name == c->name) { 344 d->state[id].countable = c; 345 346 /* Assign a counter from the same group: */ 347 assert(d->assigned_counters[i] < g->num_counters); 348 d->state[id].counter = &g->counters[d->assigned_counters[i]++]; 349 350 std::cout << "Countable: " << name << ", group=" << g->name << 351 ", counter=" << d->assigned_counters[i] - 1 << "\n"; 352 353 return; 354 } 355 } 356 } 357 unreachable("no such countable!"); 358} 359 360uint64_t 361FreedrenoDriver::Countable::get_value() const 362{ 363 return d->state[id].value - d->state[id].last_value; 364} 365 366/* 367 * DerivedCounter 368 */ 369 370FreedrenoDriver::DerivedCounter::DerivedCounter(FreedrenoDriver *d, std::string name, 371 Counter::Units units, 372 std::function<int64_t()> derive) 373 : Counter(d->next_counter_id++, name, 0) 374{ 375 std::cout << "DerivedCounter: " << name << ", id=" << id << "\n"; 376 this->units = units; 377 set_getter([=](const Counter &c, const Driver &d) { 378 return derive(); 379 } 380 ); 381} 382 383FreedrenoDriver::DerivedCounter 384FreedrenoDriver::counter(std::string name, Counter::Units units, 385 std::function<int64_t()> derive) 386{ 387 auto counter = DerivedCounter(this, name, units, derive); 388 counters.emplace_back(counter); 389 return counter; 390} 391 392} // namespace pps 393