1/* 2 * Copyright © 2021 Collabora, Ltd. 3 * Author: Antonio Caggiano <antonio.caggiano@collabora.com> 4 * 5 * SPDX-License-Identifier: MIT 6 */ 7 8#include "intel_pps_perf.h" 9 10#include <i915_drm.h> 11#include <math.h> 12#include <sys/ioctl.h> 13#include <util/ralloc.h> 14#include <utility> 15 16#include <pps/pps.h> 17#include <pps/pps_device.h> 18 19namespace pps 20{ 21int perf_ioctl(int fd, unsigned long request, void *arg) 22{ 23 int ret; 24 25 do { 26 ret = ioctl(fd, request, arg); 27 } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 28 29 return ret; 30} 31 32IntelPerf::IntelPerf(const int drm_fd) 33 : drm_fd {drm_fd} 34 , ralloc_ctx {ralloc_context(nullptr)} 35 , ralloc_cfg {ralloc_context(nullptr)} 36 , cfg {intel_perf_new(ralloc_cfg)} 37{ 38 assert(drm_fd >= 0 && "DRM fd is not valid"); 39 40 if (!intel_get_device_info_from_fd(drm_fd, &devinfo)) { 41 PPS_LOG_FATAL("Failed to get devinfo"); 42 } 43 44 intel_perf_init_metrics(cfg, 45 &devinfo, 46 drm_fd, 47 false, // no pipeline statistics 48 false // no register snapshots 49 ); 50 51 // Enable RenderBasic counters 52 auto query_name = "RenderBasic"; 53 query = find_query_by_name(query_name); 54 if (!query) { 55 PPS_LOG_FATAL("Failed to find %s query", query_name); 56 } 57} 58 59IntelPerf::IntelPerf(IntelPerf &&o) 60 : drm_fd {o.drm_fd} 61 , ralloc_ctx {o.ralloc_ctx} 62 , ralloc_cfg {o.ralloc_cfg} 63 , ctx {o.ctx} 64 , cfg {o.cfg} 65 , devinfo {std::move(o.devinfo)} 66 , query {std::move(o.query)} 67{ 68 o.drm_fd = -1; 69 o.ralloc_ctx = nullptr; 70 o.ralloc_cfg = nullptr; 71 o.ctx = nullptr; 72 o.cfg = nullptr; 73} 74 75IntelPerf &IntelPerf::operator=(IntelPerf &&o) noexcept 76{ 77 std::swap(drm_fd, o.drm_fd); 78 std::swap(ralloc_ctx, o.ralloc_ctx); 79 std::swap(ralloc_cfg, o.ralloc_cfg); 80 std::swap(ctx, o.ctx); 81 std::swap(cfg, o.cfg); 82 std::swap(devinfo, o.devinfo); 83 std::swap(query, o.query); 84 return *this; 85} 86 87IntelPerf::~IntelPerf() 88{ 89 close(); 90 91 if (ralloc_ctx) { 92 ralloc_free(ralloc_ctx); 93 } 94 95 if (ralloc_cfg) { 96 ralloc_free(ralloc_cfg); 97 } 98} 99 100/// @return A query info, which is something like a group of counters 101std::optional<struct intel_perf_query_info> IntelPerf::find_query_by_name( 102 const std::string &name) const 103{ 104 for (int i = 0; i < cfg->n_queries; ++i) { 105 struct intel_perf_query_info query = cfg->queries[i]; 106 if (name == query.symbol_name) { 107 return query; 108 } 109 } 110 111 return std::nullopt; 112} 113 114std::vector<struct intel_perf_query_info *> IntelPerf::get_queries() const 115{ 116 assert(cfg && "Intel perf config should be valid"); 117 assert(cfg->n_queries && "Intel perf queries not initialized"); 118 119 std::vector<struct intel_perf_query_info *> queries = {}; 120 121 for (int i = 0; i < cfg->n_queries; ++i) { 122 struct intel_perf_query_info *query = &cfg->queries[i]; 123 // Skip invalid queries 124 if (query && query->symbol_name) { 125 queries.push_back(query); 126 } 127 } 128 129 return queries; 130} 131 132static uint64_t query_timestamp_frequency(const int drm_fd) 133{ 134 int timestamp_frequency; 135 136 drm_i915_getparam_t gp = {}; 137 gp.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY; 138 gp.value = ×tamp_frequency; 139 if (perf_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0) { 140 return timestamp_frequency; 141 } 142 143 PPS_LOG_ERROR("Unable to query timestamp frequency from i915, guessing values..."); 144 return 12000000; 145} 146 147// The period_exponent gives a sampling period as follows: 148// sample_period = timestamp_period * 2^(period_exponent + 1) 149// where timestamp_period is 80ns for Haswell+ 150static uint32_t get_oa_exponent(const int drm_fd, const uint64_t sampling_period_ns) 151{ 152 uint64_t timestamp_frequency = query_timestamp_frequency(drm_fd); 153 return static_cast<uint32_t>(log2(sampling_period_ns * timestamp_frequency / 1000000000ull)) - 1; 154} 155 156bool IntelPerf::open(const uint64_t sampling_period_ns) 157{ 158 assert(!ctx && "Perf context should not be initialized at this point"); 159 160 ctx = intel_perf_new_context(ralloc_ctx); 161 intel_perf_init_context(ctx, cfg, nullptr, nullptr, nullptr, &devinfo, 0, drm_fd); 162 163 auto oa_exponent = get_oa_exponent(drm_fd, sampling_period_ns); 164 165 return intel_perf_open(ctx, 166 query->oa_metrics_set_id, 167 query->oa_format, 168 oa_exponent, 169 drm_fd, 170 INTEL_PERF_INVALID_CTX_ID, 171 true /* enable stream immediately */); 172} 173 174void IntelPerf::close() 175{ 176 if (ctx) { 177 intel_perf_close(ctx, nullptr); 178 ctx = nullptr; 179 } 180} 181 182bool IntelPerf::oa_stream_ready() const 183{ 184 assert(ctx && "Perf context was not open"); 185 return intel_perf_oa_stream_ready(ctx); 186} 187 188ssize_t IntelPerf::read_oa_stream(void *buf, size_t bytes) const 189{ 190 assert(ctx && "Perf context was not open"); 191 return intel_perf_read_oa_stream(ctx, buf, bytes); 192} 193 194} // namespace pps 195