17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2021 Collabora, Ltd. 37ec681f3Smrg * Author: Antonio Caggiano <antonio.caggiano@collabora.com> 47ec681f3Smrg * 57ec681f3Smrg * SPDX-License-Identifier: MIT 67ec681f3Smrg */ 77ec681f3Smrg 87ec681f3Smrg#include "intel_pps_perf.h" 97ec681f3Smrg 107ec681f3Smrg#include <i915_drm.h> 117ec681f3Smrg#include <math.h> 127ec681f3Smrg#include <sys/ioctl.h> 137ec681f3Smrg#include <util/ralloc.h> 147ec681f3Smrg#include <utility> 157ec681f3Smrg 167ec681f3Smrg#include <pps/pps.h> 177ec681f3Smrg#include <pps/pps_device.h> 187ec681f3Smrg 197ec681f3Smrgnamespace pps 207ec681f3Smrg{ 217ec681f3Smrgint perf_ioctl(int fd, unsigned long request, void *arg) 227ec681f3Smrg{ 237ec681f3Smrg int ret; 247ec681f3Smrg 257ec681f3Smrg do { 267ec681f3Smrg ret = ioctl(fd, request, arg); 277ec681f3Smrg } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 287ec681f3Smrg 297ec681f3Smrg return ret; 307ec681f3Smrg} 317ec681f3Smrg 327ec681f3SmrgIntelPerf::IntelPerf(const int drm_fd) 337ec681f3Smrg : drm_fd {drm_fd} 347ec681f3Smrg , ralloc_ctx {ralloc_context(nullptr)} 357ec681f3Smrg , ralloc_cfg {ralloc_context(nullptr)} 367ec681f3Smrg , cfg {intel_perf_new(ralloc_cfg)} 377ec681f3Smrg{ 387ec681f3Smrg assert(drm_fd >= 0 && "DRM fd is not valid"); 397ec681f3Smrg 407ec681f3Smrg if (!intel_get_device_info_from_fd(drm_fd, &devinfo)) { 417ec681f3Smrg PPS_LOG_FATAL("Failed to get devinfo"); 427ec681f3Smrg } 437ec681f3Smrg 447ec681f3Smrg intel_perf_init_metrics(cfg, 457ec681f3Smrg &devinfo, 467ec681f3Smrg drm_fd, 477ec681f3Smrg false, // no pipeline statistics 487ec681f3Smrg false // no register snapshots 497ec681f3Smrg ); 507ec681f3Smrg 517ec681f3Smrg // Enable RenderBasic counters 527ec681f3Smrg auto query_name = "RenderBasic"; 537ec681f3Smrg query = find_query_by_name(query_name); 547ec681f3Smrg if (!query) { 557ec681f3Smrg PPS_LOG_FATAL("Failed to find %s query", query_name); 567ec681f3Smrg } 577ec681f3Smrg} 587ec681f3Smrg 597ec681f3SmrgIntelPerf::IntelPerf(IntelPerf &&o) 607ec681f3Smrg : drm_fd {o.drm_fd} 617ec681f3Smrg , ralloc_ctx {o.ralloc_ctx} 627ec681f3Smrg , ralloc_cfg {o.ralloc_cfg} 637ec681f3Smrg , ctx {o.ctx} 647ec681f3Smrg , cfg {o.cfg} 657ec681f3Smrg , devinfo {std::move(o.devinfo)} 667ec681f3Smrg , query {std::move(o.query)} 677ec681f3Smrg{ 687ec681f3Smrg o.drm_fd = -1; 697ec681f3Smrg o.ralloc_ctx = nullptr; 707ec681f3Smrg o.ralloc_cfg = nullptr; 717ec681f3Smrg o.ctx = nullptr; 727ec681f3Smrg o.cfg = nullptr; 737ec681f3Smrg} 747ec681f3Smrg 757ec681f3SmrgIntelPerf &IntelPerf::operator=(IntelPerf &&o) noexcept 767ec681f3Smrg{ 777ec681f3Smrg std::swap(drm_fd, o.drm_fd); 787ec681f3Smrg std::swap(ralloc_ctx, o.ralloc_ctx); 797ec681f3Smrg std::swap(ralloc_cfg, o.ralloc_cfg); 807ec681f3Smrg std::swap(ctx, o.ctx); 817ec681f3Smrg std::swap(cfg, o.cfg); 827ec681f3Smrg std::swap(devinfo, o.devinfo); 837ec681f3Smrg std::swap(query, o.query); 847ec681f3Smrg return *this; 857ec681f3Smrg} 867ec681f3Smrg 877ec681f3SmrgIntelPerf::~IntelPerf() 887ec681f3Smrg{ 897ec681f3Smrg close(); 907ec681f3Smrg 917ec681f3Smrg if (ralloc_ctx) { 927ec681f3Smrg ralloc_free(ralloc_ctx); 937ec681f3Smrg } 947ec681f3Smrg 957ec681f3Smrg if (ralloc_cfg) { 967ec681f3Smrg ralloc_free(ralloc_cfg); 977ec681f3Smrg } 987ec681f3Smrg} 997ec681f3Smrg 1007ec681f3Smrg/// @return A query info, which is something like a group of counters 1017ec681f3Smrgstd::optional<struct intel_perf_query_info> IntelPerf::find_query_by_name( 1027ec681f3Smrg const std::string &name) const 1037ec681f3Smrg{ 1047ec681f3Smrg for (int i = 0; i < cfg->n_queries; ++i) { 1057ec681f3Smrg struct intel_perf_query_info query = cfg->queries[i]; 1067ec681f3Smrg if (name == query.symbol_name) { 1077ec681f3Smrg return query; 1087ec681f3Smrg } 1097ec681f3Smrg } 1107ec681f3Smrg 1117ec681f3Smrg return std::nullopt; 1127ec681f3Smrg} 1137ec681f3Smrg 1147ec681f3Smrgstd::vector<struct intel_perf_query_info *> IntelPerf::get_queries() const 1157ec681f3Smrg{ 1167ec681f3Smrg assert(cfg && "Intel perf config should be valid"); 1177ec681f3Smrg assert(cfg->n_queries && "Intel perf queries not initialized"); 1187ec681f3Smrg 1197ec681f3Smrg std::vector<struct intel_perf_query_info *> queries = {}; 1207ec681f3Smrg 1217ec681f3Smrg for (int i = 0; i < cfg->n_queries; ++i) { 1227ec681f3Smrg struct intel_perf_query_info *query = &cfg->queries[i]; 1237ec681f3Smrg // Skip invalid queries 1247ec681f3Smrg if (query && query->symbol_name) { 1257ec681f3Smrg queries.push_back(query); 1267ec681f3Smrg } 1277ec681f3Smrg } 1287ec681f3Smrg 1297ec681f3Smrg return queries; 1307ec681f3Smrg} 1317ec681f3Smrg 1327ec681f3Smrgstatic uint64_t query_timestamp_frequency(const int drm_fd) 1337ec681f3Smrg{ 1347ec681f3Smrg int timestamp_frequency; 1357ec681f3Smrg 1367ec681f3Smrg drm_i915_getparam_t gp = {}; 1377ec681f3Smrg gp.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY; 1387ec681f3Smrg gp.value = ×tamp_frequency; 1397ec681f3Smrg if (perf_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0) { 1407ec681f3Smrg return timestamp_frequency; 1417ec681f3Smrg } 1427ec681f3Smrg 1437ec681f3Smrg PPS_LOG_ERROR("Unable to query timestamp frequency from i915, guessing values..."); 1447ec681f3Smrg return 12000000; 1457ec681f3Smrg} 1467ec681f3Smrg 1477ec681f3Smrg// The period_exponent gives a sampling period as follows: 1487ec681f3Smrg// sample_period = timestamp_period * 2^(period_exponent + 1) 1497ec681f3Smrg// where timestamp_period is 80ns for Haswell+ 1507ec681f3Smrgstatic uint32_t get_oa_exponent(const int drm_fd, const uint64_t sampling_period_ns) 1517ec681f3Smrg{ 1527ec681f3Smrg uint64_t timestamp_frequency = query_timestamp_frequency(drm_fd); 1537ec681f3Smrg return static_cast<uint32_t>(log2(sampling_period_ns * timestamp_frequency / 1000000000ull)) - 1; 1547ec681f3Smrg} 1557ec681f3Smrg 1567ec681f3Smrgbool IntelPerf::open(const uint64_t sampling_period_ns) 1577ec681f3Smrg{ 1587ec681f3Smrg assert(!ctx && "Perf context should not be initialized at this point"); 1597ec681f3Smrg 1607ec681f3Smrg ctx = intel_perf_new_context(ralloc_ctx); 1617ec681f3Smrg intel_perf_init_context(ctx, cfg, nullptr, nullptr, nullptr, &devinfo, 0, drm_fd); 1627ec681f3Smrg 1637ec681f3Smrg auto oa_exponent = get_oa_exponent(drm_fd, sampling_period_ns); 1647ec681f3Smrg 1657ec681f3Smrg return intel_perf_open(ctx, 1667ec681f3Smrg query->oa_metrics_set_id, 1677ec681f3Smrg query->oa_format, 1687ec681f3Smrg oa_exponent, 1697ec681f3Smrg drm_fd, 1707ec681f3Smrg INTEL_PERF_INVALID_CTX_ID, 1717ec681f3Smrg true /* enable stream immediately */); 1727ec681f3Smrg} 1737ec681f3Smrg 1747ec681f3Smrgvoid IntelPerf::close() 1757ec681f3Smrg{ 1767ec681f3Smrg if (ctx) { 1777ec681f3Smrg intel_perf_close(ctx, nullptr); 1787ec681f3Smrg ctx = nullptr; 1797ec681f3Smrg } 1807ec681f3Smrg} 1817ec681f3Smrg 1827ec681f3Smrgbool IntelPerf::oa_stream_ready() const 1837ec681f3Smrg{ 1847ec681f3Smrg assert(ctx && "Perf context was not open"); 1857ec681f3Smrg return intel_perf_oa_stream_ready(ctx); 1867ec681f3Smrg} 1877ec681f3Smrg 1887ec681f3Smrgssize_t IntelPerf::read_oa_stream(void *buf, size_t bytes) const 1897ec681f3Smrg{ 1907ec681f3Smrg assert(ctx && "Perf context was not open"); 1917ec681f3Smrg return intel_perf_read_oa_stream(ctx, buf, bytes); 1927ec681f3Smrg} 1937ec681f3Smrg 1947ec681f3Smrg} // namespace pps 195