/*
 * Copyright © 2021 Collabora, Ltd.
 * Author: Antonio Caggiano <antonio.caggiano@collabora.com>
 *
 * SPDX-License-Identifier: MIT
 */
77ec681f3Smrg
87ec681f3Smrg#include "intel_pps_perf.h"
97ec681f3Smrg
107ec681f3Smrg#include <i915_drm.h>
117ec681f3Smrg#include <math.h>
127ec681f3Smrg#include <sys/ioctl.h>
137ec681f3Smrg#include <util/ralloc.h>
147ec681f3Smrg#include <utility>
157ec681f3Smrg
167ec681f3Smrg#include <pps/pps.h>
177ec681f3Smrg#include <pps/pps_device.h>
187ec681f3Smrg
197ec681f3Smrgnamespace pps
207ec681f3Smrg{
/// @brief Issues an ioctl, re-issuing it for as long as it fails with EINTR or EAGAIN
/// @param fd File descriptor the request is sent to
/// @param request Request code passed through to ioctl()
/// @param arg Untyped argument passed through to ioctl()
/// @return The value returned by the last ioctl() attempt
int perf_ioctl(int fd, unsigned long request, void *arg)
{
   for (;;) {
      const int result = ioctl(fd, request, arg);

      // Only an interrupted or "try again" failure is worth another attempt;
      // success and every other error are handed back to the caller as-is.
      const bool should_retry = result == -1 && (errno == EINTR || errno == EAGAIN);
      if (!should_retry) {
         return result;
      }
   }
}
317ec681f3Smrg
327ec681f3SmrgIntelPerf::IntelPerf(const int drm_fd)
337ec681f3Smrg   : drm_fd {drm_fd}
347ec681f3Smrg   , ralloc_ctx {ralloc_context(nullptr)}
357ec681f3Smrg   , ralloc_cfg {ralloc_context(nullptr)}
367ec681f3Smrg   , cfg {intel_perf_new(ralloc_cfg)}
377ec681f3Smrg{
387ec681f3Smrg   assert(drm_fd >= 0 && "DRM fd is not valid");
397ec681f3Smrg
407ec681f3Smrg   if (!intel_get_device_info_from_fd(drm_fd, &devinfo)) {
417ec681f3Smrg      PPS_LOG_FATAL("Failed to get devinfo");
427ec681f3Smrg   }
437ec681f3Smrg
447ec681f3Smrg   intel_perf_init_metrics(cfg,
457ec681f3Smrg      &devinfo,
467ec681f3Smrg      drm_fd,
477ec681f3Smrg      false, // no pipeline statistics
487ec681f3Smrg      false  // no register snapshots
497ec681f3Smrg   );
507ec681f3Smrg
517ec681f3Smrg   // Enable RenderBasic counters
527ec681f3Smrg   auto query_name = "RenderBasic";
537ec681f3Smrg   query = find_query_by_name(query_name);
547ec681f3Smrg   if (!query) {
557ec681f3Smrg      PPS_LOG_FATAL("Failed to find %s query", query_name);
567ec681f3Smrg   }
577ec681f3Smrg}
587ec681f3Smrg
597ec681f3SmrgIntelPerf::IntelPerf(IntelPerf &&o)
607ec681f3Smrg   : drm_fd {o.drm_fd}
617ec681f3Smrg   , ralloc_ctx {o.ralloc_ctx}
627ec681f3Smrg   , ralloc_cfg {o.ralloc_cfg}
637ec681f3Smrg   , ctx {o.ctx}
647ec681f3Smrg   , cfg {o.cfg}
657ec681f3Smrg   , devinfo {std::move(o.devinfo)}
667ec681f3Smrg   , query {std::move(o.query)}
677ec681f3Smrg{
687ec681f3Smrg   o.drm_fd = -1;
697ec681f3Smrg   o.ralloc_ctx = nullptr;
707ec681f3Smrg   o.ralloc_cfg = nullptr;
717ec681f3Smrg   o.ctx = nullptr;
727ec681f3Smrg   o.cfg = nullptr;
737ec681f3Smrg}
747ec681f3Smrg
757ec681f3SmrgIntelPerf &IntelPerf::operator=(IntelPerf &&o) noexcept
767ec681f3Smrg{
777ec681f3Smrg   std::swap(drm_fd, o.drm_fd);
787ec681f3Smrg   std::swap(ralloc_ctx, o.ralloc_ctx);
797ec681f3Smrg   std::swap(ralloc_cfg, o.ralloc_cfg);
807ec681f3Smrg   std::swap(ctx, o.ctx);
817ec681f3Smrg   std::swap(cfg, o.cfg);
827ec681f3Smrg   std::swap(devinfo, o.devinfo);
837ec681f3Smrg   std::swap(query, o.query);
847ec681f3Smrg   return *this;
857ec681f3Smrg}
867ec681f3Smrg
877ec681f3SmrgIntelPerf::~IntelPerf()
887ec681f3Smrg{
897ec681f3Smrg   close();
907ec681f3Smrg
917ec681f3Smrg   if (ralloc_ctx) {
927ec681f3Smrg      ralloc_free(ralloc_ctx);
937ec681f3Smrg   }
947ec681f3Smrg
957ec681f3Smrg   if (ralloc_cfg) {
967ec681f3Smrg      ralloc_free(ralloc_cfg);
977ec681f3Smrg   }
987ec681f3Smrg}
997ec681f3Smrg
1007ec681f3Smrg/// @return A query info, which is something like a group of counters
1017ec681f3Smrgstd::optional<struct intel_perf_query_info> IntelPerf::find_query_by_name(
1027ec681f3Smrg   const std::string &name) const
1037ec681f3Smrg{
1047ec681f3Smrg   for (int i = 0; i < cfg->n_queries; ++i) {
1057ec681f3Smrg      struct intel_perf_query_info query = cfg->queries[i];
1067ec681f3Smrg      if (name == query.symbol_name) {
1077ec681f3Smrg         return query;
1087ec681f3Smrg      }
1097ec681f3Smrg   }
1107ec681f3Smrg
1117ec681f3Smrg   return std::nullopt;
1127ec681f3Smrg}
1137ec681f3Smrg
1147ec681f3Smrgstd::vector<struct intel_perf_query_info *> IntelPerf::get_queries() const
1157ec681f3Smrg{
1167ec681f3Smrg   assert(cfg && "Intel perf config should be valid");
1177ec681f3Smrg   assert(cfg->n_queries && "Intel perf queries not initialized");
1187ec681f3Smrg
1197ec681f3Smrg   std::vector<struct intel_perf_query_info *> queries = {};
1207ec681f3Smrg
1217ec681f3Smrg   for (int i = 0; i < cfg->n_queries; ++i) {
1227ec681f3Smrg      struct intel_perf_query_info *query = &cfg->queries[i];
1237ec681f3Smrg      // Skip invalid queries
1247ec681f3Smrg      if (query && query->symbol_name) {
1257ec681f3Smrg         queries.push_back(query);
1267ec681f3Smrg      }
1277ec681f3Smrg   }
1287ec681f3Smrg
1297ec681f3Smrg   return queries;
1307ec681f3Smrg}
1317ec681f3Smrg
1327ec681f3Smrgstatic uint64_t query_timestamp_frequency(const int drm_fd)
1337ec681f3Smrg{
1347ec681f3Smrg   int timestamp_frequency;
1357ec681f3Smrg
1367ec681f3Smrg   drm_i915_getparam_t gp = {};
1377ec681f3Smrg   gp.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY;
1387ec681f3Smrg   gp.value = &timestamp_frequency;
1397ec681f3Smrg   if (perf_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0) {
1407ec681f3Smrg      return timestamp_frequency;
1417ec681f3Smrg   }
1427ec681f3Smrg
1437ec681f3Smrg   PPS_LOG_ERROR("Unable to query timestamp frequency from i915, guessing values...");
1447ec681f3Smrg   return 12000000;
1457ec681f3Smrg}
1467ec681f3Smrg
1477ec681f3Smrg// The period_exponent gives a sampling period as follows:
1487ec681f3Smrg// sample_period = timestamp_period * 2^(period_exponent + 1)
1497ec681f3Smrg// where timestamp_period is 80ns for Haswell+
1507ec681f3Smrgstatic uint32_t get_oa_exponent(const int drm_fd, const uint64_t sampling_period_ns)
1517ec681f3Smrg{
1527ec681f3Smrg   uint64_t timestamp_frequency = query_timestamp_frequency(drm_fd);
1537ec681f3Smrg   return static_cast<uint32_t>(log2(sampling_period_ns * timestamp_frequency / 1000000000ull)) - 1;
1547ec681f3Smrg}
1557ec681f3Smrg
1567ec681f3Smrgbool IntelPerf::open(const uint64_t sampling_period_ns)
1577ec681f3Smrg{
1587ec681f3Smrg   assert(!ctx && "Perf context should not be initialized at this point");
1597ec681f3Smrg
1607ec681f3Smrg   ctx = intel_perf_new_context(ralloc_ctx);
1617ec681f3Smrg   intel_perf_init_context(ctx, cfg, nullptr, nullptr, nullptr, &devinfo, 0, drm_fd);
1627ec681f3Smrg
1637ec681f3Smrg   auto oa_exponent = get_oa_exponent(drm_fd, sampling_period_ns);
1647ec681f3Smrg
1657ec681f3Smrg   return intel_perf_open(ctx,
1667ec681f3Smrg      query->oa_metrics_set_id,
1677ec681f3Smrg      query->oa_format,
1687ec681f3Smrg      oa_exponent,
1697ec681f3Smrg      drm_fd,
1707ec681f3Smrg      INTEL_PERF_INVALID_CTX_ID,
1717ec681f3Smrg      true /* enable stream immediately */);
1727ec681f3Smrg}
1737ec681f3Smrg
1747ec681f3Smrgvoid IntelPerf::close()
1757ec681f3Smrg{
1767ec681f3Smrg   if (ctx) {
1777ec681f3Smrg      intel_perf_close(ctx, nullptr);
1787ec681f3Smrg      ctx = nullptr;
1797ec681f3Smrg   }
1807ec681f3Smrg}
1817ec681f3Smrg
1827ec681f3Smrgbool IntelPerf::oa_stream_ready() const
1837ec681f3Smrg{
1847ec681f3Smrg   assert(ctx && "Perf context was not open");
1857ec681f3Smrg   return intel_perf_oa_stream_ready(ctx);
1867ec681f3Smrg}
1877ec681f3Smrg
1887ec681f3Smrgssize_t IntelPerf::read_oa_stream(void *buf, size_t bytes) const
1897ec681f3Smrg{
1907ec681f3Smrg   assert(ctx && "Perf context was not open");
1917ec681f3Smrg   return intel_perf_read_oa_stream(ctx, buf, bytes);
1927ec681f3Smrg}
1937ec681f3Smrg
1947ec681f3Smrg} // namespace pps
195