1/*
2 * Copyright © 2021 Collabora, Ltd.
3 * Author: Antonio Caggiano <antonio.caggiano@collabora.com>
4 *
5 * SPDX-License-Identifier: MIT
6 */
7
8#include "intel_pps_perf.h"
9
10#include <i915_drm.h>
11#include <math.h>
12#include <sys/ioctl.h>
13#include <util/ralloc.h>
14#include <utility>
15
16#include <pps/pps.h>
17#include <pps/pps_device.h>
18
19namespace pps
20{
/// Issues an ioctl and transparently retries it for as long as it fails
/// with EINTR or EAGAIN.
/// @param fd File descriptor the request is issued on
/// @param request ioctl request code
/// @param arg Request-specific argument forwarded untouched to ioctl
/// @return The result of the last ioctl call
int perf_ioctl(int fd, unsigned long request, void *arg)
{
   for (;;) {
      const int result = ioctl(fd, request, arg);

      // Only an interrupted or "try again" failure is worth another attempt
      const bool retry = result == -1 && (errno == EINTR || errno == EAGAIN);
      if (!retry) {
         return result;
      }
   }
}
31
32IntelPerf::IntelPerf(const int drm_fd)
33   : drm_fd {drm_fd}
34   , ralloc_ctx {ralloc_context(nullptr)}
35   , ralloc_cfg {ralloc_context(nullptr)}
36   , cfg {intel_perf_new(ralloc_cfg)}
37{
38   assert(drm_fd >= 0 && "DRM fd is not valid");
39
40   if (!intel_get_device_info_from_fd(drm_fd, &devinfo)) {
41      PPS_LOG_FATAL("Failed to get devinfo");
42   }
43
44   intel_perf_init_metrics(cfg,
45      &devinfo,
46      drm_fd,
47      false, // no pipeline statistics
48      false  // no register snapshots
49   );
50
51   // Enable RenderBasic counters
52   auto query_name = "RenderBasic";
53   query = find_query_by_name(query_name);
54   if (!query) {
55      PPS_LOG_FATAL("Failed to find %s query", query_name);
56   }
57}
58
59IntelPerf::IntelPerf(IntelPerf &&o)
60   : drm_fd {o.drm_fd}
61   , ralloc_ctx {o.ralloc_ctx}
62   , ralloc_cfg {o.ralloc_cfg}
63   , ctx {o.ctx}
64   , cfg {o.cfg}
65   , devinfo {std::move(o.devinfo)}
66   , query {std::move(o.query)}
67{
68   o.drm_fd = -1;
69   o.ralloc_ctx = nullptr;
70   o.ralloc_cfg = nullptr;
71   o.ctx = nullptr;
72   o.cfg = nullptr;
73}
74
75IntelPerf &IntelPerf::operator=(IntelPerf &&o) noexcept
76{
77   std::swap(drm_fd, o.drm_fd);
78   std::swap(ralloc_ctx, o.ralloc_ctx);
79   std::swap(ralloc_cfg, o.ralloc_cfg);
80   std::swap(ctx, o.ctx);
81   std::swap(cfg, o.cfg);
82   std::swap(devinfo, o.devinfo);
83   std::swap(query, o.query);
84   return *this;
85}
86
87IntelPerf::~IntelPerf()
88{
89   close();
90
91   if (ralloc_ctx) {
92      ralloc_free(ralloc_ctx);
93   }
94
95   if (ralloc_cfg) {
96      ralloc_free(ralloc_cfg);
97   }
98}
99
100/// @return A query info, which is something like a group of counters
101std::optional<struct intel_perf_query_info> IntelPerf::find_query_by_name(
102   const std::string &name) const
103{
104   for (int i = 0; i < cfg->n_queries; ++i) {
105      struct intel_perf_query_info query = cfg->queries[i];
106      if (name == query.symbol_name) {
107         return query;
108      }
109   }
110
111   return std::nullopt;
112}
113
114std::vector<struct intel_perf_query_info *> IntelPerf::get_queries() const
115{
116   assert(cfg && "Intel perf config should be valid");
117   assert(cfg->n_queries && "Intel perf queries not initialized");
118
119   std::vector<struct intel_perf_query_info *> queries = {};
120
121   for (int i = 0; i < cfg->n_queries; ++i) {
122      struct intel_perf_query_info *query = &cfg->queries[i];
123      // Skip invalid queries
124      if (query && query->symbol_name) {
125         queries.push_back(query);
126      }
127   }
128
129   return queries;
130}
131
132static uint64_t query_timestamp_frequency(const int drm_fd)
133{
134   int timestamp_frequency;
135
136   drm_i915_getparam_t gp = {};
137   gp.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY;
138   gp.value = &timestamp_frequency;
139   if (perf_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0) {
140      return timestamp_frequency;
141   }
142
143   PPS_LOG_ERROR("Unable to query timestamp frequency from i915, guessing values...");
144   return 12000000;
145}
146
147// The period_exponent gives a sampling period as follows:
148// sample_period = timestamp_period * 2^(period_exponent + 1)
149// where timestamp_period is 80ns for Haswell+
150static uint32_t get_oa_exponent(const int drm_fd, const uint64_t sampling_period_ns)
151{
152   uint64_t timestamp_frequency = query_timestamp_frequency(drm_fd);
153   return static_cast<uint32_t>(log2(sampling_period_ns * timestamp_frequency / 1000000000ull)) - 1;
154}
155
156bool IntelPerf::open(const uint64_t sampling_period_ns)
157{
158   assert(!ctx && "Perf context should not be initialized at this point");
159
160   ctx = intel_perf_new_context(ralloc_ctx);
161   intel_perf_init_context(ctx, cfg, nullptr, nullptr, nullptr, &devinfo, 0, drm_fd);
162
163   auto oa_exponent = get_oa_exponent(drm_fd, sampling_period_ns);
164
165   return intel_perf_open(ctx,
166      query->oa_metrics_set_id,
167      query->oa_format,
168      oa_exponent,
169      drm_fd,
170      INTEL_PERF_INVALID_CTX_ID,
171      true /* enable stream immediately */);
172}
173
174void IntelPerf::close()
175{
176   if (ctx) {
177      intel_perf_close(ctx, nullptr);
178      ctx = nullptr;
179   }
180}
181
182bool IntelPerf::oa_stream_ready() const
183{
184   assert(ctx && "Perf context was not open");
185   return intel_perf_oa_stream_ready(ctx);
186}
187
188ssize_t IntelPerf::read_oa_stream(void *buf, size_t bytes) const
189{
190   assert(ctx && "Perf context was not open");
191   return intel_perf_read_oa_stream(ctx, buf, bytes);
192}
193
194} // namespace pps
195