1/*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#ifndef GEN_PERF_H
25#define GEN_PERF_H
26
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#if defined(MAJOR_IN_SYSMACROS)
#include <sys/sysmacros.h>
#elif defined(MAJOR_IN_MKDEV)
#include <sys/mkdev.h>
#endif

#include "util/hash_table.h"
#include "util/ralloc.h"
39
40struct gen_device_info;
41
42struct gen_perf;
43struct gen_perf_query_info;
44
/**
 * Classification of a performance counter, stored in the `type` field of
 * struct gen_perf_query_counter.
 *
 * Enumerators are numbered consecutively starting from 0.
 */
enum gen_perf_counter_type {
   GEN_PERF_COUNTER_TYPE_EVENT = 0,
   GEN_PERF_COUNTER_TYPE_DURATION_NORM,
   GEN_PERF_COUNTER_TYPE_DURATION_RAW,
   GEN_PERF_COUNTER_TYPE_THROUGHPUT,
   GEN_PERF_COUNTER_TYPE_RAW,
   GEN_PERF_COUNTER_TYPE_TIMESTAMP,
};
53
/**
 * Representation of a counter's value, stored in the `data_type` field of
 * struct gen_perf_query_counter.
 *
 * gen_perf_query_counter_get_size() maps each enumerator to the size of
 * the matching C type (noted beside each value).
 */
enum gen_perf_counter_data_type {
   GEN_PERF_COUNTER_DATA_TYPE_BOOL32 = 0, /* sizeof(uint32_t) */
   GEN_PERF_COUNTER_DATA_TYPE_UINT32,     /* sizeof(uint32_t) */
   GEN_PERF_COUNTER_DATA_TYPE_UINT64,     /* sizeof(uint64_t) */
   GEN_PERF_COUNTER_DATA_TYPE_FLOAT,      /* sizeof(float) */
   GEN_PERF_COUNTER_DATA_TYPE_DOUBLE,     /* sizeof(double) */
};
61
/**
 * Description of a pipeline statistics counter, as recorded by
 * gen_perf_query_info_add_stat_reg().
 */
struct gen_pipeline_stat {
   /* Register identifying the statistic. */
   uint32_t reg;

   /* Ratio stored alongside the register.
    * NOTE(review): presumably used to scale the raw register value; the
    * code applying it is not in this header — confirm.
    */
   uint32_t numerator;
   uint32_t denominator;
};
67
/*
 * Upper bound on the number of counters in a single OA report, used to
 * size gen_perf_query_result::accumulator[].
 *
 * The largest OA formats we can use are:
 *   Haswell: 1 timestamp + 45 A counters + 8 B counters + 8 C counters
 *            = 62 counters
 *   Gen8+:   1 timestamp + 1 clock + 36 A counters + 8 B counters +
 *            8 C counters = 54 counters
 *
 * The Haswell layout is the larger of the two, hence 62.
 */
#define MAX_OA_REPORT_COUNTERS 62
76
77struct gen_perf_query_result {
78   /**
79    * Storage for the final accumulated OA counters.
80    */
81   uint64_t accumulator[MAX_OA_REPORT_COUNTERS];
82
83   /**
84    * Hw ID used by the context on which the query was running.
85    */
86   uint32_t hw_id;
87
88   /**
89    * Number of reports accumulated to produce the results.
90    */
91   uint32_t reports_accumulated;
92
93   /**
94    * Frequency in the slices of the GT at the begin and end of the
95    * query.
96    */
97   uint64_t slice_frequency[2];
98
99   /**
100    * Frequency in the unslice of the GT at the begin and end of the
101    * query.
102    */
103   uint64_t unslice_frequency[2];
104};
105
106struct gen_perf_query_counter {
107   const char *name;
108   const char *desc;
109   enum gen_perf_counter_type type;
110   enum gen_perf_counter_data_type data_type;
111   uint64_t raw_max;
112   size_t offset;
113
114   union {
115      uint64_t (*oa_counter_read_uint64)(struct gen_perf *perf,
116                                         const struct gen_perf_query_info *query,
117                                         const uint64_t *accumulator);
118      float (*oa_counter_read_float)(struct gen_perf *perf,
119                                     const struct gen_perf_query_info *query,
120                                     const uint64_t *accumulator);
121      struct gen_pipeline_stat pipeline_stat;
122   };
123};
124
/**
 * One register write used to program a query: a register and the value
 * to store in it.
 */
struct gen_perf_query_register_prog {
   uint32_t reg; /* register to program */
   uint32_t val; /* value for that register */
};
129
/**
 * Description of one performance query: its counters plus, for OA
 * queries, the metric set and register programming it needs.
 */
struct gen_perf_query_info {
   /* Which kind of query this entry describes. */
   enum gen_perf_query_type {
      GEN_PERF_QUERY_TYPE_OA,
      GEN_PERF_QUERY_TYPE_RAW,
      GEN_PERF_QUERY_TYPE_PIPELINE,
   } kind;

   const char *name;
   const char *guid;

   /* Counter array: `max_counters` entries are allocated and the first
    * `n_counters` are filled in (see gen_perf_query_append_query_info()
    * and gen_perf_query_info_add_stat_reg()).
    */
   struct gen_perf_query_counter *counters;
   int n_counters;
   int max_counters;

   /* NOTE(review): presumably the size in bytes of the query's result
    * data; it is not set anywhere in this header — confirm.
    */
   size_t data_size;

   /* OA specific */
   uint64_t oa_metrics_set_id;
   int oa_format;

   /* Offsets used to index into the accumulator[] array */
   int gpu_time_offset;
   int gpu_clock_offset;
   int a_offset;
   int b_offset;
   int c_offset;

   /* Register programming for this query: three arrays, each paired with
    * its element count.
    */
   struct gen_perf_query_register_prog *flex_regs;
   uint32_t n_flex_regs;

   struct gen_perf_query_register_prog *mux_regs;
   uint32_t n_mux_regs;

   struct gen_perf_query_register_prog *b_counter_regs;
   uint32_t n_b_counter_regs;
};
164
/**
 * Top-level performance query state, created with gen_perf_new().
 */
struct gen_perf {
   /* Array of `n_queries` query descriptions, grown one entry at a time
    * by gen_perf_query_append_query_info().
    */
   struct gen_perf_query_info *queries;
   int n_queries;

   /* Variables referenced in the XML metadata for OA performance
    * counters, e.g. in the normalization equations.
    *
    * All are uint64_t so that generated code sees consistent operand
    * types.
    */
   struct {
      uint64_t timestamp_frequency; /** $GpuTimestampFrequency */
      uint64_t n_eus;               /** $EuCoresTotalCount */
      uint64_t n_eu_slices;         /** $EuSlicesTotalCount */
      uint64_t n_eu_sub_slices;     /** $EuSubslicesTotalCount */
      uint64_t eu_threads_count;    /** $EuThreadsCount */
      uint64_t slice_mask;          /** $SliceMask */
      uint64_t subslice_mask;       /** $SubsliceMask */
      uint64_t gt_min_freq;         /** $GpuMinFrequency */
      uint64_t gt_max_freq;         /** $GpuMaxFrequency */
      uint64_t revision;            /** $SkuRevisionId */
   } sys_vars;

   /* OA metric sets, indexed by GUID, as known to Mesa at build time;
    * used to cross-reference the GUIDs of the configs advertised by the
    * kernel at runtime.
    */
   struct hash_table *oa_metrics_table;

   /* Location of the device's sysfs entry. */
   char sysfs_dev_dir[256];

   /* ioctl callback supplied to gen_perf_new(). */
   int (*ioctl)(int, unsigned long, void *);
};
198
199static inline size_t
200gen_perf_query_counter_get_size(const struct gen_perf_query_counter *counter)
201{
202   switch (counter->data_type) {
203   case GEN_PERF_COUNTER_DATA_TYPE_BOOL32:
204      return sizeof(uint32_t);
205   case GEN_PERF_COUNTER_DATA_TYPE_UINT32:
206      return sizeof(uint32_t);
207   case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
208      return sizeof(uint64_t);
209   case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
210      return sizeof(float);
211   case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE:
212      return sizeof(double);
213   default:
214      unreachable("invalid counter data type");
215   }
216}
217
218static inline struct gen_perf_query_info *
219gen_perf_query_append_query_info(struct gen_perf *perf, int max_counters)
220{
221   struct gen_perf_query_info *query;
222
223   perf->queries = reralloc(perf, perf->queries,
224                            struct gen_perf_query_info,
225                            ++perf->n_queries);
226   query = &perf->queries[perf->n_queries - 1];
227   memset(query, 0, sizeof(*query));
228
229   if (max_counters > 0) {
230      query->max_counters = max_counters;
231      query->counters =
232         rzalloc_array(perf, struct gen_perf_query_counter, max_counters);
233   }
234
235   return query;
236}
237
238static inline void
239gen_perf_query_info_add_stat_reg(struct gen_perf_query_info *query,
240                                 uint32_t reg,
241                                 uint32_t numerator,
242                                 uint32_t denominator,
243                                 const char *name,
244                                 const char *description)
245{
246   struct gen_perf_query_counter *counter;
247
248   assert(query->n_counters < query->max_counters);
249
250   counter = &query->counters[query->n_counters];
251   counter->name = name;
252   counter->desc = description;
253   counter->type = GEN_PERF_COUNTER_TYPE_RAW;
254   counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64;
255   counter->offset = sizeof(uint64_t) * query->n_counters;
256   counter->pipeline_stat.reg = reg;
257   counter->pipeline_stat.numerator = numerator;
258   counter->pipeline_stat.denominator = denominator;
259
260   query->n_counters++;
261}
262
/**
 * Convenience wrapper around gen_perf_query_info_add_stat_reg() that
 * registers `reg` with a numerator and denominator of 1 and uses `name`
 * as both the counter's name and its description.
 */
static inline void
gen_perf_query_info_add_basic_stat_reg(struct gen_perf_query_info *query,
                                       uint32_t reg, const char *name)
{
   const uint32_t numerator = 1;
   const uint32_t denominator = 1;
   const char *description = name;

   gen_perf_query_info_add_stat_reg(query, reg, numerator, denominator,
                                    name, description);
}
269
270static inline struct gen_perf *
271gen_perf_new(void *ctx, int (*ioctl_cb)(int, unsigned long, void *))
272{
273   struct gen_perf *perf = rzalloc(ctx, struct gen_perf);
274
275   perf->ioctl = ioctl_cb;
276
277   return perf;
278}
279
/* The functions below are implemented outside this header.
 * NOTE(review): the per-function notes are inferred from names and
 * signatures only — confirm against the definitions.
 */

/* Presumably populates perf with the OA metric sets for the device
 * described by devinfo, using the file descriptor fd.
 */
bool
gen_perf_load_oa_metrics(struct gen_perf *perf, int fd,
                         const struct gen_device_info *devinfo);

/* Presumably looks up the metric set identified by guid and stores its
 * id in *metric_id.
 */
bool
gen_perf_load_metric_id(struct gen_perf *perf, const char *guid,
                        uint64_t *metric_id);

/* Presumably fills result's slice/unslice frequencies from the reports
 * at start and end.
 */
void
gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result,
                                       const struct gen_device_info *devinfo,
                                       const uint32_t *start,
                                       const uint32_t *end);

/* Presumably accumulates the counter deltas between the reports at start
 * and end into result, according to query's layout.
 */
void
gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
                                 const struct gen_perf_query_info *query,
                                 const uint32_t *start,
                                 const uint32_t *end);

/* Presumably resets result to an empty state. */
void
gen_perf_query_result_clear(struct gen_perf_query_result *result);
294
295
296#endif /* GEN_PERF_H */
297