1/*
2 * Copyright © 2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include "crocus_monitor.h"
24
25#include <xf86drm.h>
26
27#include "crocus_screen.h"
28#include "crocus_context.h"
29
30#include "perf/intel_perf.h"
31#include "perf/intel_perf_query.h"
32#include "perf/intel_perf_regs.h"
33
/* State for one AMD_performance_monitor object: the subset of counters the
 * user selected (all from a single perf query group) plus the underlying
 * intel perf query and the CPU buffer its raw results are read into.
 */
struct crocus_monitor_object {
   int num_active_counters;   /* number of entries in active_counters */
   int *active_counters;      /* counter indices within the query's group */

   size_t result_size;          /* bytes in result_buffer (query data_size) */
   unsigned char *result_buffer; /* raw query data read back from the HW */

   struct intel_perf_query_object *query; /* backing intel perf query */
};
43
44int
45crocus_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
46                        struct pipe_driver_query_info *info)
47{
48   const struct crocus_screen *screen = (struct crocus_screen *)pscreen;
49   assert(screen->monitor_cfg);
50   if (!screen->monitor_cfg)
51      return 0;
52
53   const struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
54
55   if (!info) {
56      /* return the number of metrics */
57      return monitor_cfg->num_counters;
58   }
59
60   const struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
61   const int group = monitor_cfg->counters[index].group;
62   const int counter_index = monitor_cfg->counters[index].counter;
63   struct intel_perf_query_counter *counter =
64      &perf_cfg->queries[group].counters[counter_index];
65
66   info->group_id = group;
67   info->name = counter->name;
68   info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
69
70   if (counter->type == INTEL_PERF_COUNTER_TYPE_THROUGHPUT)
71      info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
72   else
73      info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
74   switch (counter->data_type) {
75   case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
76   case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
77      info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
78      info->max_value.u32 = 0;
79      break;
80   case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
81      info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
82      info->max_value.u64 = 0;
83      break;
84   case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
85   case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:
86      info->type = PIPE_DRIVER_QUERY_TYPE_FLOAT;
87      info->max_value.u64 = -1;
88      break;
89   default:
90      assert(false);
91      break;
92   }
93
94   /* indicates that this is an OA query, not a pipeline statistics query */
95   info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
96   return 1;
97}
98
/* Function-pointer types used to populate intel_perf_config::vtbl below.
 * The perf code is driver-agnostic, so the hooks take void pointers; the
 * crocus implementations cast them back to the concrete driver types
 * (crocus_bo, crocus_batch, crocus_context).
 */
typedef void (*bo_unreference_t)(void *);
typedef void *(*bo_map_t)(void *, void *, unsigned flags);
typedef void (*bo_unmap_t)(void *);
typedef void (*emit_mi_report_t)(void *, void *, uint32_t, uint32_t);
typedef void (*emit_mi_flush_t)(void *);
typedef void (*capture_frequency_stat_register_t)(void *, void *,
                                                  uint32_t );
typedef void (*store_register_mem64_t)(void *ctx, void *bo,
                                       uint32_t reg, uint32_t offset);
typedef bool (*batch_references_t)(void *batch, void *bo);
typedef void (*bo_wait_rendering_t)(void *bo);
typedef int (*bo_busy_t)(void *bo);
111
/* Adapt crocus_bo_alloc() to the void-pointer bo_alloc signature that the
 * intel perf vtbl expects.
 */
static void *
crocus_oa_bo_alloc(void *bufmgr, const char *name, uint64_t size)
{
   void *bo = crocus_bo_alloc(bufmgr, name, size);
   return bo;
}
117
/* NOTE(review): disabled full-pipeline flush helper for OA metrics, kept
 * for reference; the perf vtbl currently uses batchbuffer_flush instead.
 * Consider deleting if it is never wired up.
 */
#if 0
static void
crocus_monitor_emit_mi_flush(struct crocus_context *ice)
{
   const int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH |
                     PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                     PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                     PIPE_CONTROL_DATA_CACHE_FLUSH |
                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                     PIPE_CONTROL_VF_CACHE_INVALIDATE |
                     PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                     PIPE_CONTROL_CS_STALL;
   crocus_emit_pipe_control_flush(&ice->batches[CROCUS_BATCH_RENDER],
                                  "OA metrics", flags);
}
#endif
134
135static void
136crocus_monitor_emit_mi_report_perf_count(void *c,
137                                         void *bo,
138                                         uint32_t offset_in_bytes,
139                                         uint32_t report_id)
140{
141   struct crocus_context *ice = c;
142   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
143   struct crocus_screen *screen = batch->screen;
144   screen->vtbl.emit_mi_report_perf_count(batch, bo, offset_in_bytes, report_id);
145}
146
147static void
148crocus_monitor_batchbuffer_flush(void *c, const char *file, int line)
149{
150   struct crocus_context *ice = c;
151   _crocus_batch_flush(&ice->batches[CROCUS_BATCH_RENDER], __FILE__, __LINE__);
152}
153
/* NOTE(review): disabled frequency-capture / 64-bit register-store hooks,
 * kept for reference; they are not installed in the perf vtbl below.
 * Consider deleting if they are never wired up.
 */
#if 0
static void
crocus_monitor_capture_frequency_stat_register(void *ctx,
                                               void *bo,
                                               uint32_t bo_offset)
{
   struct crocus_context *ice = ctx;
   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
   ice->vtbl.store_register_mem32(batch, GEN9_RPSTAT0, bo, bo_offset, false);
}

static void
crocus_monitor_store_register_mem64(void *ctx, void *bo,
                                    uint32_t reg, uint32_t offset)
{
   struct crocus_context *ice = ctx;
   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
   ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
}
#endif
174
175static bool
176crocus_monitor_init_metrics(struct crocus_screen *screen)
177{
178   struct crocus_monitor_config *monitor_cfg =
179      rzalloc(screen, struct crocus_monitor_config);
180   struct intel_perf_config *perf_cfg = NULL;
181   if (unlikely(!monitor_cfg))
182      goto allocation_error;
183   perf_cfg = intel_perf_new(monitor_cfg);
184   if (unlikely(!perf_cfg))
185      goto allocation_error;
186
187   monitor_cfg->perf_cfg = perf_cfg;
188
189   perf_cfg->vtbl.bo_alloc = crocus_oa_bo_alloc;
190   perf_cfg->vtbl.bo_unreference = (bo_unreference_t)crocus_bo_unreference;
191   perf_cfg->vtbl.bo_map = (bo_map_t)crocus_bo_map;
192   perf_cfg->vtbl.bo_unmap = (bo_unmap_t)crocus_bo_unmap;
193
194   perf_cfg->vtbl.emit_mi_report_perf_count =
195      (emit_mi_report_t)crocus_monitor_emit_mi_report_perf_count;
196   perf_cfg->vtbl.batchbuffer_flush = crocus_monitor_batchbuffer_flush;
197   perf_cfg->vtbl.batch_references = (batch_references_t)crocus_batch_references;
198   perf_cfg->vtbl.bo_wait_rendering =
199      (bo_wait_rendering_t)crocus_bo_wait_rendering;
200   perf_cfg->vtbl.bo_busy = (bo_busy_t)crocus_bo_busy;
201
202   intel_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd, false, false);
203   screen->monitor_cfg = monitor_cfg;
204
205   /* a gallium "group" is equivalent to a gen "query"
206    * a gallium "query" is equivalent to a gen "query_counter"
207    *
208    * Each gen_query supports a specific number of query_counters.  To
209    * allocate the array of crocus_monitor_counter, we need an upper bound
210    * (ignoring duplicate query_counters).
211    */
212   int gen_query_counters_count = 0;
213   for (int gen_query_id = 0;
214        gen_query_id < perf_cfg->n_queries;
215        ++gen_query_id) {
216      gen_query_counters_count += perf_cfg->queries[gen_query_id].n_counters;
217   }
218
219   monitor_cfg->counters = rzalloc_size(monitor_cfg,
220                                        sizeof(struct crocus_monitor_counter) *
221                                        gen_query_counters_count);
222   if (unlikely(!monitor_cfg->counters))
223      goto allocation_error;
224
225   int crocus_monitor_id = 0;
226   for (int group = 0; group < perf_cfg->n_queries; ++group) {
227      for (int counter = 0;
228           counter < perf_cfg->queries[group].n_counters;
229           ++counter) {
230         /* Check previously identified metrics to filter out duplicates. The
231          * user is not helped by having the same metric available in several
232          * groups. (n^2 algorithm).
233          */
234         bool duplicate = false;
235         for (int existing_group = 0;
236              existing_group < group && !duplicate;
237              ++existing_group) {
238            for (int existing_counter = 0;
239                 existing_counter < perf_cfg->queries[existing_group].n_counters && !duplicate;
240                 ++existing_counter) {
241               const char *current_name =
242                  perf_cfg->queries[group].counters[counter].name;
243               const char *existing_name =
244                  perf_cfg->queries[existing_group].counters[existing_counter].name;
245               if (strcmp(current_name, existing_name) == 0) {
246                  duplicate = true;
247               }
248            }
249         }
250         if (duplicate)
251            continue;
252         monitor_cfg->counters[crocus_monitor_id].group = group;
253         monitor_cfg->counters[crocus_monitor_id].counter = counter;
254         ++crocus_monitor_id;
255      }
256   }
257   monitor_cfg->num_counters = crocus_monitor_id;
258   return monitor_cfg->num_counters;
259
260allocation_error:
261   if (monitor_cfg)
262      free(monitor_cfg->counters);
263   free(perf_cfg);
264   free(monitor_cfg);
265   return false;
266}
267
268int
269crocus_get_monitor_group_info(struct pipe_screen *pscreen,
270                              unsigned group_index,
271                              struct pipe_driver_query_group_info *info)
272{
273   struct crocus_screen *screen = (struct crocus_screen *)pscreen;
274   if (!screen->monitor_cfg) {
275      if (!crocus_monitor_init_metrics(screen))
276         return 0;
277   }
278
279   const struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
280   const struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
281
282   if (!info) {
283      /* return the count that can be queried */
284      return perf_cfg->n_queries;
285   }
286
287   if (group_index >= perf_cfg->n_queries) {
288      /* out of range */
289      return 0;
290   }
291
292   struct intel_perf_query_info *query = &perf_cfg->queries[group_index];
293
294   info->name = query->name;
295   info->max_active_queries = query->n_counters;
296   info->num_queries = query->n_counters;
297
298   return 1;
299}
300
301static void
302crocus_init_monitor_ctx(struct crocus_context *ice)
303{
304   struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen;
305   struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
306
307   ice->perf_ctx = intel_perf_new_context(ice);
308   if (unlikely(!ice->perf_ctx))
309      return;
310
311   struct intel_perf_context *perf_ctx = ice->perf_ctx;
312   struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
313   intel_perf_init_context(perf_ctx,
314                           perf_cfg,
315                           ice,
316                           ice,
317                           screen->bufmgr,
318                           &screen->devinfo,
319                           ice->batches[CROCUS_BATCH_RENDER].hw_ctx_id,
320                           screen->fd);
321}
322
323/* entry point for GenPerfMonitorsAMD */
324struct crocus_monitor_object *
325crocus_create_monitor_object(struct crocus_context *ice,
326                             unsigned num_queries,
327                             unsigned *query_types)
328{
329   struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen;
330   struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
331   struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
332   struct intel_perf_query_object *query_obj = NULL;
333
334   /* initialize perf context if this has not already been done.  This
335    * function is the first entry point that carries the gl context.
336    */
337   if (ice->perf_ctx == NULL) {
338      crocus_init_monitor_ctx(ice);
339   }
340   struct intel_perf_context *perf_ctx = ice->perf_ctx;
341
342   assert(num_queries > 0);
343   int query_index = query_types[0] - PIPE_QUERY_DRIVER_SPECIFIC;
344   assert(query_index <= monitor_cfg->num_counters);
345   const int group = monitor_cfg->counters[query_index].group;
346
347   struct crocus_monitor_object *monitor =
348      calloc(1, sizeof(struct crocus_monitor_object));
349   if (unlikely(!monitor))
350      goto allocation_failure;
351
352   monitor->num_active_counters = num_queries;
353   monitor->active_counters = calloc(num_queries, sizeof(int));
354   if (unlikely(!monitor->active_counters))
355      goto allocation_failure;
356
357   for (int i = 0; i < num_queries; ++i) {
358      unsigned current_query = query_types[i];
359      unsigned current_query_index = current_query - PIPE_QUERY_DRIVER_SPECIFIC;
360
361      /* all queries must be in the same group */
362      assert(current_query_index <= monitor_cfg->num_counters);
363      assert(monitor_cfg->counters[current_query_index].group == group);
364      monitor->active_counters[i] =
365         monitor_cfg->counters[current_query_index].counter;
366   }
367
368   /* create the intel_perf_query */
369   query_obj = intel_perf_new_query(perf_ctx, group);
370   if (unlikely(!query_obj))
371      goto allocation_failure;
372
373   monitor->query = query_obj;
374   monitor->result_size = perf_cfg->queries[group].data_size;
375   monitor->result_buffer = calloc(1, monitor->result_size);
376   if (unlikely(!monitor->result_buffer))
377      goto allocation_failure;
378
379   return monitor;
380
381allocation_failure:
382   if (monitor) {
383      free(monitor->active_counters);
384      free(monitor->result_buffer);
385   }
386   free(query_obj);
387   free(monitor);
388   return NULL;
389}
390
391void
392crocus_destroy_monitor_object(struct pipe_context *ctx,
393                              struct crocus_monitor_object *monitor)
394{
395   struct crocus_context *ice = (struct crocus_context *)ctx;
396
397   intel_perf_delete_query(ice->perf_ctx, monitor->query);
398   free(monitor->result_buffer);
399   monitor->result_buffer = NULL;
400   free(monitor->active_counters);
401   monitor->active_counters = NULL;
402   free(monitor);
403}
404
405bool
406crocus_begin_monitor(struct pipe_context *ctx,
407                     struct crocus_monitor_object *monitor)
408{
409   struct crocus_context *ice = (void *) ctx;
410   struct intel_perf_context *perf_ctx = ice->perf_ctx;
411
412   return intel_perf_begin_query(perf_ctx, monitor->query);
413}
414
415bool
416crocus_end_monitor(struct pipe_context *ctx,
417                   struct crocus_monitor_object *monitor)
418{
419   struct crocus_context *ice = (void *) ctx;
420   struct intel_perf_context *perf_ctx = ice->perf_ctx;
421
422   intel_perf_end_query(perf_ctx, monitor->query);
423   return true;
424}
425
426bool
427crocus_get_monitor_result(struct pipe_context *ctx,
428                          struct crocus_monitor_object *monitor,
429                          bool wait,
430                          union pipe_numeric_type_union *result)
431{
432   struct crocus_context *ice = (void *) ctx;
433   struct intel_perf_context *perf_ctx = ice->perf_ctx;
434   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
435
436   bool monitor_ready =
437      intel_perf_is_query_ready(perf_ctx, monitor->query, batch);
438
439   if (!monitor_ready) {
440      if (!wait)
441         return false;
442      intel_perf_wait_query(perf_ctx, monitor->query, batch);
443   }
444
445   assert(intel_perf_is_query_ready(perf_ctx, monitor->query, batch));
446
447   unsigned bytes_written;
448   intel_perf_get_query_data(perf_ctx, monitor->query, batch,
449                             monitor->result_size,
450                             (unsigned*) monitor->result_buffer,
451                             &bytes_written);
452   if (bytes_written != monitor->result_size)
453      return false;
454
455   /* copy metrics into the batch result */
456   for (int i = 0; i < monitor->num_active_counters; ++i) {
457      int current_counter = monitor->active_counters[i];
458      const struct intel_perf_query_info *info =
459         intel_perf_query_info(monitor->query);
460      const struct intel_perf_query_counter *counter =
461         &info->counters[current_counter];
462      assert(intel_perf_query_counter_get_size(counter));
463      switch (counter->data_type) {
464      case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
465         result[i].u64 = *(uint64_t*)(monitor->result_buffer + counter->offset);
466         break;
467      case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
468         result[i].f = *(float*)(monitor->result_buffer + counter->offset);
469         break;
470      case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
471      case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
472         result[i].u64 = *(uint32_t*)(monitor->result_buffer + counter->offset);
473         break;
474      case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE: {
475         double v = *(double*)(monitor->result_buffer + counter->offset);
476         result[i].f = v;
477         break;
478      }
479      default:
480         unreachable("unexpected counter data type");
481      }
482   }
483   return true;
484}
485