anv_perf.c revision 7ec681f3
1/*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24#include <assert.h>
25#include <stdbool.h>
26#include <stdint.h>
27
28#include "anv_private.h"
29#include "vk_util.h"
30
31#include "perf/intel_perf.h"
32#include "perf/intel_perf_mdapi.h"
33
34#include "util/mesa-sha1.h"
35
36void
37anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
38{
39   const struct intel_device_info *devinfo = &device->info;
40
41   device->perf = NULL;
42
43   /* We need self modifying batches. The i915 parser prevents it on
44    * Gfx7.5 :( maybe one day.
45    */
46   if (devinfo->ver < 8)
47      return;
48
49   struct intel_perf_config *perf = intel_perf_new(NULL);
50
51   intel_perf_init_metrics(perf, &device->info, fd,
52                           false /* pipeline statistics */,
53                           true /* register snapshots */);
54
55   if (!perf->n_queries) {
56      if (perf->platform_supported) {
57         static bool warned_once = false;
58
59         if (!warned_once) {
60            mesa_logw("Performance support disabled, "
61                      "consider sysctl dev.i915.perf_stream_paranoid=0\n");
62            warned_once = true;
63         }
64      }
65      goto err;
66   }
67
68   /* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
69    * perf revision 2.
70    */
71   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
72      if (!intel_perf_has_hold_preemption(perf))
73         goto err;
74   }
75
76   device->perf = perf;
77
78   /* Compute the number of commands we need to implement a performance
79    * query.
80    */
81   const struct intel_perf_query_field_layout *layout = &perf->query_layout;
82   device->n_perf_query_commands = 0;
83   for (uint32_t f = 0; f < layout->n_fields; f++) {
84      struct intel_perf_query_field *field = &layout->fields[f];
85
86      switch (field->type) {
87      case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:
88         device->n_perf_query_commands++;
89         break;
90      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
91      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
92      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
93      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
94         device->n_perf_query_commands += field->size / 4;
95         break;
96      }
97   }
98   device->n_perf_query_commands *= 2; /* Begin & End */
99   device->n_perf_query_commands += 1; /* availability */
100
101   return;
102
103 err:
104   ralloc_free(perf);
105}
106
107void
108anv_device_perf_init(struct anv_device *device)
109{
110   device->perf_fd = -1;
111}
112
113static int
114anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
115{
116   uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];
117   struct drm_i915_perf_open_param param;
118   int p = 0, stream_fd;
119
120   properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;
121   properties[p++] = true;
122
123   properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;
124   properties[p++] = metric_id;
125
126   properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
127   properties[p++] = device->info.ver >= 8 ?
128      I915_OA_FORMAT_A32u40_A4u32_B8_C8 :
129      I915_OA_FORMAT_A45_B8_C8;
130
131   properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
132   properties[p++] = 31; /* slowest sampling period */
133
134   properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;
135   properties[p++] = device->context_id;
136
137   properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION;
138   properties[p++] = true;
139
140   /* If global SSEU is available, pin it to the default. This will ensure on
141    * Gfx11 for instance we use the full EU array. Initially when perf was
142    * enabled we would use only half on Gfx11 because of functional
143    * requirements.
144    */
145   if (intel_perf_has_global_sseu(device->physical->perf)) {
146      properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
147      properties[p++] = (uintptr_t) &device->physical->perf->sseu;
148   }
149
150   memset(&param, 0, sizeof(param));
151   param.flags = 0;
152   param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;
153   param.properties_ptr = (uintptr_t)properties;
154   param.num_properties = p / 2;
155
156   stream_fd = intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, &param);
157   return stream_fd;
158}
159
160/* VK_INTEL_performance_query */
161VkResult anv_InitializePerformanceApiINTEL(
162    VkDevice                                    _device,
163    const VkInitializePerformanceApiInfoINTEL*  pInitializeInfo)
164{
165   ANV_FROM_HANDLE(anv_device, device, _device);
166
167   if (!device->physical->perf)
168      return VK_ERROR_EXTENSION_NOT_PRESENT;
169
170   /* Not much to do here */
171   return VK_SUCCESS;
172}
173
174VkResult anv_GetPerformanceParameterINTEL(
175    VkDevice                                    _device,
176    VkPerformanceParameterTypeINTEL             parameter,
177    VkPerformanceValueINTEL*                    pValue)
178{
179      ANV_FROM_HANDLE(anv_device, device, _device);
180
181      if (!device->physical->perf)
182         return VK_ERROR_EXTENSION_NOT_PRESENT;
183
184      VkResult result = VK_SUCCESS;
185      switch (parameter) {
186      case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL:
187         pValue->type = VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL;
188         pValue->data.valueBool = VK_TRUE;
189         break;
190
191      case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL:
192         pValue->type = VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL;
193         pValue->data.value32 = 25;
194         break;
195
196      default:
197         result = VK_ERROR_FEATURE_NOT_PRESENT;
198         break;
199      }
200
201      return result;
202}
203
204VkResult anv_CmdSetPerformanceMarkerINTEL(
205    VkCommandBuffer                             commandBuffer,
206    const VkPerformanceMarkerInfoINTEL*         pMarkerInfo)
207{
208   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
209
210   cmd_buffer->intel_perf_marker = pMarkerInfo->marker;
211
212   return VK_SUCCESS;
213}
214
215VkResult anv_AcquirePerformanceConfigurationINTEL(
216    VkDevice                                    _device,
217    const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo,
218    VkPerformanceConfigurationINTEL*            pConfiguration)
219{
220   ANV_FROM_HANDLE(anv_device, device, _device);
221   struct anv_performance_configuration_intel *config;
222
223   config = vk_object_alloc(&device->vk, NULL, sizeof(*config),
224                            VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL);
225   if (!config)
226      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
227
228   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
229      config->register_config =
230         intel_perf_load_configuration(device->physical->perf, device->fd,
231                                     INTEL_PERF_QUERY_GUID_MDAPI);
232      if (!config->register_config) {
233         vk_object_free(&device->vk, NULL, config);
234         return VK_INCOMPLETE;
235      }
236
237      int ret =
238         intel_perf_store_configuration(device->physical->perf, device->fd,
239                                      config->register_config, NULL /* guid */);
240      if (ret < 0) {
241         ralloc_free(config->register_config);
242         vk_object_free(&device->vk, NULL, config);
243         return VK_INCOMPLETE;
244      }
245
246      config->config_id = ret;
247   }
248
249   *pConfiguration = anv_performance_configuration_intel_to_handle(config);
250
251   return VK_SUCCESS;
252}
253
254VkResult anv_ReleasePerformanceConfigurationINTEL(
255    VkDevice                                    _device,
256    VkPerformanceConfigurationINTEL             _configuration)
257{
258   ANV_FROM_HANDLE(anv_device, device, _device);
259   ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
260
261   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG))
262      intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config->config_id);
263
264   ralloc_free(config->register_config);
265
266   vk_object_free(&device->vk, NULL, config);
267
268   return VK_SUCCESS;
269}
270
271VkResult anv_QueueSetPerformanceConfigurationINTEL(
272    VkQueue                                     _queue,
273    VkPerformanceConfigurationINTEL             _configuration)
274{
275   ANV_FROM_HANDLE(anv_queue, queue, _queue);
276   ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
277   struct anv_device *device = queue->device;
278
279   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
280      if (device->perf_fd < 0) {
281         device->perf_fd = anv_device_perf_open(device, config->config_id);
282         if (device->perf_fd < 0)
283            return VK_ERROR_INITIALIZATION_FAILED;
284      } else {
285         int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
286                               (void *)(uintptr_t) config->config_id);
287         if (ret < 0)
288            return anv_device_set_lost(device, "i915-perf config failed: %m");
289      }
290   }
291
292   return VK_SUCCESS;
293}
294
295void anv_UninitializePerformanceApiINTEL(
296    VkDevice                                    _device)
297{
298   ANV_FROM_HANDLE(anv_device, device, _device);
299
300   if (device->perf_fd >= 0) {
301      close(device->perf_fd);
302      device->perf_fd = -1;
303   }
304}
305
306/* VK_KHR_performance_query */
307static const VkPerformanceCounterUnitKHR
308intel_perf_counter_unit_to_vk_unit[] = {
309   [INTEL_PERF_COUNTER_UNITS_BYTES]                                = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
310   [INTEL_PERF_COUNTER_UNITS_HZ]                                   = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
311   [INTEL_PERF_COUNTER_UNITS_NS]                                   = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
312   [INTEL_PERF_COUNTER_UNITS_US]                                   = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, /* todo */
313   [INTEL_PERF_COUNTER_UNITS_PIXELS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
314   [INTEL_PERF_COUNTER_UNITS_TEXELS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
315   [INTEL_PERF_COUNTER_UNITS_THREADS]                              = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
316   [INTEL_PERF_COUNTER_UNITS_PERCENT]                              = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
317   [INTEL_PERF_COUNTER_UNITS_MESSAGES]                             = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
318   [INTEL_PERF_COUNTER_UNITS_NUMBER]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
319   [INTEL_PERF_COUNTER_UNITS_CYCLES]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
320   [INTEL_PERF_COUNTER_UNITS_EVENTS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
321   [INTEL_PERF_COUNTER_UNITS_UTILIZATION]                          = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
322   [INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES]           = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
323   [INTEL_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
324   [INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES]        = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
325   [INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE]           = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
326};
327
328static const VkPerformanceCounterStorageKHR
329intel_perf_counter_data_type_to_vk_storage[] = {
330   [INTEL_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
331   [INTEL_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
332   [INTEL_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
333   [INTEL_PERF_COUNTER_DATA_TYPE_FLOAT]  = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
334   [INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
335};
336
337VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
338    VkPhysicalDevice                            physicalDevice,
339    uint32_t                                    queueFamilyIndex,
340    uint32_t*                                   pCounterCount,
341    VkPerformanceCounterKHR*                    pCounters,
342    VkPerformanceCounterDescriptionKHR*         pCounterDescriptions)
343{
344   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
345   struct intel_perf_config *perf = pdevice->perf;
346
347   uint32_t desc_count = *pCounterCount;
348
349   VK_OUTARRAY_MAKE(out, pCounters, pCounterCount);
350   VK_OUTARRAY_MAKE(out_desc, pCounterDescriptions, &desc_count);
351
352   for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {
353      const struct intel_perf_query_counter *intel_counter = perf->counter_infos[c].counter;
354
355      vk_outarray_append(&out, counter) {
356         counter->unit = intel_perf_counter_unit_to_vk_unit[intel_counter->units];
357         counter->scope = VK_QUERY_SCOPE_COMMAND_KHR;
358         counter->storage = intel_perf_counter_data_type_to_vk_storage[intel_counter->data_type];
359
360         unsigned char sha1_result[20];
361         _mesa_sha1_compute(intel_counter->symbol_name,
362                            strlen(intel_counter->symbol_name),
363                            sha1_result);
364         memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
365      }
366
367      vk_outarray_append(&out_desc, desc) {
368         desc->flags = 0; /* None so far. */
369         snprintf(desc->name, sizeof(desc->name), "%s", intel_counter->name);
370         snprintf(desc->category, sizeof(desc->category), "%s", intel_counter->category);
371         snprintf(desc->description, sizeof(desc->description), "%s", intel_counter->desc);
372      }
373   }
374
375   return vk_outarray_status(&out);
376}
377
378void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
379    VkPhysicalDevice                            physicalDevice,
380    const VkQueryPoolPerformanceCreateInfoKHR*  pPerformanceQueryCreateInfo,
381    uint32_t*                                   pNumPasses)
382{
383   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
384   struct intel_perf_config *perf = pdevice->perf;
385
386   if (!perf) {
387      *pNumPasses = 0;
388      return;
389   }
390
391   *pNumPasses = intel_perf_get_n_passes(perf,
392                                       pPerformanceQueryCreateInfo->pCounterIndices,
393                                       pPerformanceQueryCreateInfo->counterIndexCount,
394                                       NULL);
395}
396
397VkResult anv_AcquireProfilingLockKHR(
398    VkDevice                                    _device,
399    const VkAcquireProfilingLockInfoKHR*        pInfo)
400{
401   ANV_FROM_HANDLE(anv_device, device, _device);
402   struct intel_perf_config *perf = device->physical->perf;
403   struct intel_perf_query_info *first_metric_set = &perf->queries[0];
404   int fd = -1;
405
406   assert(device->perf_fd == -1);
407
408   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
409      fd = anv_device_perf_open(device, first_metric_set->oa_metrics_set_id);
410      if (fd < 0)
411         return VK_TIMEOUT;
412   }
413
414   device->perf_fd = fd;
415   return VK_SUCCESS;
416}
417
418void anv_ReleaseProfilingLockKHR(
419    VkDevice                                    _device)
420{
421   ANV_FROM_HANDLE(anv_device, device, _device);
422
423   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
424      assert(device->perf_fd >= 0);
425      close(device->perf_fd);
426   }
427   device->perf_fd = -1;
428}
429
430void
431anv_perf_write_pass_results(struct intel_perf_config *perf,
432                            struct anv_query_pool *pool, uint32_t pass,
433                            const struct intel_perf_query_result *accumulated_results,
434                            union VkPerformanceCounterResultKHR *results)
435{
436   for (uint32_t c = 0; c < pool->n_counters; c++) {
437      const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c];
438
439      if (counter_pass->pass != pass)
440         continue;
441
442      switch (pool->pass_query[pass]->kind) {
443      case INTEL_PERF_QUERY_TYPE_PIPELINE: {
444         assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
445         uint32_t accu_offset = counter_pass->counter->offset / sizeof(uint64_t);
446         results[c].uint64 = accumulated_results->accumulator[accu_offset];
447         break;
448      }
449
450      case INTEL_PERF_QUERY_TYPE_OA:
451      case INTEL_PERF_QUERY_TYPE_RAW:
452         switch (counter_pass->counter->data_type) {
453         case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
454            results[c].uint64 =
455               counter_pass->counter->oa_counter_read_uint64(perf,
456                                                             counter_pass->query,
457                                                             accumulated_results);
458            break;
459         case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
460            results[c].float32 =
461               counter_pass->counter->oa_counter_read_float(perf,
462                                                            counter_pass->query,
463                                                            accumulated_results);
464            break;
465         default:
466            /* So far we aren't using uint32, double or bool32... */
467            unreachable("unexpected counter data type");
468         }
469         break;
470
471      default:
472         unreachable("invalid query type");
473      }
474
475      /* The Vulkan extension only has nanoseconds as a unit */
476      if (counter_pass->counter->units == INTEL_PERF_COUNTER_UNITS_US) {
477         assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
478         results[c].uint64 *= 1000;
479      }
480   }
481}
482