17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2018 Intel Corporation
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
217ec681f3Smrg * DEALINGS IN THE SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
247ec681f3Smrg#include <assert.h>
257ec681f3Smrg#include <stdbool.h>
267ec681f3Smrg#include <stdint.h>
277ec681f3Smrg
287ec681f3Smrg#include "anv_private.h"
297ec681f3Smrg#include "vk_util.h"
307ec681f3Smrg
317ec681f3Smrg#include "perf/intel_perf.h"
327ec681f3Smrg#include "perf/intel_perf_mdapi.h"
337ec681f3Smrg
347ec681f3Smrg#include "util/mesa-sha1.h"
357ec681f3Smrg
367ec681f3Smrgvoid
377ec681f3Smrganv_physical_device_init_perf(struct anv_physical_device *device, int fd)
387ec681f3Smrg{
397ec681f3Smrg   const struct intel_device_info *devinfo = &device->info;
407ec681f3Smrg
417ec681f3Smrg   device->perf = NULL;
427ec681f3Smrg
437ec681f3Smrg   /* We need self modifying batches. The i915 parser prevents it on
447ec681f3Smrg    * Gfx7.5 :( maybe one day.
457ec681f3Smrg    */
467ec681f3Smrg   if (devinfo->ver < 8)
477ec681f3Smrg      return;
487ec681f3Smrg
497ec681f3Smrg   struct intel_perf_config *perf = intel_perf_new(NULL);
507ec681f3Smrg
517ec681f3Smrg   intel_perf_init_metrics(perf, &device->info, fd,
527ec681f3Smrg                           false /* pipeline statistics */,
537ec681f3Smrg                           true /* register snapshots */);
547ec681f3Smrg
557ec681f3Smrg   if (!perf->n_queries) {
567ec681f3Smrg      if (perf->platform_supported) {
577ec681f3Smrg         static bool warned_once = false;
587ec681f3Smrg
597ec681f3Smrg         if (!warned_once) {
607ec681f3Smrg            mesa_logw("Performance support disabled, "
617ec681f3Smrg                      "consider sysctl dev.i915.perf_stream_paranoid=0\n");
627ec681f3Smrg            warned_once = true;
637ec681f3Smrg         }
647ec681f3Smrg      }
657ec681f3Smrg      goto err;
667ec681f3Smrg   }
677ec681f3Smrg
687ec681f3Smrg   /* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
697ec681f3Smrg    * perf revision 2.
707ec681f3Smrg    */
717ec681f3Smrg   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
727ec681f3Smrg      if (!intel_perf_has_hold_preemption(perf))
737ec681f3Smrg         goto err;
747ec681f3Smrg   }
757ec681f3Smrg
767ec681f3Smrg   device->perf = perf;
777ec681f3Smrg
787ec681f3Smrg   /* Compute the number of commands we need to implement a performance
797ec681f3Smrg    * query.
807ec681f3Smrg    */
817ec681f3Smrg   const struct intel_perf_query_field_layout *layout = &perf->query_layout;
827ec681f3Smrg   device->n_perf_query_commands = 0;
837ec681f3Smrg   for (uint32_t f = 0; f < layout->n_fields; f++) {
847ec681f3Smrg      struct intel_perf_query_field *field = &layout->fields[f];
857ec681f3Smrg
867ec681f3Smrg      switch (field->type) {
877ec681f3Smrg      case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:
887ec681f3Smrg         device->n_perf_query_commands++;
897ec681f3Smrg         break;
907ec681f3Smrg      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
917ec681f3Smrg      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
927ec681f3Smrg      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
937ec681f3Smrg      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
947ec681f3Smrg         device->n_perf_query_commands += field->size / 4;
957ec681f3Smrg         break;
967ec681f3Smrg      }
977ec681f3Smrg   }
987ec681f3Smrg   device->n_perf_query_commands *= 2; /* Begin & End */
997ec681f3Smrg   device->n_perf_query_commands += 1; /* availability */
1007ec681f3Smrg
1017ec681f3Smrg   return;
1027ec681f3Smrg
1037ec681f3Smrg err:
1047ec681f3Smrg   ralloc_free(perf);
1057ec681f3Smrg}
1067ec681f3Smrg
1077ec681f3Smrgvoid
1087ec681f3Smrganv_device_perf_init(struct anv_device *device)
1097ec681f3Smrg{
1107ec681f3Smrg   device->perf_fd = -1;
1117ec681f3Smrg}
1127ec681f3Smrg
1137ec681f3Smrgstatic int
1147ec681f3Smrganv_device_perf_open(struct anv_device *device, uint64_t metric_id)
1157ec681f3Smrg{
1167ec681f3Smrg   uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];
1177ec681f3Smrg   struct drm_i915_perf_open_param param;
1187ec681f3Smrg   int p = 0, stream_fd;
1197ec681f3Smrg
1207ec681f3Smrg   properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;
1217ec681f3Smrg   properties[p++] = true;
1227ec681f3Smrg
1237ec681f3Smrg   properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;
1247ec681f3Smrg   properties[p++] = metric_id;
1257ec681f3Smrg
1267ec681f3Smrg   properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
1277ec681f3Smrg   properties[p++] = device->info.ver >= 8 ?
1287ec681f3Smrg      I915_OA_FORMAT_A32u40_A4u32_B8_C8 :
1297ec681f3Smrg      I915_OA_FORMAT_A45_B8_C8;
1307ec681f3Smrg
1317ec681f3Smrg   properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
1327ec681f3Smrg   properties[p++] = 31; /* slowest sampling period */
1337ec681f3Smrg
1347ec681f3Smrg   properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;
1357ec681f3Smrg   properties[p++] = device->context_id;
1367ec681f3Smrg
1377ec681f3Smrg   properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION;
1387ec681f3Smrg   properties[p++] = true;
1397ec681f3Smrg
1407ec681f3Smrg   /* If global SSEU is available, pin it to the default. This will ensure on
1417ec681f3Smrg    * Gfx11 for instance we use the full EU array. Initially when perf was
1427ec681f3Smrg    * enabled we would use only half on Gfx11 because of functional
1437ec681f3Smrg    * requirements.
1447ec681f3Smrg    */
1457ec681f3Smrg   if (intel_perf_has_global_sseu(device->physical->perf)) {
1467ec681f3Smrg      properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
1477ec681f3Smrg      properties[p++] = (uintptr_t) &device->physical->perf->sseu;
1487ec681f3Smrg   }
1497ec681f3Smrg
1507ec681f3Smrg   memset(&param, 0, sizeof(param));
1517ec681f3Smrg   param.flags = 0;
1527ec681f3Smrg   param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;
1537ec681f3Smrg   param.properties_ptr = (uintptr_t)properties;
1547ec681f3Smrg   param.num_properties = p / 2;
1557ec681f3Smrg
1567ec681f3Smrg   stream_fd = intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, &param);
1577ec681f3Smrg   return stream_fd;
1587ec681f3Smrg}
1597ec681f3Smrg
1607ec681f3Smrg/* VK_INTEL_performance_query */
1617ec681f3SmrgVkResult anv_InitializePerformanceApiINTEL(
1627ec681f3Smrg    VkDevice                                    _device,
1637ec681f3Smrg    const VkInitializePerformanceApiInfoINTEL*  pInitializeInfo)
1647ec681f3Smrg{
1657ec681f3Smrg   ANV_FROM_HANDLE(anv_device, device, _device);
1667ec681f3Smrg
1677ec681f3Smrg   if (!device->physical->perf)
1687ec681f3Smrg      return VK_ERROR_EXTENSION_NOT_PRESENT;
1697ec681f3Smrg
1707ec681f3Smrg   /* Not much to do here */
1717ec681f3Smrg   return VK_SUCCESS;
1727ec681f3Smrg}
1737ec681f3Smrg
1747ec681f3SmrgVkResult anv_GetPerformanceParameterINTEL(
1757ec681f3Smrg    VkDevice                                    _device,
1767ec681f3Smrg    VkPerformanceParameterTypeINTEL             parameter,
1777ec681f3Smrg    VkPerformanceValueINTEL*                    pValue)
1787ec681f3Smrg{
1797ec681f3Smrg      ANV_FROM_HANDLE(anv_device, device, _device);
1807ec681f3Smrg
1817ec681f3Smrg      if (!device->physical->perf)
1827ec681f3Smrg         return VK_ERROR_EXTENSION_NOT_PRESENT;
1837ec681f3Smrg
1847ec681f3Smrg      VkResult result = VK_SUCCESS;
1857ec681f3Smrg      switch (parameter) {
1867ec681f3Smrg      case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL:
1877ec681f3Smrg         pValue->type = VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL;
1887ec681f3Smrg         pValue->data.valueBool = VK_TRUE;
1897ec681f3Smrg         break;
1907ec681f3Smrg
1917ec681f3Smrg      case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL:
1927ec681f3Smrg         pValue->type = VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL;
1937ec681f3Smrg         pValue->data.value32 = 25;
1947ec681f3Smrg         break;
1957ec681f3Smrg
1967ec681f3Smrg      default:
1977ec681f3Smrg         result = VK_ERROR_FEATURE_NOT_PRESENT;
1987ec681f3Smrg         break;
1997ec681f3Smrg      }
2007ec681f3Smrg
2017ec681f3Smrg      return result;
2027ec681f3Smrg}
2037ec681f3Smrg
2047ec681f3SmrgVkResult anv_CmdSetPerformanceMarkerINTEL(
2057ec681f3Smrg    VkCommandBuffer                             commandBuffer,
2067ec681f3Smrg    const VkPerformanceMarkerInfoINTEL*         pMarkerInfo)
2077ec681f3Smrg{
2087ec681f3Smrg   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
2097ec681f3Smrg
2107ec681f3Smrg   cmd_buffer->intel_perf_marker = pMarkerInfo->marker;
2117ec681f3Smrg
2127ec681f3Smrg   return VK_SUCCESS;
2137ec681f3Smrg}
2147ec681f3Smrg
2157ec681f3SmrgVkResult anv_AcquirePerformanceConfigurationINTEL(
2167ec681f3Smrg    VkDevice                                    _device,
2177ec681f3Smrg    const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo,
2187ec681f3Smrg    VkPerformanceConfigurationINTEL*            pConfiguration)
2197ec681f3Smrg{
2207ec681f3Smrg   ANV_FROM_HANDLE(anv_device, device, _device);
2217ec681f3Smrg   struct anv_performance_configuration_intel *config;
2227ec681f3Smrg
2237ec681f3Smrg   config = vk_object_alloc(&device->vk, NULL, sizeof(*config),
2247ec681f3Smrg                            VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL);
2257ec681f3Smrg   if (!config)
2267ec681f3Smrg      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2277ec681f3Smrg
2287ec681f3Smrg   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
2297ec681f3Smrg      config->register_config =
2307ec681f3Smrg         intel_perf_load_configuration(device->physical->perf, device->fd,
2317ec681f3Smrg                                     INTEL_PERF_QUERY_GUID_MDAPI);
2327ec681f3Smrg      if (!config->register_config) {
2337ec681f3Smrg         vk_object_free(&device->vk, NULL, config);
2347ec681f3Smrg         return VK_INCOMPLETE;
2357ec681f3Smrg      }
2367ec681f3Smrg
2377ec681f3Smrg      int ret =
2387ec681f3Smrg         intel_perf_store_configuration(device->physical->perf, device->fd,
2397ec681f3Smrg                                      config->register_config, NULL /* guid */);
2407ec681f3Smrg      if (ret < 0) {
2417ec681f3Smrg         ralloc_free(config->register_config);
2427ec681f3Smrg         vk_object_free(&device->vk, NULL, config);
2437ec681f3Smrg         return VK_INCOMPLETE;
2447ec681f3Smrg      }
2457ec681f3Smrg
2467ec681f3Smrg      config->config_id = ret;
2477ec681f3Smrg   }
2487ec681f3Smrg
2497ec681f3Smrg   *pConfiguration = anv_performance_configuration_intel_to_handle(config);
2507ec681f3Smrg
2517ec681f3Smrg   return VK_SUCCESS;
2527ec681f3Smrg}
2537ec681f3Smrg
2547ec681f3SmrgVkResult anv_ReleasePerformanceConfigurationINTEL(
2557ec681f3Smrg    VkDevice                                    _device,
2567ec681f3Smrg    VkPerformanceConfigurationINTEL             _configuration)
2577ec681f3Smrg{
2587ec681f3Smrg   ANV_FROM_HANDLE(anv_device, device, _device);
2597ec681f3Smrg   ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
2607ec681f3Smrg
2617ec681f3Smrg   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG))
2627ec681f3Smrg      intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config->config_id);
2637ec681f3Smrg
2647ec681f3Smrg   ralloc_free(config->register_config);
2657ec681f3Smrg
2667ec681f3Smrg   vk_object_free(&device->vk, NULL, config);
2677ec681f3Smrg
2687ec681f3Smrg   return VK_SUCCESS;
2697ec681f3Smrg}
2707ec681f3Smrg
2717ec681f3SmrgVkResult anv_QueueSetPerformanceConfigurationINTEL(
2727ec681f3Smrg    VkQueue                                     _queue,
2737ec681f3Smrg    VkPerformanceConfigurationINTEL             _configuration)
2747ec681f3Smrg{
2757ec681f3Smrg   ANV_FROM_HANDLE(anv_queue, queue, _queue);
2767ec681f3Smrg   ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
2777ec681f3Smrg   struct anv_device *device = queue->device;
2787ec681f3Smrg
2797ec681f3Smrg   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
2807ec681f3Smrg      if (device->perf_fd < 0) {
2817ec681f3Smrg         device->perf_fd = anv_device_perf_open(device, config->config_id);
2827ec681f3Smrg         if (device->perf_fd < 0)
2837ec681f3Smrg            return VK_ERROR_INITIALIZATION_FAILED;
2847ec681f3Smrg      } else {
2857ec681f3Smrg         int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
2867ec681f3Smrg                               (void *)(uintptr_t) config->config_id);
2877ec681f3Smrg         if (ret < 0)
2887ec681f3Smrg            return anv_device_set_lost(device, "i915-perf config failed: %m");
2897ec681f3Smrg      }
2907ec681f3Smrg   }
2917ec681f3Smrg
2927ec681f3Smrg   return VK_SUCCESS;
2937ec681f3Smrg}
2947ec681f3Smrg
2957ec681f3Smrgvoid anv_UninitializePerformanceApiINTEL(
2967ec681f3Smrg    VkDevice                                    _device)
2977ec681f3Smrg{
2987ec681f3Smrg   ANV_FROM_HANDLE(anv_device, device, _device);
2997ec681f3Smrg
3007ec681f3Smrg   if (device->perf_fd >= 0) {
3017ec681f3Smrg      close(device->perf_fd);
3027ec681f3Smrg      device->perf_fd = -1;
3037ec681f3Smrg   }
3047ec681f3Smrg}
3057ec681f3Smrg
3067ec681f3Smrg/* VK_KHR_performance_query */
3077ec681f3Smrgstatic const VkPerformanceCounterUnitKHR
3087ec681f3Smrgintel_perf_counter_unit_to_vk_unit[] = {
3097ec681f3Smrg   [INTEL_PERF_COUNTER_UNITS_BYTES]                                = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
3107ec681f3Smrg   [INTEL_PERF_COUNTER_UNITS_HZ]                                   = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
3117ec681f3Smrg   [INTEL_PERF_COUNTER_UNITS_NS]                                   = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
3127ec681f3Smrg   [INTEL_PERF_COUNTER_UNITS_US]                                   = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, /* todo */
3137ec681f3Smrg   [INTEL_PERF_COUNTER_UNITS_PIXELS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
3147ec681f3Smrg   [INTEL_PERF_COUNTER_UNITS_TEXELS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
3157ec681f3Smrg   [INTEL_PERF_COUNTER_UNITS_THREADS]                              = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
3167ec681f3Smrg   [INTEL_PERF_COUNTER_UNITS_PERCENT]                              = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
3177ec681f3Smrg   [INTEL_PERF_COUNTER_UNITS_MESSAGES]                             = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
3187ec681f3Smrg   [INTEL_PERF_COUNTER_UNITS_NUMBER]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
3197ec681f3Smrg   [INTEL_PERF_COUNTER_UNITS_CYCLES]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
3207ec681f3Smrg   [INTEL_PERF_COUNTER_UNITS_EVENTS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
3217ec681f3Smrg   [INTEL_PERF_COUNTER_UNITS_UTILIZATION]                          = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
3227ec681f3Smrg   [INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES]           = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
3237ec681f3Smrg   [INTEL_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
3247ec681f3Smrg   [INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES]        = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
3257ec681f3Smrg   [INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE]           = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
3267ec681f3Smrg};
3277ec681f3Smrg
3287ec681f3Smrgstatic const VkPerformanceCounterStorageKHR
3297ec681f3Smrgintel_perf_counter_data_type_to_vk_storage[] = {
3307ec681f3Smrg   [INTEL_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
3317ec681f3Smrg   [INTEL_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
3327ec681f3Smrg   [INTEL_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
3337ec681f3Smrg   [INTEL_PERF_COUNTER_DATA_TYPE_FLOAT]  = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
3347ec681f3Smrg   [INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
3357ec681f3Smrg};
3367ec681f3Smrg
3377ec681f3SmrgVkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
3387ec681f3Smrg    VkPhysicalDevice                            physicalDevice,
3397ec681f3Smrg    uint32_t                                    queueFamilyIndex,
3407ec681f3Smrg    uint32_t*                                   pCounterCount,
3417ec681f3Smrg    VkPerformanceCounterKHR*                    pCounters,
3427ec681f3Smrg    VkPerformanceCounterDescriptionKHR*         pCounterDescriptions)
3437ec681f3Smrg{
3447ec681f3Smrg   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
3457ec681f3Smrg   struct intel_perf_config *perf = pdevice->perf;
3467ec681f3Smrg
3477ec681f3Smrg   uint32_t desc_count = *pCounterCount;
3487ec681f3Smrg
3497ec681f3Smrg   VK_OUTARRAY_MAKE(out, pCounters, pCounterCount);
3507ec681f3Smrg   VK_OUTARRAY_MAKE(out_desc, pCounterDescriptions, &desc_count);
3517ec681f3Smrg
3527ec681f3Smrg   for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {
3537ec681f3Smrg      const struct intel_perf_query_counter *intel_counter = perf->counter_infos[c].counter;
3547ec681f3Smrg
3557ec681f3Smrg      vk_outarray_append(&out, counter) {
3567ec681f3Smrg         counter->unit = intel_perf_counter_unit_to_vk_unit[intel_counter->units];
3577ec681f3Smrg         counter->scope = VK_QUERY_SCOPE_COMMAND_KHR;
3587ec681f3Smrg         counter->storage = intel_perf_counter_data_type_to_vk_storage[intel_counter->data_type];
3597ec681f3Smrg
3607ec681f3Smrg         unsigned char sha1_result[20];
3617ec681f3Smrg         _mesa_sha1_compute(intel_counter->symbol_name,
3627ec681f3Smrg                            strlen(intel_counter->symbol_name),
3637ec681f3Smrg                            sha1_result);
3647ec681f3Smrg         memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
3657ec681f3Smrg      }
3667ec681f3Smrg
3677ec681f3Smrg      vk_outarray_append(&out_desc, desc) {
3687ec681f3Smrg         desc->flags = 0; /* None so far. */
3697ec681f3Smrg         snprintf(desc->name, sizeof(desc->name), "%s", intel_counter->name);
3707ec681f3Smrg         snprintf(desc->category, sizeof(desc->category), "%s", intel_counter->category);
3717ec681f3Smrg         snprintf(desc->description, sizeof(desc->description), "%s", intel_counter->desc);
3727ec681f3Smrg      }
3737ec681f3Smrg   }
3747ec681f3Smrg
3757ec681f3Smrg   return vk_outarray_status(&out);
3767ec681f3Smrg}
3777ec681f3Smrg
3787ec681f3Smrgvoid anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
3797ec681f3Smrg    VkPhysicalDevice                            physicalDevice,
3807ec681f3Smrg    const VkQueryPoolPerformanceCreateInfoKHR*  pPerformanceQueryCreateInfo,
3817ec681f3Smrg    uint32_t*                                   pNumPasses)
3827ec681f3Smrg{
3837ec681f3Smrg   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
3847ec681f3Smrg   struct intel_perf_config *perf = pdevice->perf;
3857ec681f3Smrg
3867ec681f3Smrg   if (!perf) {
3877ec681f3Smrg      *pNumPasses = 0;
3887ec681f3Smrg      return;
3897ec681f3Smrg   }
3907ec681f3Smrg
3917ec681f3Smrg   *pNumPasses = intel_perf_get_n_passes(perf,
3927ec681f3Smrg                                       pPerformanceQueryCreateInfo->pCounterIndices,
3937ec681f3Smrg                                       pPerformanceQueryCreateInfo->counterIndexCount,
3947ec681f3Smrg                                       NULL);
3957ec681f3Smrg}
3967ec681f3Smrg
3977ec681f3SmrgVkResult anv_AcquireProfilingLockKHR(
3987ec681f3Smrg    VkDevice                                    _device,
3997ec681f3Smrg    const VkAcquireProfilingLockInfoKHR*        pInfo)
4007ec681f3Smrg{
4017ec681f3Smrg   ANV_FROM_HANDLE(anv_device, device, _device);
4027ec681f3Smrg   struct intel_perf_config *perf = device->physical->perf;
4037ec681f3Smrg   struct intel_perf_query_info *first_metric_set = &perf->queries[0];
4047ec681f3Smrg   int fd = -1;
4057ec681f3Smrg
4067ec681f3Smrg   assert(device->perf_fd == -1);
4077ec681f3Smrg
4087ec681f3Smrg   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
4097ec681f3Smrg      fd = anv_device_perf_open(device, first_metric_set->oa_metrics_set_id);
4107ec681f3Smrg      if (fd < 0)
4117ec681f3Smrg         return VK_TIMEOUT;
4127ec681f3Smrg   }
4137ec681f3Smrg
4147ec681f3Smrg   device->perf_fd = fd;
4157ec681f3Smrg   return VK_SUCCESS;
4167ec681f3Smrg}
4177ec681f3Smrg
4187ec681f3Smrgvoid anv_ReleaseProfilingLockKHR(
4197ec681f3Smrg    VkDevice                                    _device)
4207ec681f3Smrg{
4217ec681f3Smrg   ANV_FROM_HANDLE(anv_device, device, _device);
4227ec681f3Smrg
4237ec681f3Smrg   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
4247ec681f3Smrg      assert(device->perf_fd >= 0);
4257ec681f3Smrg      close(device->perf_fd);
4267ec681f3Smrg   }
4277ec681f3Smrg   device->perf_fd = -1;
4287ec681f3Smrg}
4297ec681f3Smrg
4307ec681f3Smrgvoid
4317ec681f3Smrganv_perf_write_pass_results(struct intel_perf_config *perf,
4327ec681f3Smrg                            struct anv_query_pool *pool, uint32_t pass,
4337ec681f3Smrg                            const struct intel_perf_query_result *accumulated_results,
4347ec681f3Smrg                            union VkPerformanceCounterResultKHR *results)
4357ec681f3Smrg{
4367ec681f3Smrg   for (uint32_t c = 0; c < pool->n_counters; c++) {
4377ec681f3Smrg      const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c];
4387ec681f3Smrg
4397ec681f3Smrg      if (counter_pass->pass != pass)
4407ec681f3Smrg         continue;
4417ec681f3Smrg
4427ec681f3Smrg      switch (pool->pass_query[pass]->kind) {
4437ec681f3Smrg      case INTEL_PERF_QUERY_TYPE_PIPELINE: {
4447ec681f3Smrg         assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
4457ec681f3Smrg         uint32_t accu_offset = counter_pass->counter->offset / sizeof(uint64_t);
4467ec681f3Smrg         results[c].uint64 = accumulated_results->accumulator[accu_offset];
4477ec681f3Smrg         break;
4487ec681f3Smrg      }
4497ec681f3Smrg
4507ec681f3Smrg      case INTEL_PERF_QUERY_TYPE_OA:
4517ec681f3Smrg      case INTEL_PERF_QUERY_TYPE_RAW:
4527ec681f3Smrg         switch (counter_pass->counter->data_type) {
4537ec681f3Smrg         case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
4547ec681f3Smrg            results[c].uint64 =
4557ec681f3Smrg               counter_pass->counter->oa_counter_read_uint64(perf,
4567ec681f3Smrg                                                             counter_pass->query,
4577ec681f3Smrg                                                             accumulated_results);
4587ec681f3Smrg            break;
4597ec681f3Smrg         case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
4607ec681f3Smrg            results[c].float32 =
4617ec681f3Smrg               counter_pass->counter->oa_counter_read_float(perf,
4627ec681f3Smrg                                                            counter_pass->query,
4637ec681f3Smrg                                                            accumulated_results);
4647ec681f3Smrg            break;
4657ec681f3Smrg         default:
4667ec681f3Smrg            /* So far we aren't using uint32, double or bool32... */
4677ec681f3Smrg            unreachable("unexpected counter data type");
4687ec681f3Smrg         }
4697ec681f3Smrg         break;
4707ec681f3Smrg
4717ec681f3Smrg      default:
4727ec681f3Smrg         unreachable("invalid query type");
4737ec681f3Smrg      }
4747ec681f3Smrg
4757ec681f3Smrg      /* The Vulkan extension only has nanoseconds as a unit */
4767ec681f3Smrg      if (counter_pass->counter->units == INTEL_PERF_COUNTER_UNITS_US) {
4777ec681f3Smrg         assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
4787ec681f3Smrg         results[c].uint64 *= 1000;
4797ec681f3Smrg      }
4807ec681f3Smrg   }
4817ec681f3Smrg}
482