17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2018 Intel Corporation
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
217ec681f3Smrg * IN THE SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
247ec681f3Smrg#include <dirent.h>
257ec681f3Smrg
267ec681f3Smrg#include <sys/types.h>
277ec681f3Smrg#include <sys/stat.h>
287ec681f3Smrg#include <fcntl.h>
297ec681f3Smrg#include <unistd.h>
307ec681f3Smrg#include <errno.h>
317ec681f3Smrg
327ec681f3Smrg#ifndef HAVE_DIRENT_D_TYPE
337ec681f3Smrg#include <limits.h> // PATH_MAX
347ec681f3Smrg#endif
357ec681f3Smrg
367ec681f3Smrg#include <drm-uapi/i915_drm.h>
377ec681f3Smrg
387ec681f3Smrg#include "common/intel_gem.h"
397ec681f3Smrg
407ec681f3Smrg#include "dev/intel_debug.h"
417ec681f3Smrg#include "dev/intel_device_info.h"
427ec681f3Smrg
437ec681f3Smrg#include "perf/intel_perf.h"
447ec681f3Smrg#include "perf/intel_perf_regs.h"
457ec681f3Smrg#include "perf/intel_perf_mdapi.h"
467ec681f3Smrg#include "perf/intel_perf_metrics.h"
477ec681f3Smrg#include "perf/intel_perf_private.h"
487ec681f3Smrg
497ec681f3Smrg#include "util/bitscan.h"
507ec681f3Smrg#include "util/macros.h"
517ec681f3Smrg#include "util/mesa-sha1.h"
527ec681f3Smrg#include "util/u_math.h"
537ec681f3Smrg
547ec681f3Smrg#define FILE_DEBUG_FLAG DEBUG_PERFMON
557ec681f3Smrg
567ec681f3Smrgstatic bool
577ec681f3Smrgis_dir_or_link(const struct dirent *entry, const char *parent_dir)
587ec681f3Smrg{
597ec681f3Smrg#ifdef HAVE_DIRENT_D_TYPE
607ec681f3Smrg   return entry->d_type == DT_DIR || entry->d_type == DT_LNK;
617ec681f3Smrg#else
627ec681f3Smrg   struct stat st;
637ec681f3Smrg   char path[PATH_MAX + 1];
647ec681f3Smrg   snprintf(path, sizeof(path), "%s/%s", parent_dir, entry->d_name);
657ec681f3Smrg   lstat(path, &st);
667ec681f3Smrg   return S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode);
677ec681f3Smrg#endif
687ec681f3Smrg}
697ec681f3Smrg
/* Resolve the sysfs directory of the DRM card backing @fd, e.g.
 * "/sys/dev/char/<maj>:<min>/device/drm/card0", storing it in
 * perf->sysfs_dev_dir.  Returns false if the path cannot be resolved.
 *
 * With INTEL_DEBUG=no-oaconfig the path is left empty and the function
 * succeeds, since no OA configuration will be read from sysfs.
 */
static bool
get_sysfs_dev_dir(struct intel_perf_config *perf, int fd)
{
   struct stat sb;
   int min, maj;
   DIR *drmdir;
   struct dirent *drm_entry;
   int len;

   perf->sysfs_dev_dir[0] = '\0';

   if (INTEL_DEBUG(DEBUG_NO_OACONFIG))
      return true;

   if (fstat(fd, &sb)) {
      DBG("Failed to stat DRM fd\n");
      return false;
   }

   /* The char-device major/minor numbers index the /sys/dev/char tree. */
   maj = major(sb.st_rdev);
   min = minor(sb.st_rdev);

   if (!S_ISCHR(sb.st_mode)) {
      DBG("DRM fd is not a character device as expected\n");
      return false;
   }

   len = snprintf(perf->sysfs_dev_dir,
                  sizeof(perf->sysfs_dev_dir),
                  "/sys/dev/char/%d:%d/device/drm", maj, min);
   if (len < 0 || len >= sizeof(perf->sysfs_dev_dir)) {
      DBG("Failed to concatenate sysfs path to drm device\n");
      return false;
   }

   drmdir = opendir(perf->sysfs_dev_dir);
   if (!drmdir) {
      DBG("Failed to open %s: %m\n", perf->sysfs_dev_dir);
      return false;
   }

   /* Look for the "cardX" entry (directory or symlink) and append it to
    * the base path; the first match wins.
    */
   while ((drm_entry = readdir(drmdir))) {
      if (is_dir_or_link(drm_entry, perf->sysfs_dev_dir) &&
          strncmp(drm_entry->d_name, "card", 4) == 0)
      {
         len = snprintf(perf->sysfs_dev_dir,
                        sizeof(perf->sysfs_dev_dir),
                        "/sys/dev/char/%d:%d/device/drm/%s",
                        maj, min, drm_entry->d_name);
         closedir(drmdir);
         if (len < 0 || len >= sizeof(perf->sysfs_dev_dir))
            return false;
         else
            return true;
      }
   }

   closedir(drmdir);

   DBG("Failed to find cardX directory under /sys/dev/char/%d:%d/device/drm\n",
       maj, min);

   return false;
}
1347ec681f3Smrg
/* Read @file and parse its contents as a u64 (strtoull with base 0, so
 * "0x..." hex values work).  Returns false if the file cannot be opened
 * or read; a non-numeric file yields *val == 0 and returns true.
 */
static bool
read_file_uint64(const char *file, uint64_t *val)
{
    char buf[32];
    int fd, n;

    /* Use an explicit O_RDONLY rather than a bare 0 flags value. */
    fd = open(file, O_RDONLY);
    if (fd < 0)
       return false;
    /* Retry reads interrupted by signals. */
    while ((n = read(fd, buf, sizeof (buf) - 1)) < 0 &&
           errno == EINTR);
    close(fd);
    if (n < 0)
       return false;

    buf[n] = '\0';
    *val = strtoull(buf, NULL, 0);

    return true;
}
1557ec681f3Smrg
1567ec681f3Smrgstatic bool
1577ec681f3Smrgread_sysfs_drm_device_file_uint64(struct intel_perf_config *perf,
1587ec681f3Smrg                                  const char *file,
1597ec681f3Smrg                                  uint64_t *value)
1607ec681f3Smrg{
1617ec681f3Smrg   char buf[512];
1627ec681f3Smrg   int len;
1637ec681f3Smrg
1647ec681f3Smrg   len = snprintf(buf, sizeof(buf), "%s/%s", perf->sysfs_dev_dir, file);
1657ec681f3Smrg   if (len < 0 || len >= sizeof(buf)) {
1667ec681f3Smrg      DBG("Failed to concatenate sys filename to read u64 from\n");
1677ec681f3Smrg      return false;
1687ec681f3Smrg   }
1697ec681f3Smrg
1707ec681f3Smrg   return read_file_uint64(buf, value);
1717ec681f3Smrg}
1727ec681f3Smrg
/* Append a copy of @query to the perf config's query list with the
 * kernel-assigned metric set id @config_id filled in, selecting the OA
 * report format for this hardware generation.
 */
static void
register_oa_config(struct intel_perf_config *perf,
                   const struct intel_device_info *devinfo,
                   const struct intel_perf_query_info *query,
                   uint64_t config_id)
{
   struct intel_perf_query_info *registered_query =
      intel_perf_append_query_info(perf, 0);

   *registered_query = *query;
   /* Gfx8+ reports use the A32u40_A4u32_B8_C8 layout; older gens A45_B8_C8. */
   registered_query->oa_format = devinfo->ver >= 8 ?
      I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_A45_B8_C8;
   registered_query->oa_metrics_set_id = config_id;
   DBG("metric set registered: id = %" PRIu64", guid = %s\n",
       registered_query->oa_metrics_set_id, query->guid);
}
1897ec681f3Smrg
/* Walk <sysfs_dev_dir>/metrics and register every metric set that is both
 * advertised by the kernel and known to Mesa (present in
 * perf->oa_metrics_table), using the kernel-assigned metric set id.
 */
static void
enumerate_sysfs_metrics(struct intel_perf_config *perf,
                        const struct intel_device_info *devinfo)
{
   DIR *metricsdir = NULL;
   struct dirent *metric_entry;
   char buf[256];
   int len;

   len = snprintf(buf, sizeof(buf), "%s/metrics", perf->sysfs_dev_dir);
   if (len < 0 || len >= sizeof(buf)) {
      DBG("Failed to concatenate path to sysfs metrics/ directory\n");
      return;
   }

   metricsdir = opendir(buf);
   if (!metricsdir) {
      DBG("Failed to open %s: %m\n", buf);
      return;
   }

   while ((metric_entry = readdir(metricsdir))) {
      struct hash_entry *entry;
      /* Each metric set is a subdirectory named by its GUID; skip "." /
       * ".." and anything that is not a directory or symlink.
       */
      if (!is_dir_or_link(metric_entry, buf) ||
          metric_entry->d_name[0] == '.')
         continue;

      DBG("metric set: %s\n", metric_entry->d_name);
      entry = _mesa_hash_table_search(perf->oa_metrics_table,
                                      metric_entry->d_name);
      if (entry) {
         uint64_t id;
         if (!intel_perf_load_metric_id(perf, metric_entry->d_name, &id)) {
            DBG("Failed to read metric set id from %s: %m", buf);
            continue;
         }

         register_oa_config(perf, devinfo,
                            (const struct intel_perf_query_info *)entry->data, id);
      } else
         DBG("metric set not known by mesa (skipping)\n");
   }

   closedir(metricsdir);
}
2357ec681f3Smrg
2367ec681f3Smrgstatic void
2377ec681f3Smrgadd_all_metrics(struct intel_perf_config *perf,
2387ec681f3Smrg                const struct intel_device_info *devinfo)
2397ec681f3Smrg{
2407ec681f3Smrg   hash_table_foreach(perf->oa_metrics_table, entry) {
2417ec681f3Smrg      const struct intel_perf_query_info *query = entry->data;
2427ec681f3Smrg      register_oa_config(perf, devinfo, query, 0);
2437ec681f3Smrg   }
2447ec681f3Smrg}
2457ec681f3Smrg
2467ec681f3Smrgstatic bool
2477ec681f3Smrgkernel_has_dynamic_config_support(struct intel_perf_config *perf, int fd)
2487ec681f3Smrg{
2497ec681f3Smrg   uint64_t invalid_config_id = UINT64_MAX;
2507ec681f3Smrg
2517ec681f3Smrg   return intel_ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG,
2527ec681f3Smrg                    &invalid_config_id) < 0 && errno == ENOENT;
2537ec681f3Smrg}
2547ec681f3Smrg
2557ec681f3Smrgstatic bool
2567ec681f3Smrgi915_query_perf_config_supported(struct intel_perf_config *perf, int fd)
2577ec681f3Smrg{
2587ec681f3Smrg   int32_t length = 0;
2597ec681f3Smrg   return !intel_i915_query_flags(fd, DRM_I915_QUERY_PERF_CONFIG,
2607ec681f3Smrg                                  DRM_I915_QUERY_PERF_CONFIG_LIST,
2617ec681f3Smrg                                  NULL, &length);
2627ec681f3Smrg}
2637ec681f3Smrg
/* Fetch the kernel's view of the OA config identified by @guid.
 *
 * The uAPI takes a drm_i915_query_perf_config header immediately followed
 * by a drm_i915_perf_oa_config payload, so both are packed into one flat
 * buffer.  @config is input (register counts/pointers to fill in) and
 * output (updated with the kernel's data).  Returns false on query failure.
 */
static bool
i915_query_perf_config_data(struct intel_perf_config *perf,
                            int fd, const char *guid,
                            struct drm_i915_perf_oa_config *config)
{
   char data[sizeof(struct drm_i915_query_perf_config) +
             sizeof(struct drm_i915_perf_oa_config)] = {};
   struct drm_i915_query_perf_config *query = (void *)data;

   /* NOTE(review): assumes @guid holds at least sizeof(query->uuid)
    * bytes — callers pass fixed-length GUID strings; confirm if reused.
    */
   memcpy(query->uuid, guid, sizeof(query->uuid));
   memcpy(query->data, config, sizeof(*config));

   int32_t item_length = sizeof(data);
   if (intel_i915_query_flags(fd, DRM_I915_QUERY_PERF_CONFIG,
                              DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID,
                              query, &item_length))
      return false;

   /* Copy the kernel-updated payload back out to the caller. */
   memcpy(config, query->data, sizeof(*config));

   return true;
}
2867ec681f3Smrg
2877ec681f3Smrgbool
2887ec681f3Smrgintel_perf_load_metric_id(struct intel_perf_config *perf_cfg,
2897ec681f3Smrg                          const char *guid,
2907ec681f3Smrg                          uint64_t *metric_id)
2917ec681f3Smrg{
2927ec681f3Smrg   char config_path[280];
2937ec681f3Smrg
2947ec681f3Smrg   snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id",
2957ec681f3Smrg            perf_cfg->sysfs_dev_dir, guid);
2967ec681f3Smrg
2977ec681f3Smrg   /* Don't recreate already loaded configs. */
2987ec681f3Smrg   return read_file_uint64(config_path, metric_id);
2997ec681f3Smrg}
3007ec681f3Smrg
3017ec681f3Smrgstatic uint64_t
3027ec681f3Smrgi915_add_config(struct intel_perf_config *perf, int fd,
3037ec681f3Smrg                const struct intel_perf_registers *config,
3047ec681f3Smrg                const char *guid)
3057ec681f3Smrg{
3067ec681f3Smrg   struct drm_i915_perf_oa_config i915_config = { 0, };
3077ec681f3Smrg
3087ec681f3Smrg   memcpy(i915_config.uuid, guid, sizeof(i915_config.uuid));
3097ec681f3Smrg
3107ec681f3Smrg   i915_config.n_mux_regs = config->n_mux_regs;
3117ec681f3Smrg   i915_config.mux_regs_ptr = to_const_user_pointer(config->mux_regs);
3127ec681f3Smrg
3137ec681f3Smrg   i915_config.n_boolean_regs = config->n_b_counter_regs;
3147ec681f3Smrg   i915_config.boolean_regs_ptr = to_const_user_pointer(config->b_counter_regs);
3157ec681f3Smrg
3167ec681f3Smrg   i915_config.n_flex_regs = config->n_flex_regs;
3177ec681f3Smrg   i915_config.flex_regs_ptr = to_const_user_pointer(config->flex_regs);
3187ec681f3Smrg
3197ec681f3Smrg   int ret = intel_ioctl(fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &i915_config);
3207ec681f3Smrg   return ret > 0 ? ret : 0;
3217ec681f3Smrg}
3227ec681f3Smrg
3237ec681f3Smrgstatic void
3247ec681f3Smrginit_oa_configs(struct intel_perf_config *perf, int fd,
3257ec681f3Smrg                const struct intel_device_info *devinfo)
3267ec681f3Smrg{
3277ec681f3Smrg   hash_table_foreach(perf->oa_metrics_table, entry) {
3287ec681f3Smrg      const struct intel_perf_query_info *query = entry->data;
3297ec681f3Smrg      uint64_t config_id;
3307ec681f3Smrg
3317ec681f3Smrg      if (intel_perf_load_metric_id(perf, query->guid, &config_id)) {
3327ec681f3Smrg         DBG("metric set: %s (already loaded)\n", query->guid);
3337ec681f3Smrg         register_oa_config(perf, devinfo, query, config_id);
3347ec681f3Smrg         continue;
3357ec681f3Smrg      }
3367ec681f3Smrg
3377ec681f3Smrg      int ret = i915_add_config(perf, fd, &query->config, query->guid);
3387ec681f3Smrg      if (ret < 0) {
3397ec681f3Smrg         DBG("Failed to load \"%s\" (%s) metrics set in kernel: %s\n",
3407ec681f3Smrg             query->name, query->guid, strerror(errno));
3417ec681f3Smrg         continue;
3427ec681f3Smrg      }
3437ec681f3Smrg
3447ec681f3Smrg      register_oa_config(perf, devinfo, query, ret);
3457ec681f3Smrg      DBG("metric set: %s (added)\n", query->guid);
3467ec681f3Smrg   }
3477ec681f3Smrg}
3487ec681f3Smrg
3497ec681f3Smrgstatic void
3507ec681f3Smrgcompute_topology_builtins(struct intel_perf_config *perf,
3517ec681f3Smrg                          const struct intel_device_info *devinfo)
3527ec681f3Smrg{
3537ec681f3Smrg   perf->sys_vars.slice_mask = devinfo->slice_masks;
3547ec681f3Smrg   perf->sys_vars.n_eu_slices = devinfo->num_slices;
3557ec681f3Smrg
3567ec681f3Smrg   for (int i = 0; i < sizeof(devinfo->subslice_masks[i]); i++) {
3577ec681f3Smrg      perf->sys_vars.n_eu_sub_slices +=
3587ec681f3Smrg         util_bitcount(devinfo->subslice_masks[i]);
3597ec681f3Smrg   }
3607ec681f3Smrg
3617ec681f3Smrg   for (int i = 0; i < sizeof(devinfo->eu_masks); i++)
3627ec681f3Smrg      perf->sys_vars.n_eus += util_bitcount(devinfo->eu_masks[i]);
3637ec681f3Smrg
3647ec681f3Smrg   perf->sys_vars.eu_threads_count = devinfo->num_thread_per_eu;
3657ec681f3Smrg
3667ec681f3Smrg   /* The subslice mask builtin contains bits for all slices. Prior to Gfx11
3677ec681f3Smrg    * it had groups of 3bits for each slice, on Gfx11 and above it's 8bits for
3687ec681f3Smrg    * each slice.
3697ec681f3Smrg    *
3707ec681f3Smrg    * Ideally equations would be updated to have a slice/subslice query
3717ec681f3Smrg    * function/operator.
3727ec681f3Smrg    */
3737ec681f3Smrg   perf->sys_vars.subslice_mask = 0;
3747ec681f3Smrg
3757ec681f3Smrg   int bits_per_subslice = devinfo->ver >= 11 ? 8 : 3;
3767ec681f3Smrg
3777ec681f3Smrg   for (int s = 0; s < util_last_bit(devinfo->slice_masks); s++) {
3787ec681f3Smrg      for (int ss = 0; ss < (devinfo->subslice_slice_stride * 8); ss++) {
3797ec681f3Smrg         if (intel_device_info_subslice_available(devinfo, s, ss))
3807ec681f3Smrg            perf->sys_vars.subslice_mask |= 1ULL << (s * bits_per_subslice + ss);
3817ec681f3Smrg      }
3827ec681f3Smrg   }
3837ec681f3Smrg}
3847ec681f3Smrg
3857ec681f3Smrgstatic bool
3867ec681f3Smrginit_oa_sys_vars(struct intel_perf_config *perf,
3877ec681f3Smrg                 const struct intel_device_info *devinfo,
3887ec681f3Smrg                 bool use_register_snapshots)
3897ec681f3Smrg{
3907ec681f3Smrg   uint64_t min_freq_mhz = 0, max_freq_mhz = 0;
3917ec681f3Smrg
3927ec681f3Smrg   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
3937ec681f3Smrg      if (!read_sysfs_drm_device_file_uint64(perf, "gt_min_freq_mhz", &min_freq_mhz))
3947ec681f3Smrg         return false;
3957ec681f3Smrg
3967ec681f3Smrg      if (!read_sysfs_drm_device_file_uint64(perf,  "gt_max_freq_mhz", &max_freq_mhz))
3977ec681f3Smrg         return false;
3987ec681f3Smrg   } else {
3997ec681f3Smrg      min_freq_mhz = 300;
4007ec681f3Smrg      max_freq_mhz = 1000;
4017ec681f3Smrg   }
4027ec681f3Smrg
4037ec681f3Smrg   memset(&perf->sys_vars, 0, sizeof(perf->sys_vars));
4047ec681f3Smrg   perf->sys_vars.gt_min_freq = min_freq_mhz * 1000000;
4057ec681f3Smrg   perf->sys_vars.gt_max_freq = max_freq_mhz * 1000000;
4067ec681f3Smrg   perf->sys_vars.timestamp_frequency = devinfo->timestamp_frequency;
4077ec681f3Smrg   perf->sys_vars.revision = devinfo->revision;
4087ec681f3Smrg   perf->sys_vars.query_mode = use_register_snapshots;
4097ec681f3Smrg   compute_topology_builtins(perf, devinfo);
4107ec681f3Smrg
4117ec681f3Smrg   return true;
4127ec681f3Smrg}
4137ec681f3Smrg
4147ec681f3Smrgtypedef void (*perf_register_oa_queries_t)(struct intel_perf_config *);
4157ec681f3Smrg
/* Map a device to the generated function that registers its OA metric
 * queries.  Returns NULL for hardware without OA metric support in Mesa.
 * Checks are ordered from specific platform flags to the generic ver==11
 * fallback, so the order of the if-chain is significant.
 */
static perf_register_oa_queries_t
get_register_queries_function(const struct intel_device_info *devinfo)
{
   if (devinfo->is_haswell)
      return intel_oa_register_queries_hsw;
   if (devinfo->is_cherryview)
      return intel_oa_register_queries_chv;
   if (devinfo->is_broadwell)
      return intel_oa_register_queries_bdw;
   if (devinfo->is_broxton)
      return intel_oa_register_queries_bxt;
   if (devinfo->is_skylake) {
      if (devinfo->gt == 2)
         return intel_oa_register_queries_sklgt2;
      if (devinfo->gt == 3)
         return intel_oa_register_queries_sklgt3;
      if (devinfo->gt == 4)
         return intel_oa_register_queries_sklgt4;
   }
   if (devinfo->is_kabylake) {
      if (devinfo->gt == 2)
         return intel_oa_register_queries_kblgt2;
      if (devinfo->gt == 3)
         return intel_oa_register_queries_kblgt3;
   }
   if (devinfo->is_geminilake)
      return intel_oa_register_queries_glk;
   if (devinfo->is_coffeelake) {
      if (devinfo->gt == 2)
         return intel_oa_register_queries_cflgt2;
      if (devinfo->gt == 3)
         return intel_oa_register_queries_cflgt3;
   }
   /* EHL is checked before the generic ICL fallback for gen11. */
   if (devinfo->ver == 11) {
      if (devinfo->is_elkhartlake)
         return intel_oa_register_queries_ehl;
      return intel_oa_register_queries_icl;
   }
   if (devinfo->is_tigerlake) {
      if (devinfo->gt == 1)
         return intel_oa_register_queries_tglgt1;
      if (devinfo->gt == 2)
         return intel_oa_register_queries_tglgt2;
   }
   if (devinfo->is_rocketlake)
      return intel_oa_register_queries_rkl;
   if (devinfo->is_dg1)
      return intel_oa_register_queries_dg1;
   if (devinfo->is_alderlake)
      return intel_oa_register_queries_adl;

   /* Unsupported GT variant or unknown platform. */
   return NULL;
}
4697ec681f3Smrg
4707ec681f3Smrgstatic int
4717ec681f3Smrgintel_perf_compare_counter_names(const void *v1, const void *v2)
4727ec681f3Smrg{
4737ec681f3Smrg   const struct intel_perf_query_counter *c1 = v1;
4747ec681f3Smrg   const struct intel_perf_query_counter *c2 = v2;
4757ec681f3Smrg
4767ec681f3Smrg   return strcmp(c1->name, c2->name);
4777ec681f3Smrg}
4787ec681f3Smrg
/* Sort a query's counters alphabetically by name so the counter list is
 * presented in a stable, user-friendly order.
 */
static void
sort_query(struct intel_perf_query_info *q)
{
   qsort(q->counters, q->n_counters, sizeof(q->counters[0]),
         intel_perf_compare_counter_names);
}
4857ec681f3Smrg
/* Build the "Pipeline Statistics Registers" query from the MI pipeline
 * statistics counters, adjusting the register set and counter scaling for
 * the hardware generation.
 */
static void
load_pipeline_statistic_metrics(struct intel_perf_config *perf_cfg,
                                const struct intel_device_info *devinfo)
{
   struct intel_perf_query_info *query =
      intel_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);

   query->kind = INTEL_PERF_QUERY_TYPE_PIPELINE;
   query->name = "Pipeline Statistics Registers";

   intel_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
                                       "N vertices submitted");
   intel_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
                                       "N primitives submitted");
   intel_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
                                       "N vertex shader invocations");

   /* Gfx6 has single stream-out counters; Gfx7+ exposes one pair per
    * stream (0-3).
    */
   if (devinfo->ver == 6) {
      intel_perf_query_add_stat_reg(query, GFX6_SO_PRIM_STORAGE_NEEDED, 1, 1,
                                    "SO_PRIM_STORAGE_NEEDED",
                                    "N geometry shader stream-out primitives (total)");
      intel_perf_query_add_stat_reg(query, GFX6_SO_NUM_PRIMS_WRITTEN, 1, 1,
                                    "SO_NUM_PRIMS_WRITTEN",
                                    "N geometry shader stream-out primitives (written)");
   } else {
      intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(0), 1, 1,
                                    "SO_PRIM_STORAGE_NEEDED (Stream 0)",
                                    "N stream-out (stream 0) primitives (total)");
      intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(1), 1, 1,
                                    "SO_PRIM_STORAGE_NEEDED (Stream 1)",
                                    "N stream-out (stream 1) primitives (total)");
      intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(2), 1, 1,
                                    "SO_PRIM_STORAGE_NEEDED (Stream 2)",
                                    "N stream-out (stream 2) primitives (total)");
      intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(3), 1, 1,
                                    "SO_PRIM_STORAGE_NEEDED (Stream 3)",
                                    "N stream-out (stream 3) primitives (total)");
      intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(0), 1, 1,
                                    "SO_NUM_PRIMS_WRITTEN (Stream 0)",
                                    "N stream-out (stream 0) primitives (written)");
      intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(1), 1, 1,
                                    "SO_NUM_PRIMS_WRITTEN (Stream 1)",
                                    "N stream-out (stream 1) primitives (written)");
      intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(2), 1, 1,
                                    "SO_NUM_PRIMS_WRITTEN (Stream 2)",
                                    "N stream-out (stream 2) primitives (written)");
      intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(3), 1, 1,
                                    "SO_NUM_PRIMS_WRITTEN (Stream 3)",
                                    "N stream-out (stream 3) primitives (written)");
   }

   intel_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
                                       "N TCS shader invocations");
   intel_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
                                       "N TES shader invocations");

   intel_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
                                       "N geometry shader invocations");
   intel_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
                                       "N geometry shader primitives emitted");

   intel_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
                                       "N primitives entering clipping");
   intel_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
                                       "N primitives leaving clipping");

   /* HSW and Gfx8 count PS invocations per pixel-subspan, hence the
    * numerator 1 / denominator 4 scaling on those platforms.
    */
   if (devinfo->is_haswell || devinfo->ver == 8) {
      intel_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
                                    "N fragment shader invocations",
                                    "N fragment shader invocations");
   } else {
      intel_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
                                          "N fragment shader invocations");
   }

   intel_perf_query_add_basic_stat_reg(query, PS_DEPTH_COUNT,
                                       "N z-pass fragments");

   /* Compute shaders (and the CS invocation counter) exist from Gfx7. */
   if (devinfo->ver >= 7) {
      intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
                                          "N compute shader invocations");
   }

   /* Each counter snapshot is one u64 per registered counter. */
   query->data_size = sizeof(uint64_t) * query->n_counters;

   sort_query(query);
}
5737ec681f3Smrg
5747ec681f3Smrgstatic int
5757ec681f3Smrgi915_perf_version(int drm_fd)
5767ec681f3Smrg{
5777ec681f3Smrg   int tmp;
5787ec681f3Smrg   drm_i915_getparam_t gp = {
5797ec681f3Smrg      .param = I915_PARAM_PERF_REVISION,
5807ec681f3Smrg      .value = &tmp,
5817ec681f3Smrg   };
5827ec681f3Smrg
5837ec681f3Smrg   int ret = intel_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
5847ec681f3Smrg
5857ec681f3Smrg   /* Return 0 if this getparam is not supported, the first version supported
5867ec681f3Smrg    * is 1.
5877ec681f3Smrg    */
5887ec681f3Smrg   return ret < 0 ? 0 : tmp;
5897ec681f3Smrg}
5907ec681f3Smrg
/* Query the default context's slice/subslice/EU configuration from the
 * kernel.  The ioctl return value is not checked; on failure *sseu is
 * left as the caller initialized it.
 */
static void
i915_get_sseu(int drm_fd, struct drm_i915_gem_context_param_sseu *sseu)
{
   struct drm_i915_gem_context_param arg = {
      .param = I915_CONTEXT_PARAM_SSEU,
      .size = sizeof(*sseu),
      .value = to_user_pointer(sseu)
   };

   intel_ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg);
}
6027ec681f3Smrg
/* Like strcmp(), but NULL sorts before any non-NULL string and two NULLs
 * compare equal.
 */
static inline int
compare_str_or_null(const char *s1, const char *s2)
{
   if (s1 && s2)
      return strcmp(s1, s2);
   if (s1 == s2)
      return 0;
   return s1 ? 1 : -1;
}
6157ec681f3Smrg
6167ec681f3Smrgstatic int
6177ec681f3Smrgcompare_counter_categories_and_names(const void *_c1, const void *_c2)
6187ec681f3Smrg{
6197ec681f3Smrg   const struct intel_perf_query_counter_info *c1 = (const struct intel_perf_query_counter_info *)_c1;
6207ec681f3Smrg   const struct intel_perf_query_counter_info *c2 = (const struct intel_perf_query_counter_info *)_c2;
6217ec681f3Smrg
6227ec681f3Smrg   /* pipeline counters don't have an assigned category */
6237ec681f3Smrg   int r = compare_str_or_null(c1->counter->category, c2->counter->category);
6247ec681f3Smrg   if (r)
6257ec681f3Smrg      return r;
6267ec681f3Smrg
6277ec681f3Smrg   return strcmp(c1->counter->name, c2->counter->name);
6287ec681f3Smrg}
6297ec681f3Smrg
/* Build perf->counter_infos: a deduplicated, sorted list of all counters
 * across every query, with a bitmask of which queries expose each counter.
 */
static void
build_unique_counter_list(struct intel_perf_config *perf)
{
   /* query_mask is a 64-bit bitfield, so at most 64 queries fit. */
   assert(perf->n_queries < 64);

   size_t max_counters = 0;

   for (int q = 0; q < perf->n_queries; q++)
      max_counters += perf->queries[q].n_counters;

   /*
    * Allocate big enough array to hold maximum possible number of counters.
    * We can't alloc it small and realloc when needed because the hash table
    * below contains pointers to this array.
    */
   struct intel_perf_query_counter_info *counter_infos =
         ralloc_array_size(perf, sizeof(counter_infos[0]), max_counters);

   perf->n_counters = 0;

   /* Temporary symbol-name -> counter_info map used for deduplication. */
   struct hash_table *counters_table =
      _mesa_hash_table_create(perf,
                              _mesa_hash_string,
                              _mesa_key_string_equal);
   struct hash_entry *entry;
   for (int q = 0; q < perf->n_queries ; q++) {
      struct intel_perf_query_info *query = &perf->queries[q];

      for (int c = 0; c < query->n_counters; c++) {
         struct intel_perf_query_counter *counter;
         struct intel_perf_query_counter_info *counter_info;

         counter = &query->counters[c];
         entry = _mesa_hash_table_search(counters_table, counter->symbol_name);

         if (entry) {
            /* Already seen in an earlier query: just mark this query too. */
            counter_info = entry->data;
            counter_info->query_mask |= BITFIELD64_BIT(q);
            continue;
         }
         assert(perf->n_counters < max_counters);

         /* First occurrence: record the counter and where it lives. */
         counter_info = &counter_infos[perf->n_counters++];
         counter_info->counter = counter;
         counter_info->query_mask = BITFIELD64_BIT(q);

         counter_info->location.group_idx = q;
         counter_info->location.counter_idx = c;

         _mesa_hash_table_insert(counters_table, counter->symbol_name, counter_info);
      }
   }

   _mesa_hash_table_destroy(counters_table, NULL);

   /* Now we can realloc counter_infos array because hash table doesn't exist. */
   perf->counter_infos = reralloc_array_size(perf, counter_infos,
         sizeof(counter_infos[0]), perf->n_counters);

   qsort(perf->counter_infos, perf->n_counters, sizeof(perf->counter_infos[0]),
         compare_counter_categories_and_names);
}
6927ec681f3Smrg
6937ec681f3Smrgstatic bool
6947ec681f3Smrgoa_metrics_available(struct intel_perf_config *perf, int fd,
6957ec681f3Smrg                     const struct intel_device_info *devinfo,
6967ec681f3Smrg                     bool use_register_snapshots)
6977ec681f3Smrg{
6987ec681f3Smrg   perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo);
6997ec681f3Smrg   bool i915_perf_oa_available = false;
7007ec681f3Smrg   struct stat sb;
7017ec681f3Smrg
7027ec681f3Smrg   perf->i915_query_supported = i915_query_perf_config_supported(perf, fd);
7037ec681f3Smrg   perf->i915_perf_version = i915_perf_version(fd);
7047ec681f3Smrg
7057ec681f3Smrg   /* Record the default SSEU configuration. */
7067ec681f3Smrg   i915_get_sseu(fd, &perf->sseu);
7077ec681f3Smrg
7087ec681f3Smrg   /* The existence of this sysctl parameter implies the kernel supports
7097ec681f3Smrg    * the i915 perf interface.
7107ec681f3Smrg    */
7117ec681f3Smrg   if (stat("/proc/sys/dev/i915/perf_stream_paranoid", &sb) == 0) {
7127ec681f3Smrg
7137ec681f3Smrg      /* If _paranoid == 1 then on Gfx8+ we won't be able to access OA
7147ec681f3Smrg       * metrics unless running as root.
7157ec681f3Smrg       */
7167ec681f3Smrg      if (devinfo->is_haswell)
7177ec681f3Smrg         i915_perf_oa_available = true;
7187ec681f3Smrg      else {
7197ec681f3Smrg         uint64_t paranoid = 1;
7207ec681f3Smrg
7217ec681f3Smrg         read_file_uint64("/proc/sys/dev/i915/perf_stream_paranoid", &paranoid);
7227ec681f3Smrg
7237ec681f3Smrg         if (paranoid == 0 || geteuid() == 0)
7247ec681f3Smrg            i915_perf_oa_available = true;
7257ec681f3Smrg      }
7267ec681f3Smrg
7277ec681f3Smrg      perf->platform_supported = oa_register != NULL;
7287ec681f3Smrg   }
7297ec681f3Smrg
7307ec681f3Smrg   return i915_perf_oa_available &&
7317ec681f3Smrg          oa_register &&
7327ec681f3Smrg          get_sysfs_dev_dir(perf, fd) &&
7337ec681f3Smrg          init_oa_sys_vars(perf, devinfo, use_register_snapshots);
7347ec681f3Smrg}
7357ec681f3Smrg
7367ec681f3Smrgstatic void
7377ec681f3Smrgload_oa_metrics(struct intel_perf_config *perf, int fd,
7387ec681f3Smrg                const struct intel_device_info *devinfo)
7397ec681f3Smrg{
7407ec681f3Smrg   int existing_queries = perf->n_queries;
7417ec681f3Smrg
7427ec681f3Smrg   perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo);
7437ec681f3Smrg
7447ec681f3Smrg   perf->oa_metrics_table =
7457ec681f3Smrg      _mesa_hash_table_create(perf, _mesa_hash_string,
7467ec681f3Smrg                              _mesa_key_string_equal);
7477ec681f3Smrg
7487ec681f3Smrg   /* Index all the metric sets mesa knows about before looking to see what
7497ec681f3Smrg    * the kernel is advertising.
7507ec681f3Smrg    */
7517ec681f3Smrg   oa_register(perf);
7527ec681f3Smrg
7537ec681f3Smrg   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
7547ec681f3Smrg      if (kernel_has_dynamic_config_support(perf, fd))
7557ec681f3Smrg         init_oa_configs(perf, fd, devinfo);
7567ec681f3Smrg      else
7577ec681f3Smrg         enumerate_sysfs_metrics(perf, devinfo);
7587ec681f3Smrg   } else {
7597ec681f3Smrg      add_all_metrics(perf, devinfo);
7607ec681f3Smrg   }
7617ec681f3Smrg
7627ec681f3Smrg   /* sort counters in each individual group created by this function by name */
7637ec681f3Smrg   for (int i = existing_queries; i < perf->n_queries; ++i)
7647ec681f3Smrg      sort_query(&perf->queries[i]);
7657ec681f3Smrg
7667ec681f3Smrg   /* Select a fallback OA metric. Look for the TestOa metric or use the last
7677ec681f3Smrg    * one if no present (on HSW).
7687ec681f3Smrg    */
7697ec681f3Smrg   for (int i = existing_queries; i < perf->n_queries; i++) {
7707ec681f3Smrg      if (perf->queries[i].symbol_name &&
7717ec681f3Smrg          strcmp(perf->queries[i].symbol_name, "TestOa") == 0) {
7727ec681f3Smrg         perf->fallback_raw_oa_metric = perf->queries[i].oa_metrics_set_id;
7737ec681f3Smrg         break;
7747ec681f3Smrg      }
7757ec681f3Smrg   }
7767ec681f3Smrg   if (perf->fallback_raw_oa_metric == 0 && perf->n_queries > 0)
7777ec681f3Smrg      perf->fallback_raw_oa_metric = perf->queries[perf->n_queries - 1].oa_metrics_set_id;
7787ec681f3Smrg}
7797ec681f3Smrg
7807ec681f3Smrgstruct intel_perf_registers *
7817ec681f3Smrgintel_perf_load_configuration(struct intel_perf_config *perf_cfg, int fd, const char *guid)
7827ec681f3Smrg{
7837ec681f3Smrg   if (!perf_cfg->i915_query_supported)
7847ec681f3Smrg      return NULL;
7857ec681f3Smrg
7867ec681f3Smrg   struct drm_i915_perf_oa_config i915_config = { 0, };
7877ec681f3Smrg   if (!i915_query_perf_config_data(perf_cfg, fd, guid, &i915_config))
7887ec681f3Smrg      return NULL;
7897ec681f3Smrg
7907ec681f3Smrg   struct intel_perf_registers *config = rzalloc(NULL, struct intel_perf_registers);
7917ec681f3Smrg   config->n_flex_regs = i915_config.n_flex_regs;
7927ec681f3Smrg   config->flex_regs = rzalloc_array(config, struct intel_perf_query_register_prog, config->n_flex_regs);
7937ec681f3Smrg   config->n_mux_regs = i915_config.n_mux_regs;
7947ec681f3Smrg   config->mux_regs = rzalloc_array(config, struct intel_perf_query_register_prog, config->n_mux_regs);
7957ec681f3Smrg   config->n_b_counter_regs = i915_config.n_boolean_regs;
7967ec681f3Smrg   config->b_counter_regs = rzalloc_array(config, struct intel_perf_query_register_prog, config->n_b_counter_regs);
7977ec681f3Smrg
7987ec681f3Smrg   /*
7997ec681f3Smrg    * struct intel_perf_query_register_prog maps exactly to the tuple of
8007ec681f3Smrg    * (register offset, register value) returned by the i915.
8017ec681f3Smrg    */
8027ec681f3Smrg   i915_config.flex_regs_ptr = to_const_user_pointer(config->flex_regs);
8037ec681f3Smrg   i915_config.mux_regs_ptr = to_const_user_pointer(config->mux_regs);
8047ec681f3Smrg   i915_config.boolean_regs_ptr = to_const_user_pointer(config->b_counter_regs);
8057ec681f3Smrg   if (!i915_query_perf_config_data(perf_cfg, fd, guid, &i915_config)) {
8067ec681f3Smrg      ralloc_free(config);
8077ec681f3Smrg      return NULL;
8087ec681f3Smrg   }
8097ec681f3Smrg
8107ec681f3Smrg   return config;
8117ec681f3Smrg}
8127ec681f3Smrg
8137ec681f3Smrguint64_t
8147ec681f3Smrgintel_perf_store_configuration(struct intel_perf_config *perf_cfg, int fd,
8157ec681f3Smrg                               const struct intel_perf_registers *config,
8167ec681f3Smrg                               const char *guid)
8177ec681f3Smrg{
8187ec681f3Smrg   if (guid)
8197ec681f3Smrg      return i915_add_config(perf_cfg, fd, config, guid);
8207ec681f3Smrg
8217ec681f3Smrg   struct mesa_sha1 sha1_ctx;
8227ec681f3Smrg   _mesa_sha1_init(&sha1_ctx);
8237ec681f3Smrg
8247ec681f3Smrg   if (config->flex_regs) {
8257ec681f3Smrg      _mesa_sha1_update(&sha1_ctx, config->flex_regs,
8267ec681f3Smrg                        sizeof(config->flex_regs[0]) *
8277ec681f3Smrg                        config->n_flex_regs);
8287ec681f3Smrg   }
8297ec681f3Smrg   if (config->mux_regs) {
8307ec681f3Smrg      _mesa_sha1_update(&sha1_ctx, config->mux_regs,
8317ec681f3Smrg                        sizeof(config->mux_regs[0]) *
8327ec681f3Smrg                        config->n_mux_regs);
8337ec681f3Smrg   }
8347ec681f3Smrg   if (config->b_counter_regs) {
8357ec681f3Smrg      _mesa_sha1_update(&sha1_ctx, config->b_counter_regs,
8367ec681f3Smrg                        sizeof(config->b_counter_regs[0]) *
8377ec681f3Smrg                        config->n_b_counter_regs);
8387ec681f3Smrg   }
8397ec681f3Smrg
8407ec681f3Smrg   uint8_t hash[20];
8417ec681f3Smrg   _mesa_sha1_final(&sha1_ctx, hash);
8427ec681f3Smrg
8437ec681f3Smrg   char formatted_hash[41];
8447ec681f3Smrg   _mesa_sha1_format(formatted_hash, hash);
8457ec681f3Smrg
8467ec681f3Smrg   char generated_guid[37];
8477ec681f3Smrg   snprintf(generated_guid, sizeof(generated_guid),
8487ec681f3Smrg            "%.8s-%.4s-%.4s-%.4s-%.12s",
8497ec681f3Smrg            &formatted_hash[0], &formatted_hash[8],
8507ec681f3Smrg            &formatted_hash[8 + 4], &formatted_hash[8 + 4 + 4],
8517ec681f3Smrg            &formatted_hash[8 + 4 + 4 + 4]);
8527ec681f3Smrg
8537ec681f3Smrg   /* Check if already present. */
8547ec681f3Smrg   uint64_t id;
8557ec681f3Smrg   if (intel_perf_load_metric_id(perf_cfg, generated_guid, &id))
8567ec681f3Smrg      return id;
8577ec681f3Smrg
8587ec681f3Smrg   return i915_add_config(perf_cfg, fd, config, generated_guid);
8597ec681f3Smrg}
8607ec681f3Smrg
/* Compute the minimal set of queries (as a bitmask of query indices) needed
 * to read all the counters listed in counter_indices.
 */
static uint64_t
get_passes_mask(struct intel_perf_config *perf,
                const uint32_t *counter_indices,
                uint32_t counter_indices_count)
{
   uint64_t queries_mask = 0;

   /* query_mask values are 64-bit bitfields indexed by query. */
   assert(perf->n_queries < 64);

   /* Compute the number of passes by going through all counters N times (with
    * N the number of queries) to make sure we select the most constraining
    * counters first and look at the more flexible ones (that could be
    * obtained from multiple queries) later. That way we minimize the number
    * of passes required.
    */
   for (uint32_t q = 0; q < perf->n_queries; q++) {
      for (uint32_t i = 0; i < counter_indices_count; i++) {
         assert(counter_indices[i] < perf->n_counters);

         uint32_t idx = counter_indices[i];
         /* Round q only considers counters available from exactly (q + 1)
          * queries, i.e. most constrained counters first.
          */
         if (util_bitcount64(perf->counter_infos[idx].query_mask) != (q + 1))
            continue;

         /* Counter already covered by a query selected earlier. */
         if (queries_mask & perf->counter_infos[idx].query_mask)
            continue;

         /* Pick the first (lowest-index) query able to provide this
          * counter.
          */
         queries_mask |= BITFIELD64_BIT(ffsll(perf->counter_infos[idx].query_mask) - 1);
      }
   }

   return queries_mask;
}
8937ec681f3Smrg
8947ec681f3Smrguint32_t
8957ec681f3Smrgintel_perf_get_n_passes(struct intel_perf_config *perf,
8967ec681f3Smrg                        const uint32_t *counter_indices,
8977ec681f3Smrg                        uint32_t counter_indices_count,
8987ec681f3Smrg                        struct intel_perf_query_info **pass_queries)
8997ec681f3Smrg{
9007ec681f3Smrg   uint64_t queries_mask = get_passes_mask(perf, counter_indices, counter_indices_count);
9017ec681f3Smrg
9027ec681f3Smrg   if (pass_queries) {
9037ec681f3Smrg      uint32_t pass = 0;
9047ec681f3Smrg      for (uint32_t q = 0; q < perf->n_queries; q++) {
9057ec681f3Smrg         if ((1ULL << q) & queries_mask)
9067ec681f3Smrg            pass_queries[pass++] = &perf->queries[q];
9077ec681f3Smrg      }
9087ec681f3Smrg   }
9097ec681f3Smrg
9107ec681f3Smrg   return util_bitcount64(queries_mask);
9117ec681f3Smrg}
9127ec681f3Smrg
/* For each requested counter, fill counter_pass[i] with the counter, the
 * query chosen to read it, and the zero-based pass index of that query.
 */
void
intel_perf_get_counters_passes(struct intel_perf_config *perf,
                               const uint32_t *counter_indices,
                               uint32_t counter_indices_count,
                               struct intel_perf_counter_pass *counter_pass)
{
   uint64_t queries_mask = get_passes_mask(perf, counter_indices, counter_indices_count);
   ASSERTED uint32_t n_passes = util_bitcount64(queries_mask);

   for (uint32_t i = 0; i < counter_indices_count; i++) {
      assert(counter_indices[i] < perf->n_counters);

      uint32_t idx = counter_indices[i];
      counter_pass[i].counter = perf->counter_infos[idx].counter;

      /* First selected query that can provide this counter. */
      uint32_t query_idx = ffsll(perf->counter_infos[idx].query_mask & queries_mask) - 1;
      counter_pass[i].query = &perf->queries[query_idx];

      /* Pass index = number of selected queries at or below query_idx,
       * minus one. The shift pair masks off all bits above query_idx.
       */
      uint32_t clear_bits = 63 - query_idx;
      counter_pass[i].pass = util_bitcount64((queries_mask << clear_bits) >> clear_bits) - 1;
      assert(counter_pass[i].pass < n_passes);
   }
}
9367ec681f3Smrg
/* Add the delta between two 32-bit OA counter snapshots to *accumulator.
 * Unsigned subtraction handles a single wraparound between snapshots.
 */
static inline void
accumulate_uint32(const uint32_t *report0,
                  const uint32_t *report1,
                  uint64_t *accumulator)
{
   const uint32_t delta = *report1 - *report0;

   *accumulator += delta;
}
9457ec681f3Smrg
/* Add the delta between two 40-bit OA A-counter snapshots to *accumulator.
 *
 * In the A32u40 report layout the 32 low dwords of the A counters start at
 * dword 4 and the 8-bit high parts are packed one byte each starting at
 * dword 40; @a_index selects which A counter to accumulate.
 */
static inline void
accumulate_uint40(int a_index,
                  const uint32_t *report0,
                  const uint32_t *report1,
                  uint64_t *accumulator)
{
   const uint8_t *high0 = (const uint8_t *)(report0 + 40);
   const uint8_t *high1 = (const uint8_t *)(report1 + 40);
   uint64_t value0 = ((uint64_t)high0[a_index] << 32) | report0[a_index + 4];
   uint64_t value1 = ((uint64_t)high1[a_index] << 32) | report1[a_index + 4];

   /* Account for a single 40-bit wraparound between the two snapshots. */
   uint64_t delta = value1 - value0;
   if (value0 > value1)
      delta += 1ULL << 40;

   *accumulator += delta;
}
9687ec681f3Smrg
/* Decode the slice/unslice clock frequencies embedded in an OA report id.
 *
 * The lower 16bits of the RPT_ID field of the OA reports contains a
 * snapshot of the bits coming from the RP_FREQ_NORMAL register and is
 * divided this way :
 *
 * RPT_ID[31:25]: RP_FREQ_NORMAL[20:14] (low squashed_slice_clock_frequency)
 * RPT_ID[10:9]:  RP_FREQ_NORMAL[22:21] (high squashed_slice_clock_frequency)
 * RPT_ID[8:0]:   RP_FREQ_NORMAL[31:23] (squashed_unslice_clock_frequency)
 *
 * RP_FREQ_NORMAL[31:23]: Software Unslice Ratio Request
 *                        Multiple of 33.33MHz 2xclk (16 MHz 1xclk)
 *
 * RP_FREQ_NORMAL[22:14]: Software Slice Ratio Request
 *                        Multiple of 33.33MHz 2xclk (16 MHz 1xclk)
 */
static void
gfx8_read_report_clock_ratios(const uint32_t *report,
                              uint64_t *slice_freq_hz,
                              uint64_t *unslice_freq_hz)
{
   const uint32_t rpt_id = report[0];
   const uint32_t unslice_ratio = rpt_id & 0x1ff;
   const uint32_t slice_ratio =
      ((rpt_id >> 25) & 0x7f) | (((rpt_id >> 9) & 0x3) << 7);

   /* Ratios are multiples of 16.666667 MHz (1xclk). */
   *slice_freq_hz = (uint64_t)slice_ratio * 16666667ULL;
   *unslice_freq_hz = (uint64_t)unslice_ratio * 16666667ULL;
}
9977ec681f3Smrg
9987ec681f3Smrgvoid
9997ec681f3Smrgintel_perf_query_result_read_frequencies(struct intel_perf_query_result *result,
10007ec681f3Smrg                                         const struct intel_device_info *devinfo,
10017ec681f3Smrg                                         const uint32_t *start,
10027ec681f3Smrg                                         const uint32_t *end)
10037ec681f3Smrg{
10047ec681f3Smrg   /* Slice/Unslice frequency is only available in the OA reports when the
10057ec681f3Smrg    * "Disable OA reports due to clock ratio change" field in
10067ec681f3Smrg    * OA_DEBUG_REGISTER is set to 1. This is how the kernel programs this
10077ec681f3Smrg    * global register (see drivers/gpu/drm/i915/i915_perf.c)
10087ec681f3Smrg    *
10097ec681f3Smrg    * Documentation says this should be available on Gfx9+ but experimentation
10107ec681f3Smrg    * shows that Gfx8 reports similar values, so we enable it there too.
10117ec681f3Smrg    */
10127ec681f3Smrg   if (devinfo->ver < 8)
10137ec681f3Smrg      return;
10147ec681f3Smrg
10157ec681f3Smrg   gfx8_read_report_clock_ratios(start,
10167ec681f3Smrg                                 &result->slice_frequency[0],
10177ec681f3Smrg                                 &result->unslice_frequency[0]);
10187ec681f3Smrg   gfx8_read_report_clock_ratios(end,
10197ec681f3Smrg                                 &result->slice_frequency[1],
10207ec681f3Smrg                                 &result->unslice_frequency[1]);
10217ec681f3Smrg}
10227ec681f3Smrg
10237ec681f3Smrgstatic inline bool
10247ec681f3Smrgcan_use_mi_rpc_bc_counters(const struct intel_device_info *devinfo)
10257ec681f3Smrg{
10267ec681f3Smrg   return devinfo->ver <= 11;
10277ec681f3Smrg}
10287ec681f3Smrg
/* Accumulate the counter deltas between two raw OA reports into
 * result->accumulator, at the offsets described by @query.
 */
void
intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
                                   const struct intel_perf_query_info *query,
                                   const struct intel_device_info *devinfo,
                                   const uint32_t *start,
                                   const uint32_t *end)
{
   int i;

   /* Capture the hw context id from the first report that carries a valid
    * one (report dword 2).
    */
   if (result->hw_id == INTEL_PERF_INVALID_CTX_ID &&
       start[2] != INTEL_PERF_INVALID_CTX_ID)
      result->hw_id = start[2];
   /* Report dword 1 is the timestamp. */
   if (result->reports_accumulated == 0)
      result->begin_timestamp = start[1];
   result->reports_accumulated++;

   switch (query->oa_format) {
   case I915_OA_FORMAT_A32u40_A4u32_B8_C8:
      accumulate_uint32(start + 1, end + 1,
                        result->accumulator + query->gpu_time_offset); /* timestamp */
      accumulate_uint32(start + 3, end + 3,
                        result->accumulator + query->gpu_clock_offset); /* clock */

      /* 32x 40bit A counters... */
      for (i = 0; i < 32; i++) {
         accumulate_uint40(i, start, end,
                           result->accumulator + query->a_offset + i);
      }

      /* 4x 32bit A counters... starting at report dword 36 */
      for (i = 0; i < 4; i++) {
         accumulate_uint32(start + 36 + i, end + 36 + i,
                           result->accumulator + query->a_offset + 32 + i);
      }

      if (can_use_mi_rpc_bc_counters(devinfo)) {
         /* 8x 32bit B counters at report dword 48 */
         for (i = 0; i < 8; i++) {
            accumulate_uint32(start + 48 + i, end + 48 + i,
                              result->accumulator + query->b_offset + i);
         }

         /* 8x 32bit C counters... at report dword 56 */
         for (i = 0; i < 8; i++) {
            accumulate_uint32(start + 56 + i, end + 56 + i,
                              result->accumulator + query->c_offset + i);
         }
      }
      break;

   case I915_OA_FORMAT_A45_B8_C8:
      accumulate_uint32(start + 1, end + 1, result->accumulator); /* timestamp */

      /* 61 consecutive 32-bit counters starting at dword 3 (presumably
       * 45 A + 8 B + 8 C given the format name).
       */
      for (i = 0; i < 61; i++) {
         accumulate_uint32(start + 3 + i, end + 3 + i,
                           result->accumulator + query->a_offset + i);
      }
      break;

   default:
      unreachable("Can't accumulate OA counters in unknown format");
   }

}
10937ec681f3Smrg
10947ec681f3Smrg#define GET_FIELD(word, field) (((word)  & field ## _MASK) >> field ## _SHIFT)
10957ec681f3Smrg
10967ec681f3Smrgvoid
10977ec681f3Smrgintel_perf_query_result_read_gt_frequency(struct intel_perf_query_result *result,
10987ec681f3Smrg                                          const struct intel_device_info *devinfo,
10997ec681f3Smrg                                          const uint32_t start,
11007ec681f3Smrg                                          const uint32_t end)
11017ec681f3Smrg{
11027ec681f3Smrg   switch (devinfo->ver) {
11037ec681f3Smrg   case 7:
11047ec681f3Smrg   case 8:
11057ec681f3Smrg      result->gt_frequency[0] = GET_FIELD(start, GFX7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
11067ec681f3Smrg      result->gt_frequency[1] = GET_FIELD(end, GFX7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
11077ec681f3Smrg      break;
11087ec681f3Smrg   case 9:
11097ec681f3Smrg   case 11:
11107ec681f3Smrg   case 12:
11117ec681f3Smrg      result->gt_frequency[0] = GET_FIELD(start, GFX9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
11127ec681f3Smrg      result->gt_frequency[1] = GET_FIELD(end, GFX9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
11137ec681f3Smrg      break;
11147ec681f3Smrg   default:
11157ec681f3Smrg      unreachable("unexpected gen");
11167ec681f3Smrg   }
11177ec681f3Smrg
11187ec681f3Smrg   /* Put the numbers into Hz. */
11197ec681f3Smrg   result->gt_frequency[0] *= 1000000ULL;
11207ec681f3Smrg   result->gt_frequency[1] *= 1000000ULL;
11217ec681f3Smrg}
11227ec681f3Smrg
11237ec681f3Smrgvoid
11247ec681f3Smrgintel_perf_query_result_read_perfcnts(struct intel_perf_query_result *result,
11257ec681f3Smrg                                      const struct intel_perf_query_info *query,
11267ec681f3Smrg                                      const uint64_t *start,
11277ec681f3Smrg                                      const uint64_t *end)
11287ec681f3Smrg{
11297ec681f3Smrg   for (uint32_t i = 0; i < 2; i++) {
11307ec681f3Smrg      uint64_t v0 = start[i] & PERF_CNT_VALUE_MASK;
11317ec681f3Smrg      uint64_t v1 = end[i] & PERF_CNT_VALUE_MASK;
11327ec681f3Smrg
11337ec681f3Smrg      result->accumulator[query->perfcnt_offset + i] = v0 > v1 ?
11347ec681f3Smrg         (PERF_CNT_VALUE_MASK + 1 + v1 - v0) :
11357ec681f3Smrg         (v1 - v0);
11367ec681f3Smrg   }
11377ec681f3Smrg}
11387ec681f3Smrg
11397ec681f3Smrgstatic uint32_t
11407ec681f3Smrgquery_accumulator_offset(const struct intel_perf_query_info *query,
11417ec681f3Smrg                         enum intel_perf_query_field_type type,
11427ec681f3Smrg                         uint8_t index)
11437ec681f3Smrg{
11447ec681f3Smrg   switch (type) {
11457ec681f3Smrg   case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
11467ec681f3Smrg      return query->perfcnt_offset + index;
11477ec681f3Smrg   case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
11487ec681f3Smrg      return query->b_offset + index;
11497ec681f3Smrg   case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
11507ec681f3Smrg      return query->c_offset + index;
11517ec681f3Smrg   default:
11527ec681f3Smrg      unreachable("Invalid register type");
11537ec681f3Smrg      return 0;
11547ec681f3Smrg   }
11557ec681f3Smrg}
11567ec681f3Smrg
/* Walk the perf query field layout and accumulate each field's begin/end
 * delta from the @start/@end snapshot buffers into @result.
 *
 * NOTE: relies on arithmetic on void* (GCC/Clang extension) to index into
 * the snapshot buffers by field->location.
 */
void
intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result,
                                          const struct intel_perf_query_info *query,
                                          const struct intel_device_info *devinfo,
                                          const void *start,
                                          const void *end,
                                          bool no_oa_accumulate)
{
   struct intel_perf_query_field_layout *layout = &query->perf->query_layout;

   for (uint32_t r = 0; r < layout->n_fields; r++) {
      struct intel_perf_query_field *field = &layout->fields[r];

      if (field->type == INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC) {
         /* Full OA report: read frequencies and (optionally) accumulate the
          * counter deltas.
          */
         intel_perf_query_result_read_frequencies(result, devinfo,
                                                start + field->location,
                                                end + field->location);
         /* no_oa_accumulate=true is used when doing GL perf queries, we
          * manually parse the OA reports from the OA buffer and substract
          * unrelated deltas, so don't accumulate the begin/end reports here.
          */
         if (!no_oa_accumulate) {
            intel_perf_query_result_accumulate(result, query, devinfo,
                                               start + field->location,
                                               end + field->location);
         }
      } else {
         /* Plain register snapshot: 4 or 8 bytes at field->location. */
         uint64_t v0, v1;

         if (field->size == 4) {
            v0 = *(const uint32_t *)(start + field->location);
            v1 = *(const uint32_t *)(end + field->location);
         } else {
            assert(field->size == 8);
            v0 = *(const uint64_t *)(start + field->location);
            v1 = *(const uint64_t *)(end + field->location);
         }

         /* Mask off bits that aren't part of the counter value. */
         if (field->mask) {
            v0 = field->mask & v0;
            v1 = field->mask & v1;
         }

         /* RPSTAT is a bit of a special case because its begin/end values
          * represent frequencies. We store it in a separate location.
          */
         if (field->type == INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT)
            intel_perf_query_result_read_gt_frequency(result, devinfo, v0, v1);
         else
            result->accumulator[query_accumulator_offset(query, field->type, field->index)] = v1 - v0;
      }
   }
}
12107ec681f3Smrg
12117ec681f3Smrgvoid
12127ec681f3Smrgintel_perf_query_result_clear(struct intel_perf_query_result *result)
12137ec681f3Smrg{
12147ec681f3Smrg   memset(result, 0, sizeof(*result));
12157ec681f3Smrg   result->hw_id = INTEL_PERF_INVALID_CTX_ID;
12167ec681f3Smrg}
12177ec681f3Smrg
12187ec681f3Smrgvoid
12197ec681f3Smrgintel_perf_query_result_print_fields(const struct intel_perf_query_info *query,
12207ec681f3Smrg                                     const struct intel_device_info *devinfo,
12217ec681f3Smrg                                     const void *data)
12227ec681f3Smrg{
12237ec681f3Smrg   const struct intel_perf_query_field_layout *layout = &query->perf->query_layout;
12247ec681f3Smrg
12257ec681f3Smrg   for (uint32_t r = 0; r < layout->n_fields; r++) {
12267ec681f3Smrg      const struct intel_perf_query_field *field = &layout->fields[r];
12277ec681f3Smrg      const uint32_t *value32 = data + field->location;
12287ec681f3Smrg
12297ec681f3Smrg      switch (field->type) {
12307ec681f3Smrg      case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:
12317ec681f3Smrg         fprintf(stderr, "MI_RPC:\n");
12327ec681f3Smrg         fprintf(stderr, "  TS: 0x%08x\n", *(value32 + 1));
12337ec681f3Smrg         fprintf(stderr, "  CLK: 0x%08x\n", *(value32 + 3));
12347ec681f3Smrg         break;
12357ec681f3Smrg      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
12367ec681f3Smrg         fprintf(stderr, "B%u: 0x%08x\n", field->index, *value32);
12377ec681f3Smrg         break;
12387ec681f3Smrg      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
12397ec681f3Smrg         fprintf(stderr, "C%u: 0x%08x\n", field->index, *value32);
12407ec681f3Smrg         break;
12417ec681f3Smrg      default:
12427ec681f3Smrg         break;
12437ec681f3Smrg      }
12447ec681f3Smrg   }
12457ec681f3Smrg}
12467ec681f3Smrg
12477ec681f3Smrgstatic int
12487ec681f3Smrgintel_perf_compare_query_names(const void *v1, const void *v2)
12497ec681f3Smrg{
12507ec681f3Smrg   const struct intel_perf_query_info *q1 = v1;
12517ec681f3Smrg   const struct intel_perf_query_info *q2 = v2;
12527ec681f3Smrg
12537ec681f3Smrg   return strcmp(q1->name, q2->name);
12547ec681f3Smrg}
12557ec681f3Smrg
12567ec681f3Smrgstatic inline struct intel_perf_query_field *
12577ec681f3Smrgadd_query_register(struct intel_perf_query_field_layout *layout,
12587ec681f3Smrg                   enum intel_perf_query_field_type type,
12597ec681f3Smrg                   uint16_t offset,
12607ec681f3Smrg                   uint16_t size,
12617ec681f3Smrg                   uint8_t index)
12627ec681f3Smrg{
12637ec681f3Smrg   /* Align MI_RPC to 64bytes (HW requirement) & 64bit registers to 8bytes
12647ec681f3Smrg    * (shows up nicely in the debugger).
12657ec681f3Smrg    */
12667ec681f3Smrg   if (type == INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC)
12677ec681f3Smrg      layout->size = align(layout->size, 64);
12687ec681f3Smrg   else if (size % 8 == 0)
12697ec681f3Smrg      layout->size = align(layout->size, 8);
12707ec681f3Smrg
12717ec681f3Smrg   layout->fields[layout->n_fields++] = (struct intel_perf_query_field) {
12727ec681f3Smrg      .mmio_offset = offset,
12737ec681f3Smrg      .location = layout->size,
12747ec681f3Smrg      .type = type,
12757ec681f3Smrg      .index = index,
12767ec681f3Smrg      .size = size,
12777ec681f3Smrg   };
12787ec681f3Smrg   layout->size += size;
12797ec681f3Smrg
12807ec681f3Smrg   return &layout->fields[layout->n_fields - 1];
12817ec681f3Smrg}
12827ec681f3Smrg
12837ec681f3Smrgstatic void
12847ec681f3Smrgintel_perf_init_query_fields(struct intel_perf_config *perf_cfg,
12857ec681f3Smrg                             const struct intel_device_info *devinfo,
12867ec681f3Smrg                             bool use_register_snapshots)
12877ec681f3Smrg{
12887ec681f3Smrg   struct intel_perf_query_field_layout *layout = &perf_cfg->query_layout;
12897ec681f3Smrg
12907ec681f3Smrg   layout->n_fields = 0;
12917ec681f3Smrg
12927ec681f3Smrg   /* MI_RPC requires a 64byte alignment. */
12937ec681f3Smrg   layout->alignment = 64;
12947ec681f3Smrg
12957ec681f3Smrg   layout->fields = rzalloc_array(perf_cfg, struct intel_perf_query_field, 5 + 16);
12967ec681f3Smrg
12977ec681f3Smrg   add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC,
12987ec681f3Smrg                      0, 256, 0);
12997ec681f3Smrg
13007ec681f3Smrg   if (use_register_snapshots) {
13017ec681f3Smrg      if (devinfo->ver <= 11) {
13027ec681f3Smrg         struct intel_perf_query_field *field =
13037ec681f3Smrg            add_query_register(layout,
13047ec681f3Smrg                               INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
13057ec681f3Smrg                               PERF_CNT_1_DW0, 8, 0);
13067ec681f3Smrg         field->mask = PERF_CNT_VALUE_MASK;
13077ec681f3Smrg
13087ec681f3Smrg         field = add_query_register(layout,
13097ec681f3Smrg                                    INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
13107ec681f3Smrg                                    PERF_CNT_2_DW0, 8, 1);
13117ec681f3Smrg         field->mask = PERF_CNT_VALUE_MASK;
13127ec681f3Smrg      }
13137ec681f3Smrg
13147ec681f3Smrg      if (devinfo->ver == 8 && !devinfo->is_cherryview) {
13157ec681f3Smrg         add_query_register(layout,
13167ec681f3Smrg                         INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT,
13177ec681f3Smrg                            GFX7_RPSTAT1, 4, 0);
13187ec681f3Smrg      }
13197ec681f3Smrg
13207ec681f3Smrg      if (devinfo->ver >= 9) {
13217ec681f3Smrg         add_query_register(layout,
13227ec681f3Smrg                            INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT,
13237ec681f3Smrg                            GFX9_RPSTAT0, 4, 0);
13247ec681f3Smrg      }
13257ec681f3Smrg
13267ec681f3Smrg      if (!can_use_mi_rpc_bc_counters(devinfo)) {
13277ec681f3Smrg         if (devinfo->ver >= 8 && devinfo->ver <= 11) {
13287ec681f3Smrg            for (uint32_t i = 0; i < GFX8_N_OA_PERF_B32; i++) {
13297ec681f3Smrg               add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B,
13307ec681f3Smrg                                  GFX8_OA_PERF_B32(i), 4, i);
13317ec681f3Smrg            }
13327ec681f3Smrg            for (uint32_t i = 0; i < GFX8_N_OA_PERF_C32; i++) {
13337ec681f3Smrg               add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C,
13347ec681f3Smrg                                  GFX8_OA_PERF_C32(i), 4, i);
13357ec681f3Smrg            }
13367ec681f3Smrg         } else if (devinfo->ver == 12) {
13377ec681f3Smrg            for (uint32_t i = 0; i < GFX12_N_OAG_PERF_B32; i++) {
13387ec681f3Smrg               add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B,
13397ec681f3Smrg                                  GFX12_OAG_PERF_B32(i), 4, i);
13407ec681f3Smrg            }
13417ec681f3Smrg            for (uint32_t i = 0; i < GFX12_N_OAG_PERF_C32; i++) {
13427ec681f3Smrg               add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C,
13437ec681f3Smrg                                  GFX12_OAG_PERF_C32(i), 4, i);
13447ec681f3Smrg            }
13457ec681f3Smrg         }
13467ec681f3Smrg      }
13477ec681f3Smrg   }
13487ec681f3Smrg
13497ec681f3Smrg   /* Align the whole package to 64bytes so that 2 snapshots can be put
13507ec681f3Smrg    * together without extract alignment for the user.
13517ec681f3Smrg    */
13527ec681f3Smrg   layout->size = align(layout->size, 64);
13537ec681f3Smrg}
13547ec681f3Smrg
13557ec681f3Smrgvoid
13567ec681f3Smrgintel_perf_init_metrics(struct intel_perf_config *perf_cfg,
13577ec681f3Smrg                        const struct intel_device_info *devinfo,
13587ec681f3Smrg                        int drm_fd,
13597ec681f3Smrg                        bool include_pipeline_statistics,
13607ec681f3Smrg                        bool use_register_snapshots)
13617ec681f3Smrg{
13627ec681f3Smrg   intel_perf_init_query_fields(perf_cfg, devinfo, use_register_snapshots);
13637ec681f3Smrg
13647ec681f3Smrg   if (include_pipeline_statistics) {
13657ec681f3Smrg      load_pipeline_statistic_metrics(perf_cfg, devinfo);
13667ec681f3Smrg      intel_perf_register_mdapi_statistic_query(perf_cfg, devinfo);
13677ec681f3Smrg   }
13687ec681f3Smrg
13697ec681f3Smrg   bool oa_metrics = oa_metrics_available(perf_cfg, drm_fd, devinfo,
13707ec681f3Smrg                                          use_register_snapshots);
13717ec681f3Smrg   if (oa_metrics)
13727ec681f3Smrg      load_oa_metrics(perf_cfg, drm_fd, devinfo);
13737ec681f3Smrg
13747ec681f3Smrg   /* sort query groups by name */
13757ec681f3Smrg   qsort(perf_cfg->queries, perf_cfg->n_queries,
13767ec681f3Smrg         sizeof(perf_cfg->queries[0]), intel_perf_compare_query_names);
13777ec681f3Smrg
13787ec681f3Smrg   build_unique_counter_list(perf_cfg);
13797ec681f3Smrg
13807ec681f3Smrg   if (oa_metrics)
13817ec681f3Smrg      intel_perf_register_mdapi_oa_query(perf_cfg, devinfo);
13827ec681f3Smrg}
1383