/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#include <dirent.h> 257ec681f3Smrg 267ec681f3Smrg#include <sys/types.h> 277ec681f3Smrg#include <sys/stat.h> 287ec681f3Smrg#include <fcntl.h> 297ec681f3Smrg#include <unistd.h> 307ec681f3Smrg#include <errno.h> 317ec681f3Smrg 327ec681f3Smrg#ifndef HAVE_DIRENT_D_TYPE 337ec681f3Smrg#include <limits.h> // PATH_MAX 347ec681f3Smrg#endif 357ec681f3Smrg 367ec681f3Smrg#include <drm-uapi/i915_drm.h> 377ec681f3Smrg 387ec681f3Smrg#include "common/intel_gem.h" 397ec681f3Smrg 407ec681f3Smrg#include "dev/intel_debug.h" 417ec681f3Smrg#include "dev/intel_device_info.h" 427ec681f3Smrg 437ec681f3Smrg#include "perf/intel_perf.h" 447ec681f3Smrg#include "perf/intel_perf_regs.h" 457ec681f3Smrg#include "perf/intel_perf_mdapi.h" 467ec681f3Smrg#include "perf/intel_perf_metrics.h" 477ec681f3Smrg#include "perf/intel_perf_private.h" 487ec681f3Smrg 497ec681f3Smrg#include "util/bitscan.h" 507ec681f3Smrg#include "util/macros.h" 517ec681f3Smrg#include "util/mesa-sha1.h" 527ec681f3Smrg#include "util/u_math.h" 537ec681f3Smrg 547ec681f3Smrg#define FILE_DEBUG_FLAG DEBUG_PERFMON 557ec681f3Smrg 567ec681f3Smrgstatic bool 577ec681f3Smrgis_dir_or_link(const struct dirent *entry, const char *parent_dir) 587ec681f3Smrg{ 597ec681f3Smrg#ifdef HAVE_DIRENT_D_TYPE 607ec681f3Smrg return entry->d_type == DT_DIR || entry->d_type == DT_LNK; 617ec681f3Smrg#else 627ec681f3Smrg struct stat st; 637ec681f3Smrg char path[PATH_MAX + 1]; 647ec681f3Smrg snprintf(path, sizeof(path), "%s/%s", parent_dir, entry->d_name); 657ec681f3Smrg lstat(path, &st); 667ec681f3Smrg return S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode); 677ec681f3Smrg#endif 687ec681f3Smrg} 697ec681f3Smrg 707ec681f3Smrgstatic bool 717ec681f3Smrgget_sysfs_dev_dir(struct intel_perf_config *perf, int fd) 727ec681f3Smrg{ 737ec681f3Smrg struct stat sb; 747ec681f3Smrg int min, maj; 757ec681f3Smrg DIR *drmdir; 767ec681f3Smrg struct dirent *drm_entry; 777ec681f3Smrg int len; 787ec681f3Smrg 797ec681f3Smrg perf->sysfs_dev_dir[0] = 
'\0'; 807ec681f3Smrg 817ec681f3Smrg if (INTEL_DEBUG(DEBUG_NO_OACONFIG)) 827ec681f3Smrg return true; 837ec681f3Smrg 847ec681f3Smrg if (fstat(fd, &sb)) { 857ec681f3Smrg DBG("Failed to stat DRM fd\n"); 867ec681f3Smrg return false; 877ec681f3Smrg } 887ec681f3Smrg 897ec681f3Smrg maj = major(sb.st_rdev); 907ec681f3Smrg min = minor(sb.st_rdev); 917ec681f3Smrg 927ec681f3Smrg if (!S_ISCHR(sb.st_mode)) { 937ec681f3Smrg DBG("DRM fd is not a character device as expected\n"); 947ec681f3Smrg return false; 957ec681f3Smrg } 967ec681f3Smrg 977ec681f3Smrg len = snprintf(perf->sysfs_dev_dir, 987ec681f3Smrg sizeof(perf->sysfs_dev_dir), 997ec681f3Smrg "/sys/dev/char/%d:%d/device/drm", maj, min); 1007ec681f3Smrg if (len < 0 || len >= sizeof(perf->sysfs_dev_dir)) { 1017ec681f3Smrg DBG("Failed to concatenate sysfs path to drm device\n"); 1027ec681f3Smrg return false; 1037ec681f3Smrg } 1047ec681f3Smrg 1057ec681f3Smrg drmdir = opendir(perf->sysfs_dev_dir); 1067ec681f3Smrg if (!drmdir) { 1077ec681f3Smrg DBG("Failed to open %s: %m\n", perf->sysfs_dev_dir); 1087ec681f3Smrg return false; 1097ec681f3Smrg } 1107ec681f3Smrg 1117ec681f3Smrg while ((drm_entry = readdir(drmdir))) { 1127ec681f3Smrg if (is_dir_or_link(drm_entry, perf->sysfs_dev_dir) && 1137ec681f3Smrg strncmp(drm_entry->d_name, "card", 4) == 0) 1147ec681f3Smrg { 1157ec681f3Smrg len = snprintf(perf->sysfs_dev_dir, 1167ec681f3Smrg sizeof(perf->sysfs_dev_dir), 1177ec681f3Smrg "/sys/dev/char/%d:%d/device/drm/%s", 1187ec681f3Smrg maj, min, drm_entry->d_name); 1197ec681f3Smrg closedir(drmdir); 1207ec681f3Smrg if (len < 0 || len >= sizeof(perf->sysfs_dev_dir)) 1217ec681f3Smrg return false; 1227ec681f3Smrg else 1237ec681f3Smrg return true; 1247ec681f3Smrg } 1257ec681f3Smrg } 1267ec681f3Smrg 1277ec681f3Smrg closedir(drmdir); 1287ec681f3Smrg 1297ec681f3Smrg DBG("Failed to find cardX directory under /sys/dev/char/%d:%d/device/drm\n", 1307ec681f3Smrg maj, min); 1317ec681f3Smrg 1327ec681f3Smrg return false; 1337ec681f3Smrg} 1347ec681f3Smrg 
1357ec681f3Smrgstatic bool 1367ec681f3Smrgread_file_uint64(const char *file, uint64_t *val) 1377ec681f3Smrg{ 1387ec681f3Smrg char buf[32]; 1397ec681f3Smrg int fd, n; 1407ec681f3Smrg 1417ec681f3Smrg fd = open(file, 0); 1427ec681f3Smrg if (fd < 0) 1437ec681f3Smrg return false; 1447ec681f3Smrg while ((n = read(fd, buf, sizeof (buf) - 1)) < 0 && 1457ec681f3Smrg errno == EINTR); 1467ec681f3Smrg close(fd); 1477ec681f3Smrg if (n < 0) 1487ec681f3Smrg return false; 1497ec681f3Smrg 1507ec681f3Smrg buf[n] = '\0'; 1517ec681f3Smrg *val = strtoull(buf, NULL, 0); 1527ec681f3Smrg 1537ec681f3Smrg return true; 1547ec681f3Smrg} 1557ec681f3Smrg 1567ec681f3Smrgstatic bool 1577ec681f3Smrgread_sysfs_drm_device_file_uint64(struct intel_perf_config *perf, 1587ec681f3Smrg const char *file, 1597ec681f3Smrg uint64_t *value) 1607ec681f3Smrg{ 1617ec681f3Smrg char buf[512]; 1627ec681f3Smrg int len; 1637ec681f3Smrg 1647ec681f3Smrg len = snprintf(buf, sizeof(buf), "%s/%s", perf->sysfs_dev_dir, file); 1657ec681f3Smrg if (len < 0 || len >= sizeof(buf)) { 1667ec681f3Smrg DBG("Failed to concatenate sys filename to read u64 from\n"); 1677ec681f3Smrg return false; 1687ec681f3Smrg } 1697ec681f3Smrg 1707ec681f3Smrg return read_file_uint64(buf, value); 1717ec681f3Smrg} 1727ec681f3Smrg 1737ec681f3Smrgstatic void 1747ec681f3Smrgregister_oa_config(struct intel_perf_config *perf, 1757ec681f3Smrg const struct intel_device_info *devinfo, 1767ec681f3Smrg const struct intel_perf_query_info *query, 1777ec681f3Smrg uint64_t config_id) 1787ec681f3Smrg{ 1797ec681f3Smrg struct intel_perf_query_info *registered_query = 1807ec681f3Smrg intel_perf_append_query_info(perf, 0); 1817ec681f3Smrg 1827ec681f3Smrg *registered_query = *query; 1837ec681f3Smrg registered_query->oa_format = devinfo->ver >= 8 ? 
1847ec681f3Smrg I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_A45_B8_C8; 1857ec681f3Smrg registered_query->oa_metrics_set_id = config_id; 1867ec681f3Smrg DBG("metric set registered: id = %" PRIu64", guid = %s\n", 1877ec681f3Smrg registered_query->oa_metrics_set_id, query->guid); 1887ec681f3Smrg} 1897ec681f3Smrg 1907ec681f3Smrgstatic void 1917ec681f3Smrgenumerate_sysfs_metrics(struct intel_perf_config *perf, 1927ec681f3Smrg const struct intel_device_info *devinfo) 1937ec681f3Smrg{ 1947ec681f3Smrg DIR *metricsdir = NULL; 1957ec681f3Smrg struct dirent *metric_entry; 1967ec681f3Smrg char buf[256]; 1977ec681f3Smrg int len; 1987ec681f3Smrg 1997ec681f3Smrg len = snprintf(buf, sizeof(buf), "%s/metrics", perf->sysfs_dev_dir); 2007ec681f3Smrg if (len < 0 || len >= sizeof(buf)) { 2017ec681f3Smrg DBG("Failed to concatenate path to sysfs metrics/ directory\n"); 2027ec681f3Smrg return; 2037ec681f3Smrg } 2047ec681f3Smrg 2057ec681f3Smrg metricsdir = opendir(buf); 2067ec681f3Smrg if (!metricsdir) { 2077ec681f3Smrg DBG("Failed to open %s: %m\n", buf); 2087ec681f3Smrg return; 2097ec681f3Smrg } 2107ec681f3Smrg 2117ec681f3Smrg while ((metric_entry = readdir(metricsdir))) { 2127ec681f3Smrg struct hash_entry *entry; 2137ec681f3Smrg if (!is_dir_or_link(metric_entry, buf) || 2147ec681f3Smrg metric_entry->d_name[0] == '.') 2157ec681f3Smrg continue; 2167ec681f3Smrg 2177ec681f3Smrg DBG("metric set: %s\n", metric_entry->d_name); 2187ec681f3Smrg entry = _mesa_hash_table_search(perf->oa_metrics_table, 2197ec681f3Smrg metric_entry->d_name); 2207ec681f3Smrg if (entry) { 2217ec681f3Smrg uint64_t id; 2227ec681f3Smrg if (!intel_perf_load_metric_id(perf, metric_entry->d_name, &id)) { 2237ec681f3Smrg DBG("Failed to read metric set id from %s: %m", buf); 2247ec681f3Smrg continue; 2257ec681f3Smrg } 2267ec681f3Smrg 2277ec681f3Smrg register_oa_config(perf, devinfo, 2287ec681f3Smrg (const struct intel_perf_query_info *)entry->data, id); 2297ec681f3Smrg } else 2307ec681f3Smrg DBG("metric set not known 
by mesa (skipping)\n"); 2317ec681f3Smrg } 2327ec681f3Smrg 2337ec681f3Smrg closedir(metricsdir); 2347ec681f3Smrg} 2357ec681f3Smrg 2367ec681f3Smrgstatic void 2377ec681f3Smrgadd_all_metrics(struct intel_perf_config *perf, 2387ec681f3Smrg const struct intel_device_info *devinfo) 2397ec681f3Smrg{ 2407ec681f3Smrg hash_table_foreach(perf->oa_metrics_table, entry) { 2417ec681f3Smrg const struct intel_perf_query_info *query = entry->data; 2427ec681f3Smrg register_oa_config(perf, devinfo, query, 0); 2437ec681f3Smrg } 2447ec681f3Smrg} 2457ec681f3Smrg 2467ec681f3Smrgstatic bool 2477ec681f3Smrgkernel_has_dynamic_config_support(struct intel_perf_config *perf, int fd) 2487ec681f3Smrg{ 2497ec681f3Smrg uint64_t invalid_config_id = UINT64_MAX; 2507ec681f3Smrg 2517ec681f3Smrg return intel_ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, 2527ec681f3Smrg &invalid_config_id) < 0 && errno == ENOENT; 2537ec681f3Smrg} 2547ec681f3Smrg 2557ec681f3Smrgstatic bool 2567ec681f3Smrgi915_query_perf_config_supported(struct intel_perf_config *perf, int fd) 2577ec681f3Smrg{ 2587ec681f3Smrg int32_t length = 0; 2597ec681f3Smrg return !intel_i915_query_flags(fd, DRM_I915_QUERY_PERF_CONFIG, 2607ec681f3Smrg DRM_I915_QUERY_PERF_CONFIG_LIST, 2617ec681f3Smrg NULL, &length); 2627ec681f3Smrg} 2637ec681f3Smrg 2647ec681f3Smrgstatic bool 2657ec681f3Smrgi915_query_perf_config_data(struct intel_perf_config *perf, 2667ec681f3Smrg int fd, const char *guid, 2677ec681f3Smrg struct drm_i915_perf_oa_config *config) 2687ec681f3Smrg{ 2697ec681f3Smrg char data[sizeof(struct drm_i915_query_perf_config) + 2707ec681f3Smrg sizeof(struct drm_i915_perf_oa_config)] = {}; 2717ec681f3Smrg struct drm_i915_query_perf_config *query = (void *)data; 2727ec681f3Smrg 2737ec681f3Smrg memcpy(query->uuid, guid, sizeof(query->uuid)); 2747ec681f3Smrg memcpy(query->data, config, sizeof(*config)); 2757ec681f3Smrg 2767ec681f3Smrg int32_t item_length = sizeof(data); 2777ec681f3Smrg if (intel_i915_query_flags(fd, DRM_I915_QUERY_PERF_CONFIG, 
2787ec681f3Smrg DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID, 2797ec681f3Smrg query, &item_length)) 2807ec681f3Smrg return false; 2817ec681f3Smrg 2827ec681f3Smrg memcpy(config, query->data, sizeof(*config)); 2837ec681f3Smrg 2847ec681f3Smrg return true; 2857ec681f3Smrg} 2867ec681f3Smrg 2877ec681f3Smrgbool 2887ec681f3Smrgintel_perf_load_metric_id(struct intel_perf_config *perf_cfg, 2897ec681f3Smrg const char *guid, 2907ec681f3Smrg uint64_t *metric_id) 2917ec681f3Smrg{ 2927ec681f3Smrg char config_path[280]; 2937ec681f3Smrg 2947ec681f3Smrg snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id", 2957ec681f3Smrg perf_cfg->sysfs_dev_dir, guid); 2967ec681f3Smrg 2977ec681f3Smrg /* Don't recreate already loaded configs. */ 2987ec681f3Smrg return read_file_uint64(config_path, metric_id); 2997ec681f3Smrg} 3007ec681f3Smrg 3017ec681f3Smrgstatic uint64_t 3027ec681f3Smrgi915_add_config(struct intel_perf_config *perf, int fd, 3037ec681f3Smrg const struct intel_perf_registers *config, 3047ec681f3Smrg const char *guid) 3057ec681f3Smrg{ 3067ec681f3Smrg struct drm_i915_perf_oa_config i915_config = { 0, }; 3077ec681f3Smrg 3087ec681f3Smrg memcpy(i915_config.uuid, guid, sizeof(i915_config.uuid)); 3097ec681f3Smrg 3107ec681f3Smrg i915_config.n_mux_regs = config->n_mux_regs; 3117ec681f3Smrg i915_config.mux_regs_ptr = to_const_user_pointer(config->mux_regs); 3127ec681f3Smrg 3137ec681f3Smrg i915_config.n_boolean_regs = config->n_b_counter_regs; 3147ec681f3Smrg i915_config.boolean_regs_ptr = to_const_user_pointer(config->b_counter_regs); 3157ec681f3Smrg 3167ec681f3Smrg i915_config.n_flex_regs = config->n_flex_regs; 3177ec681f3Smrg i915_config.flex_regs_ptr = to_const_user_pointer(config->flex_regs); 3187ec681f3Smrg 3197ec681f3Smrg int ret = intel_ioctl(fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &i915_config); 3207ec681f3Smrg return ret > 0 ? 
ret : 0; 3217ec681f3Smrg} 3227ec681f3Smrg 3237ec681f3Smrgstatic void 3247ec681f3Smrginit_oa_configs(struct intel_perf_config *perf, int fd, 3257ec681f3Smrg const struct intel_device_info *devinfo) 3267ec681f3Smrg{ 3277ec681f3Smrg hash_table_foreach(perf->oa_metrics_table, entry) { 3287ec681f3Smrg const struct intel_perf_query_info *query = entry->data; 3297ec681f3Smrg uint64_t config_id; 3307ec681f3Smrg 3317ec681f3Smrg if (intel_perf_load_metric_id(perf, query->guid, &config_id)) { 3327ec681f3Smrg DBG("metric set: %s (already loaded)\n", query->guid); 3337ec681f3Smrg register_oa_config(perf, devinfo, query, config_id); 3347ec681f3Smrg continue; 3357ec681f3Smrg } 3367ec681f3Smrg 3377ec681f3Smrg int ret = i915_add_config(perf, fd, &query->config, query->guid); 3387ec681f3Smrg if (ret < 0) { 3397ec681f3Smrg DBG("Failed to load \"%s\" (%s) metrics set in kernel: %s\n", 3407ec681f3Smrg query->name, query->guid, strerror(errno)); 3417ec681f3Smrg continue; 3427ec681f3Smrg } 3437ec681f3Smrg 3447ec681f3Smrg register_oa_config(perf, devinfo, query, ret); 3457ec681f3Smrg DBG("metric set: %s (added)\n", query->guid); 3467ec681f3Smrg } 3477ec681f3Smrg} 3487ec681f3Smrg 3497ec681f3Smrgstatic void 3507ec681f3Smrgcompute_topology_builtins(struct intel_perf_config *perf, 3517ec681f3Smrg const struct intel_device_info *devinfo) 3527ec681f3Smrg{ 3537ec681f3Smrg perf->sys_vars.slice_mask = devinfo->slice_masks; 3547ec681f3Smrg perf->sys_vars.n_eu_slices = devinfo->num_slices; 3557ec681f3Smrg 3567ec681f3Smrg for (int i = 0; i < sizeof(devinfo->subslice_masks[i]); i++) { 3577ec681f3Smrg perf->sys_vars.n_eu_sub_slices += 3587ec681f3Smrg util_bitcount(devinfo->subslice_masks[i]); 3597ec681f3Smrg } 3607ec681f3Smrg 3617ec681f3Smrg for (int i = 0; i < sizeof(devinfo->eu_masks); i++) 3627ec681f3Smrg perf->sys_vars.n_eus += util_bitcount(devinfo->eu_masks[i]); 3637ec681f3Smrg 3647ec681f3Smrg perf->sys_vars.eu_threads_count = devinfo->num_thread_per_eu; 3657ec681f3Smrg 3667ec681f3Smrg /* The 
subslice mask builtin contains bits for all slices. Prior to Gfx11 3677ec681f3Smrg * it had groups of 3bits for each slice, on Gfx11 and above it's 8bits for 3687ec681f3Smrg * each slice. 3697ec681f3Smrg * 3707ec681f3Smrg * Ideally equations would be updated to have a slice/subslice query 3717ec681f3Smrg * function/operator. 3727ec681f3Smrg */ 3737ec681f3Smrg perf->sys_vars.subslice_mask = 0; 3747ec681f3Smrg 3757ec681f3Smrg int bits_per_subslice = devinfo->ver >= 11 ? 8 : 3; 3767ec681f3Smrg 3777ec681f3Smrg for (int s = 0; s < util_last_bit(devinfo->slice_masks); s++) { 3787ec681f3Smrg for (int ss = 0; ss < (devinfo->subslice_slice_stride * 8); ss++) { 3797ec681f3Smrg if (intel_device_info_subslice_available(devinfo, s, ss)) 3807ec681f3Smrg perf->sys_vars.subslice_mask |= 1ULL << (s * bits_per_subslice + ss); 3817ec681f3Smrg } 3827ec681f3Smrg } 3837ec681f3Smrg} 3847ec681f3Smrg 3857ec681f3Smrgstatic bool 3867ec681f3Smrginit_oa_sys_vars(struct intel_perf_config *perf, 3877ec681f3Smrg const struct intel_device_info *devinfo, 3887ec681f3Smrg bool use_register_snapshots) 3897ec681f3Smrg{ 3907ec681f3Smrg uint64_t min_freq_mhz = 0, max_freq_mhz = 0; 3917ec681f3Smrg 3927ec681f3Smrg if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) { 3937ec681f3Smrg if (!read_sysfs_drm_device_file_uint64(perf, "gt_min_freq_mhz", &min_freq_mhz)) 3947ec681f3Smrg return false; 3957ec681f3Smrg 3967ec681f3Smrg if (!read_sysfs_drm_device_file_uint64(perf, "gt_max_freq_mhz", &max_freq_mhz)) 3977ec681f3Smrg return false; 3987ec681f3Smrg } else { 3997ec681f3Smrg min_freq_mhz = 300; 4007ec681f3Smrg max_freq_mhz = 1000; 4017ec681f3Smrg } 4027ec681f3Smrg 4037ec681f3Smrg memset(&perf->sys_vars, 0, sizeof(perf->sys_vars)); 4047ec681f3Smrg perf->sys_vars.gt_min_freq = min_freq_mhz * 1000000; 4057ec681f3Smrg perf->sys_vars.gt_max_freq = max_freq_mhz * 1000000; 4067ec681f3Smrg perf->sys_vars.timestamp_frequency = devinfo->timestamp_frequency; 4077ec681f3Smrg perf->sys_vars.revision = devinfo->revision; 4087ec681f3Smrg 
perf->sys_vars.query_mode = use_register_snapshots; 4097ec681f3Smrg compute_topology_builtins(perf, devinfo); 4107ec681f3Smrg 4117ec681f3Smrg return true; 4127ec681f3Smrg} 4137ec681f3Smrg 4147ec681f3Smrgtypedef void (*perf_register_oa_queries_t)(struct intel_perf_config *); 4157ec681f3Smrg 4167ec681f3Smrgstatic perf_register_oa_queries_t 4177ec681f3Smrgget_register_queries_function(const struct intel_device_info *devinfo) 4187ec681f3Smrg{ 4197ec681f3Smrg if (devinfo->is_haswell) 4207ec681f3Smrg return intel_oa_register_queries_hsw; 4217ec681f3Smrg if (devinfo->is_cherryview) 4227ec681f3Smrg return intel_oa_register_queries_chv; 4237ec681f3Smrg if (devinfo->is_broadwell) 4247ec681f3Smrg return intel_oa_register_queries_bdw; 4257ec681f3Smrg if (devinfo->is_broxton) 4267ec681f3Smrg return intel_oa_register_queries_bxt; 4277ec681f3Smrg if (devinfo->is_skylake) { 4287ec681f3Smrg if (devinfo->gt == 2) 4297ec681f3Smrg return intel_oa_register_queries_sklgt2; 4307ec681f3Smrg if (devinfo->gt == 3) 4317ec681f3Smrg return intel_oa_register_queries_sklgt3; 4327ec681f3Smrg if (devinfo->gt == 4) 4337ec681f3Smrg return intel_oa_register_queries_sklgt4; 4347ec681f3Smrg } 4357ec681f3Smrg if (devinfo->is_kabylake) { 4367ec681f3Smrg if (devinfo->gt == 2) 4377ec681f3Smrg return intel_oa_register_queries_kblgt2; 4387ec681f3Smrg if (devinfo->gt == 3) 4397ec681f3Smrg return intel_oa_register_queries_kblgt3; 4407ec681f3Smrg } 4417ec681f3Smrg if (devinfo->is_geminilake) 4427ec681f3Smrg return intel_oa_register_queries_glk; 4437ec681f3Smrg if (devinfo->is_coffeelake) { 4447ec681f3Smrg if (devinfo->gt == 2) 4457ec681f3Smrg return intel_oa_register_queries_cflgt2; 4467ec681f3Smrg if (devinfo->gt == 3) 4477ec681f3Smrg return intel_oa_register_queries_cflgt3; 4487ec681f3Smrg } 4497ec681f3Smrg if (devinfo->ver == 11) { 4507ec681f3Smrg if (devinfo->is_elkhartlake) 4517ec681f3Smrg return intel_oa_register_queries_ehl; 4527ec681f3Smrg return intel_oa_register_queries_icl; 4537ec681f3Smrg } 
4547ec681f3Smrg if (devinfo->is_tigerlake) { 4557ec681f3Smrg if (devinfo->gt == 1) 4567ec681f3Smrg return intel_oa_register_queries_tglgt1; 4577ec681f3Smrg if (devinfo->gt == 2) 4587ec681f3Smrg return intel_oa_register_queries_tglgt2; 4597ec681f3Smrg } 4607ec681f3Smrg if (devinfo->is_rocketlake) 4617ec681f3Smrg return intel_oa_register_queries_rkl; 4627ec681f3Smrg if (devinfo->is_dg1) 4637ec681f3Smrg return intel_oa_register_queries_dg1; 4647ec681f3Smrg if (devinfo->is_alderlake) 4657ec681f3Smrg return intel_oa_register_queries_adl; 4667ec681f3Smrg 4677ec681f3Smrg return NULL; 4687ec681f3Smrg} 4697ec681f3Smrg 4707ec681f3Smrgstatic int 4717ec681f3Smrgintel_perf_compare_counter_names(const void *v1, const void *v2) 4727ec681f3Smrg{ 4737ec681f3Smrg const struct intel_perf_query_counter *c1 = v1; 4747ec681f3Smrg const struct intel_perf_query_counter *c2 = v2; 4757ec681f3Smrg 4767ec681f3Smrg return strcmp(c1->name, c2->name); 4777ec681f3Smrg} 4787ec681f3Smrg 4797ec681f3Smrgstatic void 4807ec681f3Smrgsort_query(struct intel_perf_query_info *q) 4817ec681f3Smrg{ 4827ec681f3Smrg qsort(q->counters, q->n_counters, sizeof(q->counters[0]), 4837ec681f3Smrg intel_perf_compare_counter_names); 4847ec681f3Smrg} 4857ec681f3Smrg 4867ec681f3Smrgstatic void 4877ec681f3Smrgload_pipeline_statistic_metrics(struct intel_perf_config *perf_cfg, 4887ec681f3Smrg const struct intel_device_info *devinfo) 4897ec681f3Smrg{ 4907ec681f3Smrg struct intel_perf_query_info *query = 4917ec681f3Smrg intel_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS); 4927ec681f3Smrg 4937ec681f3Smrg query->kind = INTEL_PERF_QUERY_TYPE_PIPELINE; 4947ec681f3Smrg query->name = "Pipeline Statistics Registers"; 4957ec681f3Smrg 4967ec681f3Smrg intel_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT, 4977ec681f3Smrg "N vertices submitted"); 4987ec681f3Smrg intel_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, 4997ec681f3Smrg "N primitives submitted"); 5007ec681f3Smrg intel_perf_query_add_basic_stat_reg(query, 
VS_INVOCATION_COUNT, 5017ec681f3Smrg "N vertex shader invocations"); 5027ec681f3Smrg 5037ec681f3Smrg if (devinfo->ver == 6) { 5047ec681f3Smrg intel_perf_query_add_stat_reg(query, GFX6_SO_PRIM_STORAGE_NEEDED, 1, 1, 5057ec681f3Smrg "SO_PRIM_STORAGE_NEEDED", 5067ec681f3Smrg "N geometry shader stream-out primitives (total)"); 5077ec681f3Smrg intel_perf_query_add_stat_reg(query, GFX6_SO_NUM_PRIMS_WRITTEN, 1, 1, 5087ec681f3Smrg "SO_NUM_PRIMS_WRITTEN", 5097ec681f3Smrg "N geometry shader stream-out primitives (written)"); 5107ec681f3Smrg } else { 5117ec681f3Smrg intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(0), 1, 1, 5127ec681f3Smrg "SO_PRIM_STORAGE_NEEDED (Stream 0)", 5137ec681f3Smrg "N stream-out (stream 0) primitives (total)"); 5147ec681f3Smrg intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(1), 1, 1, 5157ec681f3Smrg "SO_PRIM_STORAGE_NEEDED (Stream 1)", 5167ec681f3Smrg "N stream-out (stream 1) primitives (total)"); 5177ec681f3Smrg intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(2), 1, 1, 5187ec681f3Smrg "SO_PRIM_STORAGE_NEEDED (Stream 2)", 5197ec681f3Smrg "N stream-out (stream 2) primitives (total)"); 5207ec681f3Smrg intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(3), 1, 1, 5217ec681f3Smrg "SO_PRIM_STORAGE_NEEDED (Stream 3)", 5227ec681f3Smrg "N stream-out (stream 3) primitives (total)"); 5237ec681f3Smrg intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(0), 1, 1, 5247ec681f3Smrg "SO_NUM_PRIMS_WRITTEN (Stream 0)", 5257ec681f3Smrg "N stream-out (stream 0) primitives (written)"); 5267ec681f3Smrg intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(1), 1, 1, 5277ec681f3Smrg "SO_NUM_PRIMS_WRITTEN (Stream 1)", 5287ec681f3Smrg "N stream-out (stream 1) primitives (written)"); 5297ec681f3Smrg intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(2), 1, 1, 5307ec681f3Smrg "SO_NUM_PRIMS_WRITTEN (Stream 2)", 5317ec681f3Smrg "N stream-out (stream 2) primitives (written)"); 
5327ec681f3Smrg intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(3), 1, 1, 5337ec681f3Smrg "SO_NUM_PRIMS_WRITTEN (Stream 3)", 5347ec681f3Smrg "N stream-out (stream 3) primitives (written)"); 5357ec681f3Smrg } 5367ec681f3Smrg 5377ec681f3Smrg intel_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT, 5387ec681f3Smrg "N TCS shader invocations"); 5397ec681f3Smrg intel_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT, 5407ec681f3Smrg "N TES shader invocations"); 5417ec681f3Smrg 5427ec681f3Smrg intel_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT, 5437ec681f3Smrg "N geometry shader invocations"); 5447ec681f3Smrg intel_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, 5457ec681f3Smrg "N geometry shader primitives emitted"); 5467ec681f3Smrg 5477ec681f3Smrg intel_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT, 5487ec681f3Smrg "N primitives entering clipping"); 5497ec681f3Smrg intel_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, 5507ec681f3Smrg "N primitives leaving clipping"); 5517ec681f3Smrg 5527ec681f3Smrg if (devinfo->is_haswell || devinfo->ver == 8) { 5537ec681f3Smrg intel_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, 5547ec681f3Smrg "N fragment shader invocations", 5557ec681f3Smrg "N fragment shader invocations"); 5567ec681f3Smrg } else { 5577ec681f3Smrg intel_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT, 5587ec681f3Smrg "N fragment shader invocations"); 5597ec681f3Smrg } 5607ec681f3Smrg 5617ec681f3Smrg intel_perf_query_add_basic_stat_reg(query, PS_DEPTH_COUNT, 5627ec681f3Smrg "N z-pass fragments"); 5637ec681f3Smrg 5647ec681f3Smrg if (devinfo->ver >= 7) { 5657ec681f3Smrg intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT, 5667ec681f3Smrg "N compute shader invocations"); 5677ec681f3Smrg } 5687ec681f3Smrg 5697ec681f3Smrg query->data_size = sizeof(uint64_t) * query->n_counters; 5707ec681f3Smrg 5717ec681f3Smrg sort_query(query); 5727ec681f3Smrg} 5737ec681f3Smrg 
5747ec681f3Smrgstatic int 5757ec681f3Smrgi915_perf_version(int drm_fd) 5767ec681f3Smrg{ 5777ec681f3Smrg int tmp; 5787ec681f3Smrg drm_i915_getparam_t gp = { 5797ec681f3Smrg .param = I915_PARAM_PERF_REVISION, 5807ec681f3Smrg .value = &tmp, 5817ec681f3Smrg }; 5827ec681f3Smrg 5837ec681f3Smrg int ret = intel_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp); 5847ec681f3Smrg 5857ec681f3Smrg /* Return 0 if this getparam is not supported, the first version supported 5867ec681f3Smrg * is 1. 5877ec681f3Smrg */ 5887ec681f3Smrg return ret < 0 ? 0 : tmp; 5897ec681f3Smrg} 5907ec681f3Smrg 5917ec681f3Smrgstatic void 5927ec681f3Smrgi915_get_sseu(int drm_fd, struct drm_i915_gem_context_param_sseu *sseu) 5937ec681f3Smrg{ 5947ec681f3Smrg struct drm_i915_gem_context_param arg = { 5957ec681f3Smrg .param = I915_CONTEXT_PARAM_SSEU, 5967ec681f3Smrg .size = sizeof(*sseu), 5977ec681f3Smrg .value = to_user_pointer(sseu) 5987ec681f3Smrg }; 5997ec681f3Smrg 6007ec681f3Smrg intel_ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg); 6017ec681f3Smrg} 6027ec681f3Smrg 6037ec681f3Smrgstatic inline int 6047ec681f3Smrgcompare_str_or_null(const char *s1, const char *s2) 6057ec681f3Smrg{ 6067ec681f3Smrg if (s1 == NULL && s2 == NULL) 6077ec681f3Smrg return 0; 6087ec681f3Smrg if (s1 == NULL) 6097ec681f3Smrg return -1; 6107ec681f3Smrg if (s2 == NULL) 6117ec681f3Smrg return 1; 6127ec681f3Smrg 6137ec681f3Smrg return strcmp(s1, s2); 6147ec681f3Smrg} 6157ec681f3Smrg 6167ec681f3Smrgstatic int 6177ec681f3Smrgcompare_counter_categories_and_names(const void *_c1, const void *_c2) 6187ec681f3Smrg{ 6197ec681f3Smrg const struct intel_perf_query_counter_info *c1 = (const struct intel_perf_query_counter_info *)_c1; 6207ec681f3Smrg const struct intel_perf_query_counter_info *c2 = (const struct intel_perf_query_counter_info *)_c2; 6217ec681f3Smrg 6227ec681f3Smrg /* pipeline counters don't have an assigned category */ 6237ec681f3Smrg int r = compare_str_or_null(c1->counter->category, c2->counter->category); 6247ec681f3Smrg 
if (r) 6257ec681f3Smrg return r; 6267ec681f3Smrg 6277ec681f3Smrg return strcmp(c1->counter->name, c2->counter->name); 6287ec681f3Smrg} 6297ec681f3Smrg 6307ec681f3Smrgstatic void 6317ec681f3Smrgbuild_unique_counter_list(struct intel_perf_config *perf) 6327ec681f3Smrg{ 6337ec681f3Smrg assert(perf->n_queries < 64); 6347ec681f3Smrg 6357ec681f3Smrg size_t max_counters = 0; 6367ec681f3Smrg 6377ec681f3Smrg for (int q = 0; q < perf->n_queries; q++) 6387ec681f3Smrg max_counters += perf->queries[q].n_counters; 6397ec681f3Smrg 6407ec681f3Smrg /* 6417ec681f3Smrg * Allocate big enough array to hold maximum possible number of counters. 6427ec681f3Smrg * We can't alloc it small and realloc when needed because the hash table 6437ec681f3Smrg * below contains pointers to this array. 6447ec681f3Smrg */ 6457ec681f3Smrg struct intel_perf_query_counter_info *counter_infos = 6467ec681f3Smrg ralloc_array_size(perf, sizeof(counter_infos[0]), max_counters); 6477ec681f3Smrg 6487ec681f3Smrg perf->n_counters = 0; 6497ec681f3Smrg 6507ec681f3Smrg struct hash_table *counters_table = 6517ec681f3Smrg _mesa_hash_table_create(perf, 6527ec681f3Smrg _mesa_hash_string, 6537ec681f3Smrg _mesa_key_string_equal); 6547ec681f3Smrg struct hash_entry *entry; 6557ec681f3Smrg for (int q = 0; q < perf->n_queries ; q++) { 6567ec681f3Smrg struct intel_perf_query_info *query = &perf->queries[q]; 6577ec681f3Smrg 6587ec681f3Smrg for (int c = 0; c < query->n_counters; c++) { 6597ec681f3Smrg struct intel_perf_query_counter *counter; 6607ec681f3Smrg struct intel_perf_query_counter_info *counter_info; 6617ec681f3Smrg 6627ec681f3Smrg counter = &query->counters[c]; 6637ec681f3Smrg entry = _mesa_hash_table_search(counters_table, counter->symbol_name); 6647ec681f3Smrg 6657ec681f3Smrg if (entry) { 6667ec681f3Smrg counter_info = entry->data; 6677ec681f3Smrg counter_info->query_mask |= BITFIELD64_BIT(q); 6687ec681f3Smrg continue; 6697ec681f3Smrg } 6707ec681f3Smrg assert(perf->n_counters < max_counters); 6717ec681f3Smrg 
6727ec681f3Smrg counter_info = &counter_infos[perf->n_counters++]; 6737ec681f3Smrg counter_info->counter = counter; 6747ec681f3Smrg counter_info->query_mask = BITFIELD64_BIT(q); 6757ec681f3Smrg 6767ec681f3Smrg counter_info->location.group_idx = q; 6777ec681f3Smrg counter_info->location.counter_idx = c; 6787ec681f3Smrg 6797ec681f3Smrg _mesa_hash_table_insert(counters_table, counter->symbol_name, counter_info); 6807ec681f3Smrg } 6817ec681f3Smrg } 6827ec681f3Smrg 6837ec681f3Smrg _mesa_hash_table_destroy(counters_table, NULL); 6847ec681f3Smrg 6857ec681f3Smrg /* Now we can realloc counter_infos array because hash table doesn't exist. */ 6867ec681f3Smrg perf->counter_infos = reralloc_array_size(perf, counter_infos, 6877ec681f3Smrg sizeof(counter_infos[0]), perf->n_counters); 6887ec681f3Smrg 6897ec681f3Smrg qsort(perf->counter_infos, perf->n_counters, sizeof(perf->counter_infos[0]), 6907ec681f3Smrg compare_counter_categories_and_names); 6917ec681f3Smrg} 6927ec681f3Smrg 6937ec681f3Smrgstatic bool 6947ec681f3Smrgoa_metrics_available(struct intel_perf_config *perf, int fd, 6957ec681f3Smrg const struct intel_device_info *devinfo, 6967ec681f3Smrg bool use_register_snapshots) 6977ec681f3Smrg{ 6987ec681f3Smrg perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo); 6997ec681f3Smrg bool i915_perf_oa_available = false; 7007ec681f3Smrg struct stat sb; 7017ec681f3Smrg 7027ec681f3Smrg perf->i915_query_supported = i915_query_perf_config_supported(perf, fd); 7037ec681f3Smrg perf->i915_perf_version = i915_perf_version(fd); 7047ec681f3Smrg 7057ec681f3Smrg /* Record the default SSEU configuration. */ 7067ec681f3Smrg i915_get_sseu(fd, &perf->sseu); 7077ec681f3Smrg 7087ec681f3Smrg /* The existence of this sysctl parameter implies the kernel supports 7097ec681f3Smrg * the i915 perf interface. 
7107ec681f3Smrg */ 7117ec681f3Smrg if (stat("/proc/sys/dev/i915/perf_stream_paranoid", &sb) == 0) { 7127ec681f3Smrg 7137ec681f3Smrg /* If _paranoid == 1 then on Gfx8+ we won't be able to access OA 7147ec681f3Smrg * metrics unless running as root. 7157ec681f3Smrg */ 7167ec681f3Smrg if (devinfo->is_haswell) 7177ec681f3Smrg i915_perf_oa_available = true; 7187ec681f3Smrg else { 7197ec681f3Smrg uint64_t paranoid = 1; 7207ec681f3Smrg 7217ec681f3Smrg read_file_uint64("/proc/sys/dev/i915/perf_stream_paranoid", ¶noid); 7227ec681f3Smrg 7237ec681f3Smrg if (paranoid == 0 || geteuid() == 0) 7247ec681f3Smrg i915_perf_oa_available = true; 7257ec681f3Smrg } 7267ec681f3Smrg 7277ec681f3Smrg perf->platform_supported = oa_register != NULL; 7287ec681f3Smrg } 7297ec681f3Smrg 7307ec681f3Smrg return i915_perf_oa_available && 7317ec681f3Smrg oa_register && 7327ec681f3Smrg get_sysfs_dev_dir(perf, fd) && 7337ec681f3Smrg init_oa_sys_vars(perf, devinfo, use_register_snapshots); 7347ec681f3Smrg} 7357ec681f3Smrg 7367ec681f3Smrgstatic void 7377ec681f3Smrgload_oa_metrics(struct intel_perf_config *perf, int fd, 7387ec681f3Smrg const struct intel_device_info *devinfo) 7397ec681f3Smrg{ 7407ec681f3Smrg int existing_queries = perf->n_queries; 7417ec681f3Smrg 7427ec681f3Smrg perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo); 7437ec681f3Smrg 7447ec681f3Smrg perf->oa_metrics_table = 7457ec681f3Smrg _mesa_hash_table_create(perf, _mesa_hash_string, 7467ec681f3Smrg _mesa_key_string_equal); 7477ec681f3Smrg 7487ec681f3Smrg /* Index all the metric sets mesa knows about before looking to see what 7497ec681f3Smrg * the kernel is advertising. 
7507ec681f3Smrg */ 7517ec681f3Smrg oa_register(perf); 7527ec681f3Smrg 7537ec681f3Smrg if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) { 7547ec681f3Smrg if (kernel_has_dynamic_config_support(perf, fd)) 7557ec681f3Smrg init_oa_configs(perf, fd, devinfo); 7567ec681f3Smrg else 7577ec681f3Smrg enumerate_sysfs_metrics(perf, devinfo); 7587ec681f3Smrg } else { 7597ec681f3Smrg add_all_metrics(perf, devinfo); 7607ec681f3Smrg } 7617ec681f3Smrg 7627ec681f3Smrg /* sort counters in each individual group created by this function by name */ 7637ec681f3Smrg for (int i = existing_queries; i < perf->n_queries; ++i) 7647ec681f3Smrg sort_query(&perf->queries[i]); 7657ec681f3Smrg 7667ec681f3Smrg /* Select a fallback OA metric. Look for the TestOa metric or use the last 7677ec681f3Smrg * one if no present (on HSW). 7687ec681f3Smrg */ 7697ec681f3Smrg for (int i = existing_queries; i < perf->n_queries; i++) { 7707ec681f3Smrg if (perf->queries[i].symbol_name && 7717ec681f3Smrg strcmp(perf->queries[i].symbol_name, "TestOa") == 0) { 7727ec681f3Smrg perf->fallback_raw_oa_metric = perf->queries[i].oa_metrics_set_id; 7737ec681f3Smrg break; 7747ec681f3Smrg } 7757ec681f3Smrg } 7767ec681f3Smrg if (perf->fallback_raw_oa_metric == 0 && perf->n_queries > 0) 7777ec681f3Smrg perf->fallback_raw_oa_metric = perf->queries[perf->n_queries - 1].oa_metrics_set_id; 7787ec681f3Smrg} 7797ec681f3Smrg 7807ec681f3Smrgstruct intel_perf_registers * 7817ec681f3Smrgintel_perf_load_configuration(struct intel_perf_config *perf_cfg, int fd, const char *guid) 7827ec681f3Smrg{ 7837ec681f3Smrg if (!perf_cfg->i915_query_supported) 7847ec681f3Smrg return NULL; 7857ec681f3Smrg 7867ec681f3Smrg struct drm_i915_perf_oa_config i915_config = { 0, }; 7877ec681f3Smrg if (!i915_query_perf_config_data(perf_cfg, fd, guid, &i915_config)) 7887ec681f3Smrg return NULL; 7897ec681f3Smrg 7907ec681f3Smrg struct intel_perf_registers *config = rzalloc(NULL, struct intel_perf_registers); 7917ec681f3Smrg config->n_flex_regs = i915_config.n_flex_regs; 
7927ec681f3Smrg config->flex_regs = rzalloc_array(config, struct intel_perf_query_register_prog, config->n_flex_regs); 7937ec681f3Smrg config->n_mux_regs = i915_config.n_mux_regs; 7947ec681f3Smrg config->mux_regs = rzalloc_array(config, struct intel_perf_query_register_prog, config->n_mux_regs); 7957ec681f3Smrg config->n_b_counter_regs = i915_config.n_boolean_regs; 7967ec681f3Smrg config->b_counter_regs = rzalloc_array(config, struct intel_perf_query_register_prog, config->n_b_counter_regs); 7977ec681f3Smrg 7987ec681f3Smrg /* 7997ec681f3Smrg * struct intel_perf_query_register_prog maps exactly to the tuple of 8007ec681f3Smrg * (register offset, register value) returned by the i915. 8017ec681f3Smrg */ 8027ec681f3Smrg i915_config.flex_regs_ptr = to_const_user_pointer(config->flex_regs); 8037ec681f3Smrg i915_config.mux_regs_ptr = to_const_user_pointer(config->mux_regs); 8047ec681f3Smrg i915_config.boolean_regs_ptr = to_const_user_pointer(config->b_counter_regs); 8057ec681f3Smrg if (!i915_query_perf_config_data(perf_cfg, fd, guid, &i915_config)) { 8067ec681f3Smrg ralloc_free(config); 8077ec681f3Smrg return NULL; 8087ec681f3Smrg } 8097ec681f3Smrg 8107ec681f3Smrg return config; 8117ec681f3Smrg} 8127ec681f3Smrg 8137ec681f3Smrguint64_t 8147ec681f3Smrgintel_perf_store_configuration(struct intel_perf_config *perf_cfg, int fd, 8157ec681f3Smrg const struct intel_perf_registers *config, 8167ec681f3Smrg const char *guid) 8177ec681f3Smrg{ 8187ec681f3Smrg if (guid) 8197ec681f3Smrg return i915_add_config(perf_cfg, fd, config, guid); 8207ec681f3Smrg 8217ec681f3Smrg struct mesa_sha1 sha1_ctx; 8227ec681f3Smrg _mesa_sha1_init(&sha1_ctx); 8237ec681f3Smrg 8247ec681f3Smrg if (config->flex_regs) { 8257ec681f3Smrg _mesa_sha1_update(&sha1_ctx, config->flex_regs, 8267ec681f3Smrg sizeof(config->flex_regs[0]) * 8277ec681f3Smrg config->n_flex_regs); 8287ec681f3Smrg } 8297ec681f3Smrg if (config->mux_regs) { 8307ec681f3Smrg _mesa_sha1_update(&sha1_ctx, config->mux_regs, 8317ec681f3Smrg 
sizeof(config->mux_regs[0]) * 8327ec681f3Smrg config->n_mux_regs); 8337ec681f3Smrg } 8347ec681f3Smrg if (config->b_counter_regs) { 8357ec681f3Smrg _mesa_sha1_update(&sha1_ctx, config->b_counter_regs, 8367ec681f3Smrg sizeof(config->b_counter_regs[0]) * 8377ec681f3Smrg config->n_b_counter_regs); 8387ec681f3Smrg } 8397ec681f3Smrg 8407ec681f3Smrg uint8_t hash[20]; 8417ec681f3Smrg _mesa_sha1_final(&sha1_ctx, hash); 8427ec681f3Smrg 8437ec681f3Smrg char formatted_hash[41]; 8447ec681f3Smrg _mesa_sha1_format(formatted_hash, hash); 8457ec681f3Smrg 8467ec681f3Smrg char generated_guid[37]; 8477ec681f3Smrg snprintf(generated_guid, sizeof(generated_guid), 8487ec681f3Smrg "%.8s-%.4s-%.4s-%.4s-%.12s", 8497ec681f3Smrg &formatted_hash[0], &formatted_hash[8], 8507ec681f3Smrg &formatted_hash[8 + 4], &formatted_hash[8 + 4 + 4], 8517ec681f3Smrg &formatted_hash[8 + 4 + 4 + 4]); 8527ec681f3Smrg 8537ec681f3Smrg /* Check if already present. */ 8547ec681f3Smrg uint64_t id; 8557ec681f3Smrg if (intel_perf_load_metric_id(perf_cfg, generated_guid, &id)) 8567ec681f3Smrg return id; 8577ec681f3Smrg 8587ec681f3Smrg return i915_add_config(perf_cfg, fd, config, generated_guid); 8597ec681f3Smrg} 8607ec681f3Smrg 8617ec681f3Smrgstatic uint64_t 8627ec681f3Smrgget_passes_mask(struct intel_perf_config *perf, 8637ec681f3Smrg const uint32_t *counter_indices, 8647ec681f3Smrg uint32_t counter_indices_count) 8657ec681f3Smrg{ 8667ec681f3Smrg uint64_t queries_mask = 0; 8677ec681f3Smrg 8687ec681f3Smrg assert(perf->n_queries < 64); 8697ec681f3Smrg 8707ec681f3Smrg /* Compute the number of passes by going through all counters N times (with 8717ec681f3Smrg * N the number of queries) to make sure we select the most constraining 8727ec681f3Smrg * counters first and look at the more flexible ones (that could be 8737ec681f3Smrg * obtained from multiple queries) later. That way we minimize the number 8747ec681f3Smrg * of passes required. 
8757ec681f3Smrg */ 8767ec681f3Smrg for (uint32_t q = 0; q < perf->n_queries; q++) { 8777ec681f3Smrg for (uint32_t i = 0; i < counter_indices_count; i++) { 8787ec681f3Smrg assert(counter_indices[i] < perf->n_counters); 8797ec681f3Smrg 8807ec681f3Smrg uint32_t idx = counter_indices[i]; 8817ec681f3Smrg if (util_bitcount64(perf->counter_infos[idx].query_mask) != (q + 1)) 8827ec681f3Smrg continue; 8837ec681f3Smrg 8847ec681f3Smrg if (queries_mask & perf->counter_infos[idx].query_mask) 8857ec681f3Smrg continue; 8867ec681f3Smrg 8877ec681f3Smrg queries_mask |= BITFIELD64_BIT(ffsll(perf->counter_infos[idx].query_mask) - 1); 8887ec681f3Smrg } 8897ec681f3Smrg } 8907ec681f3Smrg 8917ec681f3Smrg return queries_mask; 8927ec681f3Smrg} 8937ec681f3Smrg 8947ec681f3Smrguint32_t 8957ec681f3Smrgintel_perf_get_n_passes(struct intel_perf_config *perf, 8967ec681f3Smrg const uint32_t *counter_indices, 8977ec681f3Smrg uint32_t counter_indices_count, 8987ec681f3Smrg struct intel_perf_query_info **pass_queries) 8997ec681f3Smrg{ 9007ec681f3Smrg uint64_t queries_mask = get_passes_mask(perf, counter_indices, counter_indices_count); 9017ec681f3Smrg 9027ec681f3Smrg if (pass_queries) { 9037ec681f3Smrg uint32_t pass = 0; 9047ec681f3Smrg for (uint32_t q = 0; q < perf->n_queries; q++) { 9057ec681f3Smrg if ((1ULL << q) & queries_mask) 9067ec681f3Smrg pass_queries[pass++] = &perf->queries[q]; 9077ec681f3Smrg } 9087ec681f3Smrg } 9097ec681f3Smrg 9107ec681f3Smrg return util_bitcount64(queries_mask); 9117ec681f3Smrg} 9127ec681f3Smrg 9137ec681f3Smrgvoid 9147ec681f3Smrgintel_perf_get_counters_passes(struct intel_perf_config *perf, 9157ec681f3Smrg const uint32_t *counter_indices, 9167ec681f3Smrg uint32_t counter_indices_count, 9177ec681f3Smrg struct intel_perf_counter_pass *counter_pass) 9187ec681f3Smrg{ 9197ec681f3Smrg uint64_t queries_mask = get_passes_mask(perf, counter_indices, counter_indices_count); 9207ec681f3Smrg ASSERTED uint32_t n_passes = util_bitcount64(queries_mask); 9217ec681f3Smrg 9227ec681f3Smrg 
for (uint32_t i = 0; i < counter_indices_count; i++) { 9237ec681f3Smrg assert(counter_indices[i] < perf->n_counters); 9247ec681f3Smrg 9257ec681f3Smrg uint32_t idx = counter_indices[i]; 9267ec681f3Smrg counter_pass[i].counter = perf->counter_infos[idx].counter; 9277ec681f3Smrg 9287ec681f3Smrg uint32_t query_idx = ffsll(perf->counter_infos[idx].query_mask & queries_mask) - 1; 9297ec681f3Smrg counter_pass[i].query = &perf->queries[query_idx]; 9307ec681f3Smrg 9317ec681f3Smrg uint32_t clear_bits = 63 - query_idx; 9327ec681f3Smrg counter_pass[i].pass = util_bitcount64((queries_mask << clear_bits) >> clear_bits) - 1; 9337ec681f3Smrg assert(counter_pass[i].pass < n_passes); 9347ec681f3Smrg } 9357ec681f3Smrg} 9367ec681f3Smrg 9377ec681f3Smrg/* Accumulate 32bits OA counters */ 9387ec681f3Smrgstatic inline void 9397ec681f3Smrgaccumulate_uint32(const uint32_t *report0, 9407ec681f3Smrg const uint32_t *report1, 9417ec681f3Smrg uint64_t *accumulator) 9427ec681f3Smrg{ 9437ec681f3Smrg *accumulator += (uint32_t)(*report1 - *report0); 9447ec681f3Smrg} 9457ec681f3Smrg 9467ec681f3Smrg/* Accumulate 40bits OA counters */ 9477ec681f3Smrgstatic inline void 9487ec681f3Smrgaccumulate_uint40(int a_index, 9497ec681f3Smrg const uint32_t *report0, 9507ec681f3Smrg const uint32_t *report1, 9517ec681f3Smrg uint64_t *accumulator) 9527ec681f3Smrg{ 9537ec681f3Smrg const uint8_t *high_bytes0 = (uint8_t *)(report0 + 40); 9547ec681f3Smrg const uint8_t *high_bytes1 = (uint8_t *)(report1 + 40); 9557ec681f3Smrg uint64_t high0 = (uint64_t)(high_bytes0[a_index]) << 32; 9567ec681f3Smrg uint64_t high1 = (uint64_t)(high_bytes1[a_index]) << 32; 9577ec681f3Smrg uint64_t value0 = report0[a_index + 4] | high0; 9587ec681f3Smrg uint64_t value1 = report1[a_index + 4] | high1; 9597ec681f3Smrg uint64_t delta; 9607ec681f3Smrg 9617ec681f3Smrg if (value0 > value1) 9627ec681f3Smrg delta = (1ULL << 40) + value1 - value0; 9637ec681f3Smrg else 9647ec681f3Smrg delta = value1 - value0; 9657ec681f3Smrg 9667ec681f3Smrg *accumulator 
+= delta; 9677ec681f3Smrg} 9687ec681f3Smrg 9697ec681f3Smrgstatic void 9707ec681f3Smrggfx8_read_report_clock_ratios(const uint32_t *report, 9717ec681f3Smrg uint64_t *slice_freq_hz, 9727ec681f3Smrg uint64_t *unslice_freq_hz) 9737ec681f3Smrg{ 9747ec681f3Smrg /* The lower 16bits of the RPT_ID field of the OA reports contains a 9757ec681f3Smrg * snapshot of the bits coming from the RP_FREQ_NORMAL register and is 9767ec681f3Smrg * divided this way : 9777ec681f3Smrg * 9787ec681f3Smrg * RPT_ID[31:25]: RP_FREQ_NORMAL[20:14] (low squashed_slice_clock_frequency) 9797ec681f3Smrg * RPT_ID[10:9]: RP_FREQ_NORMAL[22:21] (high squashed_slice_clock_frequency) 9807ec681f3Smrg * RPT_ID[8:0]: RP_FREQ_NORMAL[31:23] (squashed_unslice_clock_frequency) 9817ec681f3Smrg * 9827ec681f3Smrg * RP_FREQ_NORMAL[31:23]: Software Unslice Ratio Request 9837ec681f3Smrg * Multiple of 33.33MHz 2xclk (16 MHz 1xclk) 9847ec681f3Smrg * 9857ec681f3Smrg * RP_FREQ_NORMAL[22:14]: Software Slice Ratio Request 9867ec681f3Smrg * Multiple of 33.33MHz 2xclk (16 MHz 1xclk) 9877ec681f3Smrg */ 9887ec681f3Smrg 9897ec681f3Smrg uint32_t unslice_freq = report[0] & 0x1ff; 9907ec681f3Smrg uint32_t slice_freq_low = (report[0] >> 25) & 0x7f; 9917ec681f3Smrg uint32_t slice_freq_high = (report[0] >> 9) & 0x3; 9927ec681f3Smrg uint32_t slice_freq = slice_freq_low | (slice_freq_high << 7); 9937ec681f3Smrg 9947ec681f3Smrg *slice_freq_hz = slice_freq * 16666667ULL; 9957ec681f3Smrg *unslice_freq_hz = unslice_freq * 16666667ULL; 9967ec681f3Smrg} 9977ec681f3Smrg 9987ec681f3Smrgvoid 9997ec681f3Smrgintel_perf_query_result_read_frequencies(struct intel_perf_query_result *result, 10007ec681f3Smrg const struct intel_device_info *devinfo, 10017ec681f3Smrg const uint32_t *start, 10027ec681f3Smrg const uint32_t *end) 10037ec681f3Smrg{ 10047ec681f3Smrg /* Slice/Unslice frequency is only available in the OA reports when the 10057ec681f3Smrg * "Disable OA reports due to clock ratio change" field in 10067ec681f3Smrg * OA_DEBUG_REGISTER is set to 1. 
This is how the kernel programs this 10077ec681f3Smrg * global register (see drivers/gpu/drm/i915/i915_perf.c) 10087ec681f3Smrg * 10097ec681f3Smrg * Documentation says this should be available on Gfx9+ but experimentation 10107ec681f3Smrg * shows that Gfx8 reports similar values, so we enable it there too. 10117ec681f3Smrg */ 10127ec681f3Smrg if (devinfo->ver < 8) 10137ec681f3Smrg return; 10147ec681f3Smrg 10157ec681f3Smrg gfx8_read_report_clock_ratios(start, 10167ec681f3Smrg &result->slice_frequency[0], 10177ec681f3Smrg &result->unslice_frequency[0]); 10187ec681f3Smrg gfx8_read_report_clock_ratios(end, 10197ec681f3Smrg &result->slice_frequency[1], 10207ec681f3Smrg &result->unslice_frequency[1]); 10217ec681f3Smrg} 10227ec681f3Smrg 10237ec681f3Smrgstatic inline bool 10247ec681f3Smrgcan_use_mi_rpc_bc_counters(const struct intel_device_info *devinfo) 10257ec681f3Smrg{ 10267ec681f3Smrg return devinfo->ver <= 11; 10277ec681f3Smrg} 10287ec681f3Smrg 10297ec681f3Smrgvoid 10307ec681f3Smrgintel_perf_query_result_accumulate(struct intel_perf_query_result *result, 10317ec681f3Smrg const struct intel_perf_query_info *query, 10327ec681f3Smrg const struct intel_device_info *devinfo, 10337ec681f3Smrg const uint32_t *start, 10347ec681f3Smrg const uint32_t *end) 10357ec681f3Smrg{ 10367ec681f3Smrg int i; 10377ec681f3Smrg 10387ec681f3Smrg if (result->hw_id == INTEL_PERF_INVALID_CTX_ID && 10397ec681f3Smrg start[2] != INTEL_PERF_INVALID_CTX_ID) 10407ec681f3Smrg result->hw_id = start[2]; 10417ec681f3Smrg if (result->reports_accumulated == 0) 10427ec681f3Smrg result->begin_timestamp = start[1]; 10437ec681f3Smrg result->reports_accumulated++; 10447ec681f3Smrg 10457ec681f3Smrg switch (query->oa_format) { 10467ec681f3Smrg case I915_OA_FORMAT_A32u40_A4u32_B8_C8: 10477ec681f3Smrg accumulate_uint32(start + 1, end + 1, 10487ec681f3Smrg result->accumulator + query->gpu_time_offset); /* timestamp */ 10497ec681f3Smrg accumulate_uint32(start + 3, end + 3, 10507ec681f3Smrg result->accumulator + 
query->gpu_clock_offset); /* clock */ 10517ec681f3Smrg 10527ec681f3Smrg /* 32x 40bit A counters... */ 10537ec681f3Smrg for (i = 0; i < 32; i++) { 10547ec681f3Smrg accumulate_uint40(i, start, end, 10557ec681f3Smrg result->accumulator + query->a_offset + i); 10567ec681f3Smrg } 10577ec681f3Smrg 10587ec681f3Smrg /* 4x 32bit A counters... */ 10597ec681f3Smrg for (i = 0; i < 4; i++) { 10607ec681f3Smrg accumulate_uint32(start + 36 + i, end + 36 + i, 10617ec681f3Smrg result->accumulator + query->a_offset + 32 + i); 10627ec681f3Smrg } 10637ec681f3Smrg 10647ec681f3Smrg if (can_use_mi_rpc_bc_counters(devinfo)) { 10657ec681f3Smrg /* 8x 32bit B counters */ 10667ec681f3Smrg for (i = 0; i < 8; i++) { 10677ec681f3Smrg accumulate_uint32(start + 48 + i, end + 48 + i, 10687ec681f3Smrg result->accumulator + query->b_offset + i); 10697ec681f3Smrg } 10707ec681f3Smrg 10717ec681f3Smrg /* 8x 32bit C counters... */ 10727ec681f3Smrg for (i = 0; i < 8; i++) { 10737ec681f3Smrg accumulate_uint32(start + 56 + i, end + 56 + i, 10747ec681f3Smrg result->accumulator + query->c_offset + i); 10757ec681f3Smrg } 10767ec681f3Smrg } 10777ec681f3Smrg break; 10787ec681f3Smrg 10797ec681f3Smrg case I915_OA_FORMAT_A45_B8_C8: 10807ec681f3Smrg accumulate_uint32(start + 1, end + 1, result->accumulator); /* timestamp */ 10817ec681f3Smrg 10827ec681f3Smrg for (i = 0; i < 61; i++) { 10837ec681f3Smrg accumulate_uint32(start + 3 + i, end + 3 + i, 10847ec681f3Smrg result->accumulator + query->a_offset + i); 10857ec681f3Smrg } 10867ec681f3Smrg break; 10877ec681f3Smrg 10887ec681f3Smrg default: 10897ec681f3Smrg unreachable("Can't accumulate OA counters in unknown format"); 10907ec681f3Smrg } 10917ec681f3Smrg 10927ec681f3Smrg} 10937ec681f3Smrg 10947ec681f3Smrg#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT) 10957ec681f3Smrg 10967ec681f3Smrgvoid 10977ec681f3Smrgintel_perf_query_result_read_gt_frequency(struct intel_perf_query_result *result, 10987ec681f3Smrg const struct intel_device_info 
*devinfo, 10997ec681f3Smrg const uint32_t start, 11007ec681f3Smrg const uint32_t end) 11017ec681f3Smrg{ 11027ec681f3Smrg switch (devinfo->ver) { 11037ec681f3Smrg case 7: 11047ec681f3Smrg case 8: 11057ec681f3Smrg result->gt_frequency[0] = GET_FIELD(start, GFX7_RPSTAT1_CURR_GT_FREQ) * 50ULL; 11067ec681f3Smrg result->gt_frequency[1] = GET_FIELD(end, GFX7_RPSTAT1_CURR_GT_FREQ) * 50ULL; 11077ec681f3Smrg break; 11087ec681f3Smrg case 9: 11097ec681f3Smrg case 11: 11107ec681f3Smrg case 12: 11117ec681f3Smrg result->gt_frequency[0] = GET_FIELD(start, GFX9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL; 11127ec681f3Smrg result->gt_frequency[1] = GET_FIELD(end, GFX9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL; 11137ec681f3Smrg break; 11147ec681f3Smrg default: 11157ec681f3Smrg unreachable("unexpected gen"); 11167ec681f3Smrg } 11177ec681f3Smrg 11187ec681f3Smrg /* Put the numbers into Hz. */ 11197ec681f3Smrg result->gt_frequency[0] *= 1000000ULL; 11207ec681f3Smrg result->gt_frequency[1] *= 1000000ULL; 11217ec681f3Smrg} 11227ec681f3Smrg 11237ec681f3Smrgvoid 11247ec681f3Smrgintel_perf_query_result_read_perfcnts(struct intel_perf_query_result *result, 11257ec681f3Smrg const struct intel_perf_query_info *query, 11267ec681f3Smrg const uint64_t *start, 11277ec681f3Smrg const uint64_t *end) 11287ec681f3Smrg{ 11297ec681f3Smrg for (uint32_t i = 0; i < 2; i++) { 11307ec681f3Smrg uint64_t v0 = start[i] & PERF_CNT_VALUE_MASK; 11317ec681f3Smrg uint64_t v1 = end[i] & PERF_CNT_VALUE_MASK; 11327ec681f3Smrg 11337ec681f3Smrg result->accumulator[query->perfcnt_offset + i] = v0 > v1 ? 
11347ec681f3Smrg (PERF_CNT_VALUE_MASK + 1 + v1 - v0) : 11357ec681f3Smrg (v1 - v0); 11367ec681f3Smrg } 11377ec681f3Smrg} 11387ec681f3Smrg 11397ec681f3Smrgstatic uint32_t 11407ec681f3Smrgquery_accumulator_offset(const struct intel_perf_query_info *query, 11417ec681f3Smrg enum intel_perf_query_field_type type, 11427ec681f3Smrg uint8_t index) 11437ec681f3Smrg{ 11447ec681f3Smrg switch (type) { 11457ec681f3Smrg case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT: 11467ec681f3Smrg return query->perfcnt_offset + index; 11477ec681f3Smrg case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B: 11487ec681f3Smrg return query->b_offset + index; 11497ec681f3Smrg case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C: 11507ec681f3Smrg return query->c_offset + index; 11517ec681f3Smrg default: 11527ec681f3Smrg unreachable("Invalid register type"); 11537ec681f3Smrg return 0; 11547ec681f3Smrg } 11557ec681f3Smrg} 11567ec681f3Smrg 11577ec681f3Smrgvoid 11587ec681f3Smrgintel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result, 11597ec681f3Smrg const struct intel_perf_query_info *query, 11607ec681f3Smrg const struct intel_device_info *devinfo, 11617ec681f3Smrg const void *start, 11627ec681f3Smrg const void *end, 11637ec681f3Smrg bool no_oa_accumulate) 11647ec681f3Smrg{ 11657ec681f3Smrg struct intel_perf_query_field_layout *layout = &query->perf->query_layout; 11667ec681f3Smrg 11677ec681f3Smrg for (uint32_t r = 0; r < layout->n_fields; r++) { 11687ec681f3Smrg struct intel_perf_query_field *field = &layout->fields[r]; 11697ec681f3Smrg 11707ec681f3Smrg if (field->type == INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC) { 11717ec681f3Smrg intel_perf_query_result_read_frequencies(result, devinfo, 11727ec681f3Smrg start + field->location, 11737ec681f3Smrg end + field->location); 11747ec681f3Smrg /* no_oa_accumulate=true is used when doing GL perf queries, we 11757ec681f3Smrg * manually parse the OA reports from the OA buffer and substract 11767ec681f3Smrg * unrelated deltas, so don't accumulate the begin/end reports 
here. 11777ec681f3Smrg */ 11787ec681f3Smrg if (!no_oa_accumulate) { 11797ec681f3Smrg intel_perf_query_result_accumulate(result, query, devinfo, 11807ec681f3Smrg start + field->location, 11817ec681f3Smrg end + field->location); 11827ec681f3Smrg } 11837ec681f3Smrg } else { 11847ec681f3Smrg uint64_t v0, v1; 11857ec681f3Smrg 11867ec681f3Smrg if (field->size == 4) { 11877ec681f3Smrg v0 = *(const uint32_t *)(start + field->location); 11887ec681f3Smrg v1 = *(const uint32_t *)(end + field->location); 11897ec681f3Smrg } else { 11907ec681f3Smrg assert(field->size == 8); 11917ec681f3Smrg v0 = *(const uint64_t *)(start + field->location); 11927ec681f3Smrg v1 = *(const uint64_t *)(end + field->location); 11937ec681f3Smrg } 11947ec681f3Smrg 11957ec681f3Smrg if (field->mask) { 11967ec681f3Smrg v0 = field->mask & v0; 11977ec681f3Smrg v1 = field->mask & v1; 11987ec681f3Smrg } 11997ec681f3Smrg 12007ec681f3Smrg /* RPSTAT is a bit of a special case because its begin/end values 12017ec681f3Smrg * represent frequencies. We store it in a separate location. 
12027ec681f3Smrg */ 12037ec681f3Smrg if (field->type == INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT) 12047ec681f3Smrg intel_perf_query_result_read_gt_frequency(result, devinfo, v0, v1); 12057ec681f3Smrg else 12067ec681f3Smrg result->accumulator[query_accumulator_offset(query, field->type, field->index)] = v1 - v0; 12077ec681f3Smrg } 12087ec681f3Smrg } 12097ec681f3Smrg} 12107ec681f3Smrg 12117ec681f3Smrgvoid 12127ec681f3Smrgintel_perf_query_result_clear(struct intel_perf_query_result *result) 12137ec681f3Smrg{ 12147ec681f3Smrg memset(result, 0, sizeof(*result)); 12157ec681f3Smrg result->hw_id = INTEL_PERF_INVALID_CTX_ID; 12167ec681f3Smrg} 12177ec681f3Smrg 12187ec681f3Smrgvoid 12197ec681f3Smrgintel_perf_query_result_print_fields(const struct intel_perf_query_info *query, 12207ec681f3Smrg const struct intel_device_info *devinfo, 12217ec681f3Smrg const void *data) 12227ec681f3Smrg{ 12237ec681f3Smrg const struct intel_perf_query_field_layout *layout = &query->perf->query_layout; 12247ec681f3Smrg 12257ec681f3Smrg for (uint32_t r = 0; r < layout->n_fields; r++) { 12267ec681f3Smrg const struct intel_perf_query_field *field = &layout->fields[r]; 12277ec681f3Smrg const uint32_t *value32 = data + field->location; 12287ec681f3Smrg 12297ec681f3Smrg switch (field->type) { 12307ec681f3Smrg case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC: 12317ec681f3Smrg fprintf(stderr, "MI_RPC:\n"); 12327ec681f3Smrg fprintf(stderr, " TS: 0x%08x\n", *(value32 + 1)); 12337ec681f3Smrg fprintf(stderr, " CLK: 0x%08x\n", *(value32 + 3)); 12347ec681f3Smrg break; 12357ec681f3Smrg case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B: 12367ec681f3Smrg fprintf(stderr, "B%u: 0x%08x\n", field->index, *value32); 12377ec681f3Smrg break; 12387ec681f3Smrg case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C: 12397ec681f3Smrg fprintf(stderr, "C%u: 0x%08x\n", field->index, *value32); 12407ec681f3Smrg break; 12417ec681f3Smrg default: 12427ec681f3Smrg break; 12437ec681f3Smrg } 12447ec681f3Smrg } 12457ec681f3Smrg} 12467ec681f3Smrg 
12477ec681f3Smrgstatic int 12487ec681f3Smrgintel_perf_compare_query_names(const void *v1, const void *v2) 12497ec681f3Smrg{ 12507ec681f3Smrg const struct intel_perf_query_info *q1 = v1; 12517ec681f3Smrg const struct intel_perf_query_info *q2 = v2; 12527ec681f3Smrg 12537ec681f3Smrg return strcmp(q1->name, q2->name); 12547ec681f3Smrg} 12557ec681f3Smrg 12567ec681f3Smrgstatic inline struct intel_perf_query_field * 12577ec681f3Smrgadd_query_register(struct intel_perf_query_field_layout *layout, 12587ec681f3Smrg enum intel_perf_query_field_type type, 12597ec681f3Smrg uint16_t offset, 12607ec681f3Smrg uint16_t size, 12617ec681f3Smrg uint8_t index) 12627ec681f3Smrg{ 12637ec681f3Smrg /* Align MI_RPC to 64bytes (HW requirement) & 64bit registers to 8bytes 12647ec681f3Smrg * (shows up nicely in the debugger). 12657ec681f3Smrg */ 12667ec681f3Smrg if (type == INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC) 12677ec681f3Smrg layout->size = align(layout->size, 64); 12687ec681f3Smrg else if (size % 8 == 0) 12697ec681f3Smrg layout->size = align(layout->size, 8); 12707ec681f3Smrg 12717ec681f3Smrg layout->fields[layout->n_fields++] = (struct intel_perf_query_field) { 12727ec681f3Smrg .mmio_offset = offset, 12737ec681f3Smrg .location = layout->size, 12747ec681f3Smrg .type = type, 12757ec681f3Smrg .index = index, 12767ec681f3Smrg .size = size, 12777ec681f3Smrg }; 12787ec681f3Smrg layout->size += size; 12797ec681f3Smrg 12807ec681f3Smrg return &layout->fields[layout->n_fields - 1]; 12817ec681f3Smrg} 12827ec681f3Smrg 12837ec681f3Smrgstatic void 12847ec681f3Smrgintel_perf_init_query_fields(struct intel_perf_config *perf_cfg, 12857ec681f3Smrg const struct intel_device_info *devinfo, 12867ec681f3Smrg bool use_register_snapshots) 12877ec681f3Smrg{ 12887ec681f3Smrg struct intel_perf_query_field_layout *layout = &perf_cfg->query_layout; 12897ec681f3Smrg 12907ec681f3Smrg layout->n_fields = 0; 12917ec681f3Smrg 12927ec681f3Smrg /* MI_RPC requires a 64byte alignment. 
*/ 12937ec681f3Smrg layout->alignment = 64; 12947ec681f3Smrg 12957ec681f3Smrg layout->fields = rzalloc_array(perf_cfg, struct intel_perf_query_field, 5 + 16); 12967ec681f3Smrg 12977ec681f3Smrg add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC, 12987ec681f3Smrg 0, 256, 0); 12997ec681f3Smrg 13007ec681f3Smrg if (use_register_snapshots) { 13017ec681f3Smrg if (devinfo->ver <= 11) { 13027ec681f3Smrg struct intel_perf_query_field *field = 13037ec681f3Smrg add_query_register(layout, 13047ec681f3Smrg INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT, 13057ec681f3Smrg PERF_CNT_1_DW0, 8, 0); 13067ec681f3Smrg field->mask = PERF_CNT_VALUE_MASK; 13077ec681f3Smrg 13087ec681f3Smrg field = add_query_register(layout, 13097ec681f3Smrg INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT, 13107ec681f3Smrg PERF_CNT_2_DW0, 8, 1); 13117ec681f3Smrg field->mask = PERF_CNT_VALUE_MASK; 13127ec681f3Smrg } 13137ec681f3Smrg 13147ec681f3Smrg if (devinfo->ver == 8 && !devinfo->is_cherryview) { 13157ec681f3Smrg add_query_register(layout, 13167ec681f3Smrg INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT, 13177ec681f3Smrg GFX7_RPSTAT1, 4, 0); 13187ec681f3Smrg } 13197ec681f3Smrg 13207ec681f3Smrg if (devinfo->ver >= 9) { 13217ec681f3Smrg add_query_register(layout, 13227ec681f3Smrg INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT, 13237ec681f3Smrg GFX9_RPSTAT0, 4, 0); 13247ec681f3Smrg } 13257ec681f3Smrg 13267ec681f3Smrg if (!can_use_mi_rpc_bc_counters(devinfo)) { 13277ec681f3Smrg if (devinfo->ver >= 8 && devinfo->ver <= 11) { 13287ec681f3Smrg for (uint32_t i = 0; i < GFX8_N_OA_PERF_B32; i++) { 13297ec681f3Smrg add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B, 13307ec681f3Smrg GFX8_OA_PERF_B32(i), 4, i); 13317ec681f3Smrg } 13327ec681f3Smrg for (uint32_t i = 0; i < GFX8_N_OA_PERF_C32; i++) { 13337ec681f3Smrg add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C, 13347ec681f3Smrg GFX8_OA_PERF_C32(i), 4, i); 13357ec681f3Smrg } 13367ec681f3Smrg } else if (devinfo->ver == 12) { 13377ec681f3Smrg for (uint32_t i = 
0; i < GFX12_N_OAG_PERF_B32; i++) { 13387ec681f3Smrg add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B, 13397ec681f3Smrg GFX12_OAG_PERF_B32(i), 4, i); 13407ec681f3Smrg } 13417ec681f3Smrg for (uint32_t i = 0; i < GFX12_N_OAG_PERF_C32; i++) { 13427ec681f3Smrg add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C, 13437ec681f3Smrg GFX12_OAG_PERF_C32(i), 4, i); 13447ec681f3Smrg } 13457ec681f3Smrg } 13467ec681f3Smrg } 13477ec681f3Smrg } 13487ec681f3Smrg 13497ec681f3Smrg /* Align the whole package to 64bytes so that 2 snapshots can be put 13507ec681f3Smrg * together without extract alignment for the user. 13517ec681f3Smrg */ 13527ec681f3Smrg layout->size = align(layout->size, 64); 13537ec681f3Smrg} 13547ec681f3Smrg 13557ec681f3Smrgvoid 13567ec681f3Smrgintel_perf_init_metrics(struct intel_perf_config *perf_cfg, 13577ec681f3Smrg const struct intel_device_info *devinfo, 13587ec681f3Smrg int drm_fd, 13597ec681f3Smrg bool include_pipeline_statistics, 13607ec681f3Smrg bool use_register_snapshots) 13617ec681f3Smrg{ 13627ec681f3Smrg intel_perf_init_query_fields(perf_cfg, devinfo, use_register_snapshots); 13637ec681f3Smrg 13647ec681f3Smrg if (include_pipeline_statistics) { 13657ec681f3Smrg load_pipeline_statistic_metrics(perf_cfg, devinfo); 13667ec681f3Smrg intel_perf_register_mdapi_statistic_query(perf_cfg, devinfo); 13677ec681f3Smrg } 13687ec681f3Smrg 13697ec681f3Smrg bool oa_metrics = oa_metrics_available(perf_cfg, drm_fd, devinfo, 13707ec681f3Smrg use_register_snapshots); 13717ec681f3Smrg if (oa_metrics) 13727ec681f3Smrg load_oa_metrics(perf_cfg, drm_fd, devinfo); 13737ec681f3Smrg 13747ec681f3Smrg /* sort query groups by name */ 13757ec681f3Smrg qsort(perf_cfg->queries, perf_cfg->n_queries, 13767ec681f3Smrg sizeof(perf_cfg->queries[0]), intel_perf_compare_query_names); 13777ec681f3Smrg 13787ec681f3Smrg build_unique_counter_list(perf_cfg); 13797ec681f3Smrg 13807ec681f3Smrg if (oa_metrics) 13817ec681f3Smrg intel_perf_register_mdapi_oa_query(perf_cfg, 
devinfo); 13827ec681f3Smrg} 1383