17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2018 Intel Corporation 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 217ec681f3Smrg * IN THE SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#ifndef INTEL_PERF_H 257ec681f3Smrg#define INTEL_PERF_H 267ec681f3Smrg 277ec681f3Smrg#include <stdio.h> 287ec681f3Smrg#include <stdbool.h> 297ec681f3Smrg#include <stdint.h> 307ec681f3Smrg#include <string.h> 317ec681f3Smrg 327ec681f3Smrg#if defined(MAJOR_IN_SYSMACROS) 337ec681f3Smrg#include <sys/sysmacros.h> 347ec681f3Smrg#elif defined(MAJOR_IN_MKDEV) 357ec681f3Smrg#include <sys/mkdev.h> 367ec681f3Smrg#endif 377ec681f3Smrg 387ec681f3Smrg#include "util/hash_table.h" 397ec681f3Smrg#include "compiler/glsl/list.h" 407ec681f3Smrg#include "util/ralloc.h" 417ec681f3Smrg 427ec681f3Smrg#include "drm-uapi/i915_drm.h" 437ec681f3Smrg 447ec681f3Smrg#ifdef __cplusplus 457ec681f3Smrgextern "C" { 467ec681f3Smrg#endif 477ec681f3Smrg 487ec681f3Smrgstruct intel_device_info; 497ec681f3Smrg 507ec681f3Smrgstruct intel_perf_config; 517ec681f3Smrgstruct intel_perf_query_info; 527ec681f3Smrg 537ec681f3Smrg#define INTEL_PERF_INVALID_CTX_ID (0xffffffff) 547ec681f3Smrg 557ec681f3Smrgenum intel_perf_counter_type { 567ec681f3Smrg INTEL_PERF_COUNTER_TYPE_EVENT, 577ec681f3Smrg INTEL_PERF_COUNTER_TYPE_DURATION_NORM, 587ec681f3Smrg INTEL_PERF_COUNTER_TYPE_DURATION_RAW, 597ec681f3Smrg INTEL_PERF_COUNTER_TYPE_THROUGHPUT, 607ec681f3Smrg INTEL_PERF_COUNTER_TYPE_RAW, 617ec681f3Smrg INTEL_PERF_COUNTER_TYPE_TIMESTAMP, 627ec681f3Smrg}; 637ec681f3Smrg 647ec681f3Smrgenum intel_perf_counter_data_type { 657ec681f3Smrg INTEL_PERF_COUNTER_DATA_TYPE_BOOL32, 667ec681f3Smrg INTEL_PERF_COUNTER_DATA_TYPE_UINT32, 677ec681f3Smrg INTEL_PERF_COUNTER_DATA_TYPE_UINT64, 687ec681f3Smrg INTEL_PERF_COUNTER_DATA_TYPE_FLOAT, 697ec681f3Smrg INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE, 707ec681f3Smrg}; 717ec681f3Smrg 727ec681f3Smrgenum intel_perf_counter_units { 737ec681f3Smrg /* size */ 747ec681f3Smrg INTEL_PERF_COUNTER_UNITS_BYTES, 757ec681f3Smrg 767ec681f3Smrg /* frequency */ 777ec681f3Smrg INTEL_PERF_COUNTER_UNITS_HZ, 787ec681f3Smrg 797ec681f3Smrg /* time */ 807ec681f3Smrg INTEL_PERF_COUNTER_UNITS_NS, 817ec681f3Smrg INTEL_PERF_COUNTER_UNITS_US, 827ec681f3Smrg 837ec681f3Smrg /**/ 847ec681f3Smrg INTEL_PERF_COUNTER_UNITS_PIXELS, 857ec681f3Smrg INTEL_PERF_COUNTER_UNITS_TEXELS, 867ec681f3Smrg INTEL_PERF_COUNTER_UNITS_THREADS, 877ec681f3Smrg INTEL_PERF_COUNTER_UNITS_PERCENT, 887ec681f3Smrg 897ec681f3Smrg /* events */ 907ec681f3Smrg INTEL_PERF_COUNTER_UNITS_MESSAGES, 917ec681f3Smrg INTEL_PERF_COUNTER_UNITS_NUMBER, 927ec681f3Smrg INTEL_PERF_COUNTER_UNITS_CYCLES, 937ec681f3Smrg INTEL_PERF_COUNTER_UNITS_EVENTS, 947ec681f3Smrg INTEL_PERF_COUNTER_UNITS_UTILIZATION, 957ec681f3Smrg 967ec681f3Smrg /**/ 977ec681f3Smrg INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES, 987ec681f3Smrg INTEL_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES, 997ec681f3Smrg INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES, 1007ec681f3Smrg INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE, 1017ec681f3Smrg 1027ec681f3Smrg INTEL_PERF_COUNTER_UNITS_MAX 1037ec681f3Smrg}; 1047ec681f3Smrg 1057ec681f3Smrgstruct intel_pipeline_stat { 1067ec681f3Smrg uint32_t reg; 1077ec681f3Smrg uint32_t numerator; 1087ec681f3Smrg uint32_t denominator; 1097ec681f3Smrg}; 1107ec681f3Smrg 1117ec681f3Smrg/* 1127ec681f3Smrg * The largest OA formats we can use include: 1137ec681f3Smrg * For Haswell: 1147ec681f3Smrg * 1 timestamp, 45 A counters, 8 B counters and 8 C counters. 1157ec681f3Smrg * For Gfx8+ 1167ec681f3Smrg * 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters 1177ec681f3Smrg * 1187ec681f3Smrg * Plus 2 PERF_CNT registers and 1 RPSTAT register. 1197ec681f3Smrg */ 1207ec681f3Smrg#define MAX_OA_REPORT_COUNTERS (62 + 2 + 1) 1217ec681f3Smrg 1227ec681f3Smrg/* 1237ec681f3Smrg * When currently allocate only one page for pipeline statistics queries. Here 1247ec681f3Smrg * we derived the maximum number of counters for that amount. 1257ec681f3Smrg */ 1267ec681f3Smrg#define STATS_BO_SIZE 4096 1277ec681f3Smrg#define STATS_BO_END_OFFSET_BYTES (STATS_BO_SIZE / 2) 1287ec681f3Smrg#define MAX_STAT_COUNTERS (STATS_BO_END_OFFSET_BYTES / 8) 1297ec681f3Smrg 1307ec681f3Smrg#define I915_PERF_OA_SAMPLE_SIZE (8 + /* drm_i915_perf_record_header */ \ 1317ec681f3Smrg 256) /* OA counter report */ 1327ec681f3Smrg 1337ec681f3Smrgstruct intel_perf_query_result { 1347ec681f3Smrg /** 1357ec681f3Smrg * Storage for the final accumulated OA counters. 1367ec681f3Smrg */ 1377ec681f3Smrg uint64_t accumulator[MAX_OA_REPORT_COUNTERS]; 1387ec681f3Smrg 1397ec681f3Smrg /** 1407ec681f3Smrg * Hw ID used by the context on which the query was running. 1417ec681f3Smrg */ 1427ec681f3Smrg uint32_t hw_id; 1437ec681f3Smrg 1447ec681f3Smrg /** 1457ec681f3Smrg * Number of reports accumulated to produce the results. 1467ec681f3Smrg */ 1477ec681f3Smrg uint32_t reports_accumulated; 1487ec681f3Smrg 1497ec681f3Smrg /** 1507ec681f3Smrg * Frequency in the slices of the GT at the begin and end of the 1517ec681f3Smrg * query. 1527ec681f3Smrg */ 1537ec681f3Smrg uint64_t slice_frequency[2]; 1547ec681f3Smrg 1557ec681f3Smrg /** 1567ec681f3Smrg * Frequency in the unslice of the GT at the begin and end of the 1577ec681f3Smrg * query. 1587ec681f3Smrg */ 1597ec681f3Smrg uint64_t unslice_frequency[2]; 1607ec681f3Smrg 1617ec681f3Smrg /** 1627ec681f3Smrg * Frequency of the whole GT at the begin and end of the query. 1637ec681f3Smrg */ 1647ec681f3Smrg uint64_t gt_frequency[2]; 1657ec681f3Smrg 1667ec681f3Smrg /** 1677ec681f3Smrg * Timestamp of the query. 1687ec681f3Smrg */ 1697ec681f3Smrg uint64_t begin_timestamp; 1707ec681f3Smrg 1717ec681f3Smrg /** 1727ec681f3Smrg * Whether the query was interrupted by another workload (aka preemption). 1737ec681f3Smrg */ 1747ec681f3Smrg bool query_disjoint; 1757ec681f3Smrg}; 1767ec681f3Smrg 1777ec681f3Smrgstruct intel_perf_query_counter { 1787ec681f3Smrg const char *name; 1797ec681f3Smrg const char *desc; 1807ec681f3Smrg const char *symbol_name; 1817ec681f3Smrg const char *category; 1827ec681f3Smrg enum intel_perf_counter_type type; 1837ec681f3Smrg enum intel_perf_counter_data_type data_type; 1847ec681f3Smrg enum intel_perf_counter_units units; 1857ec681f3Smrg uint64_t raw_max; 1867ec681f3Smrg size_t offset; 1877ec681f3Smrg 1887ec681f3Smrg union { 1897ec681f3Smrg uint64_t (*oa_counter_read_uint64)(struct intel_perf_config *perf, 1907ec681f3Smrg const struct intel_perf_query_info *query, 1917ec681f3Smrg const struct intel_perf_query_result *results); 1927ec681f3Smrg float (*oa_counter_read_float)(struct intel_perf_config *perf, 1937ec681f3Smrg const struct intel_perf_query_info *query, 1947ec681f3Smrg const struct intel_perf_query_result *results); 1957ec681f3Smrg struct intel_pipeline_stat pipeline_stat; 1967ec681f3Smrg }; 1977ec681f3Smrg}; 1987ec681f3Smrg 1997ec681f3Smrgstruct intel_perf_query_register_prog { 2007ec681f3Smrg uint32_t reg; 2017ec681f3Smrg uint32_t val; 2027ec681f3Smrg}; 2037ec681f3Smrg 2047ec681f3Smrg/* Register programming for a given query */ 2057ec681f3Smrgstruct intel_perf_registers { 2067ec681f3Smrg const struct intel_perf_query_register_prog *flex_regs; 2077ec681f3Smrg uint32_t n_flex_regs; 2087ec681f3Smrg 2097ec681f3Smrg const struct intel_perf_query_register_prog *mux_regs; 2107ec681f3Smrg uint32_t n_mux_regs; 2117ec681f3Smrg 2127ec681f3Smrg const struct intel_perf_query_register_prog *b_counter_regs; 2137ec681f3Smrg uint32_t n_b_counter_regs; 2147ec681f3Smrg}; 2157ec681f3Smrg 2167ec681f3Smrgstruct intel_perf_query_info { 2177ec681f3Smrg struct intel_perf_config *perf; 2187ec681f3Smrg 2197ec681f3Smrg enum intel_perf_query_type { 2207ec681f3Smrg INTEL_PERF_QUERY_TYPE_OA, 2217ec681f3Smrg INTEL_PERF_QUERY_TYPE_RAW, 2227ec681f3Smrg INTEL_PERF_QUERY_TYPE_PIPELINE, 2237ec681f3Smrg } kind; 2247ec681f3Smrg const char *name; 2257ec681f3Smrg const char *symbol_name; 2267ec681f3Smrg const char *guid; 2277ec681f3Smrg struct intel_perf_query_counter *counters; 2287ec681f3Smrg int n_counters; 2297ec681f3Smrg int max_counters; 2307ec681f3Smrg size_t data_size; 2317ec681f3Smrg 2327ec681f3Smrg /* OA specific */ 2337ec681f3Smrg uint64_t oa_metrics_set_id; 2347ec681f3Smrg int oa_format; 2357ec681f3Smrg 2367ec681f3Smrg /* For indexing into the accumulator[] ... */ 2377ec681f3Smrg int gpu_time_offset; 2387ec681f3Smrg int gpu_clock_offset; 2397ec681f3Smrg int a_offset; 2407ec681f3Smrg int b_offset; 2417ec681f3Smrg int c_offset; 2427ec681f3Smrg int perfcnt_offset; 2437ec681f3Smrg int rpstat_offset; 2447ec681f3Smrg 2457ec681f3Smrg struct intel_perf_registers config; 2467ec681f3Smrg}; 2477ec681f3Smrg 2487ec681f3Smrg/* When not using the MI_RPC command, this structure describes the list of 2497ec681f3Smrg * register offsets as well as their storage location so that they can be 2507ec681f3Smrg * stored through a series of MI_SRM commands and accumulated with 2517ec681f3Smrg * intel_perf_query_result_accumulate_snapshots(). 2527ec681f3Smrg */ 2537ec681f3Smrgstruct intel_perf_query_field_layout { 2547ec681f3Smrg /* Alignment for the layout */ 2557ec681f3Smrg uint32_t alignment; 2567ec681f3Smrg 2577ec681f3Smrg /* Size of the whole layout */ 2587ec681f3Smrg uint32_t size; 2597ec681f3Smrg 2607ec681f3Smrg uint32_t n_fields; 2617ec681f3Smrg 2627ec681f3Smrg struct intel_perf_query_field { 2637ec681f3Smrg /* MMIO location of this register */ 2647ec681f3Smrg uint16_t mmio_offset; 2657ec681f3Smrg 2667ec681f3Smrg /* Location of this register in the storage */ 2677ec681f3Smrg uint16_t location; 2687ec681f3Smrg 2697ec681f3Smrg /* Type of register, for accumulation (see intel_perf_query_info:*_offset 2707ec681f3Smrg * fields) 2717ec681f3Smrg */ 2727ec681f3Smrg enum intel_perf_query_field_type { 2737ec681f3Smrg INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC, 2747ec681f3Smrg INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT, 2757ec681f3Smrg INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT, 2767ec681f3Smrg INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B, 2777ec681f3Smrg INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C, 2787ec681f3Smrg } type; 2797ec681f3Smrg 2807ec681f3Smrg /* Index of register in the given type (for instance A31 or B2, 2817ec681f3Smrg * etc...) 2827ec681f3Smrg */ 2837ec681f3Smrg uint8_t index; 2847ec681f3Smrg 2857ec681f3Smrg /* 4, 8 or 256 */ 2867ec681f3Smrg uint16_t size; 2877ec681f3Smrg 2887ec681f3Smrg /* If not 0, mask to apply to the register value. */ 2897ec681f3Smrg uint64_t mask; 2907ec681f3Smrg } *fields; 2917ec681f3Smrg}; 2927ec681f3Smrg 2937ec681f3Smrgstruct intel_perf_query_counter_info { 2947ec681f3Smrg struct intel_perf_query_counter *counter; 2957ec681f3Smrg 2967ec681f3Smrg uint64_t query_mask; 2977ec681f3Smrg 2987ec681f3Smrg /** 2997ec681f3Smrg * Each counter can be a part of many groups, each time at different index. 3007ec681f3Smrg * This struct stores one of those locations. 3017ec681f3Smrg */ 3027ec681f3Smrg struct { 3037ec681f3Smrg int group_idx; /* query/group number */ 3047ec681f3Smrg int counter_idx; /* index inside of query/group */ 3057ec681f3Smrg } location; 3067ec681f3Smrg}; 3077ec681f3Smrg 3087ec681f3Smrgstruct intel_perf_config { 3097ec681f3Smrg /* Whether i915 has DRM_I915_QUERY_PERF_CONFIG support. */ 3107ec681f3Smrg bool i915_query_supported; 3117ec681f3Smrg 3127ec681f3Smrg /* Version of the i915-perf subsystem, refer to i915_drm.h. */ 3137ec681f3Smrg int i915_perf_version; 3147ec681f3Smrg 3157ec681f3Smrg /* Powergating configuration for the running the query. */ 3167ec681f3Smrg struct drm_i915_gem_context_param_sseu sseu; 3177ec681f3Smrg 3187ec681f3Smrg struct intel_perf_query_info *queries; 3197ec681f3Smrg int n_queries; 3207ec681f3Smrg 3217ec681f3Smrg struct intel_perf_query_counter_info *counter_infos; 3227ec681f3Smrg int n_counters; 3237ec681f3Smrg 3247ec681f3Smrg struct intel_perf_query_field_layout query_layout; 3257ec681f3Smrg 3267ec681f3Smrg /* Variables referenced in the XML meta data for OA performance 3277ec681f3Smrg * counters, e.g in the normalization equations. 3287ec681f3Smrg * 3297ec681f3Smrg * All uint64_t for consistent operand types in generated code 3307ec681f3Smrg */ 3317ec681f3Smrg struct { 3327ec681f3Smrg uint64_t timestamp_frequency; /** $GpuTimestampFrequency */ 3337ec681f3Smrg uint64_t n_eus; /** $EuCoresTotalCount */ 3347ec681f3Smrg uint64_t n_eu_slices; /** $EuSlicesTotalCount */ 3357ec681f3Smrg uint64_t n_eu_sub_slices; /** $EuSubslicesTotalCount */ 3367ec681f3Smrg uint64_t eu_threads_count; /** $EuThreadsCount */ 3377ec681f3Smrg uint64_t slice_mask; /** $SliceMask */ 3387ec681f3Smrg uint64_t subslice_mask; /** $SubsliceMask */ 3397ec681f3Smrg uint64_t gt_min_freq; /** $GpuMinFrequency */ 3407ec681f3Smrg uint64_t gt_max_freq; /** $GpuMaxFrequency */ 3417ec681f3Smrg uint64_t revision; /** $SkuRevisionId */ 3427ec681f3Smrg bool query_mode; /** $QueryMode */ 3437ec681f3Smrg } sys_vars; 3447ec681f3Smrg 3457ec681f3Smrg /* OA metric sets, indexed by GUID, as know by Mesa at build time, to 3467ec681f3Smrg * cross-reference with the GUIDs of configs advertised by the kernel at 3477ec681f3Smrg * runtime 3487ec681f3Smrg */ 3497ec681f3Smrg struct hash_table *oa_metrics_table; 3507ec681f3Smrg 3517ec681f3Smrg /* When MDAPI hasn't configured the metric we need to use by the time the 3527ec681f3Smrg * query begins, this OA metric is used as a fallback. 3537ec681f3Smrg */ 3547ec681f3Smrg uint64_t fallback_raw_oa_metric; 3557ec681f3Smrg 3567ec681f3Smrg /* Whether we have support for this platform. If true && n_queries == 0, 3577ec681f3Smrg * this means we will not be able to use i915-perf because of it is in 3587ec681f3Smrg * paranoid mode. 3597ec681f3Smrg */ 3607ec681f3Smrg bool platform_supported; 3617ec681f3Smrg 3627ec681f3Smrg /* Location of the device's sysfs entry. */ 3637ec681f3Smrg char sysfs_dev_dir[256]; 3647ec681f3Smrg 3657ec681f3Smrg struct { 3667ec681f3Smrg void *(*bo_alloc)(void *bufmgr, const char *name, uint64_t size); 3677ec681f3Smrg void (*bo_unreference)(void *bo); 3687ec681f3Smrg void *(*bo_map)(void *ctx, void *bo, unsigned flags); 3697ec681f3Smrg void (*bo_unmap)(void *bo); 3707ec681f3Smrg bool (*batch_references)(void *batch, void *bo); 3717ec681f3Smrg void (*bo_wait_rendering)(void *bo); 3727ec681f3Smrg int (*bo_busy)(void *bo); 3737ec681f3Smrg void (*emit_stall_at_pixel_scoreboard)(void *ctx); 3747ec681f3Smrg void (*emit_mi_report_perf_count)(void *ctx, 3757ec681f3Smrg void *bo, 3767ec681f3Smrg uint32_t offset_in_bytes, 3777ec681f3Smrg uint32_t report_id); 3787ec681f3Smrg void (*batchbuffer_flush)(void *ctx, 3797ec681f3Smrg const char *file, int line); 3807ec681f3Smrg void (*store_register_mem)(void *ctx, void *bo, uint32_t reg, uint32_t reg_size, uint32_t offset); 3817ec681f3Smrg 3827ec681f3Smrg } vtbl; 3837ec681f3Smrg}; 3847ec681f3Smrg 3857ec681f3Smrgstruct intel_perf_counter_pass { 3867ec681f3Smrg struct intel_perf_query_info *query; 3877ec681f3Smrg struct intel_perf_query_counter *counter; 3887ec681f3Smrg uint32_t pass; 3897ec681f3Smrg}; 3907ec681f3Smrg 3917ec681f3Smrg/** Initialize the intel_perf_config object for a given device. 3927ec681f3Smrg * 3937ec681f3Smrg * include_pipeline_statistics : Whether to add a pipeline statistic query 3947ec681f3Smrg * intel_perf_query_info object 3957ec681f3Smrg * 3967ec681f3Smrg * use_register_snapshots : Whether the queries should include counters 3977ec681f3Smrg * that rely on register snapshots using command 3987ec681f3Smrg * streamer instructions (not possible when using 3997ec681f3Smrg * only the OA buffer data). 4007ec681f3Smrg */ 4017ec681f3Smrgvoid intel_perf_init_metrics(struct intel_perf_config *perf_cfg, 4027ec681f3Smrg const struct intel_device_info *devinfo, 4037ec681f3Smrg int drm_fd, 4047ec681f3Smrg bool include_pipeline_statistics, 4057ec681f3Smrg bool use_register_snapshots); 4067ec681f3Smrg 4077ec681f3Smrg/** Query i915 for a metric id using guid. 4087ec681f3Smrg */ 4097ec681f3Smrgbool intel_perf_load_metric_id(struct intel_perf_config *perf_cfg, 4107ec681f3Smrg const char *guid, 4117ec681f3Smrg uint64_t *metric_id); 4127ec681f3Smrg 4137ec681f3Smrg/** Load a configuation's content from i915 using a guid. 4147ec681f3Smrg */ 4157ec681f3Smrgstruct intel_perf_registers *intel_perf_load_configuration(struct intel_perf_config *perf_cfg, 4167ec681f3Smrg int fd, const char *guid); 4177ec681f3Smrg 4187ec681f3Smrg/** Store a configuration into i915 using guid and return a new metric id. 4197ec681f3Smrg * 4207ec681f3Smrg * If guid is NULL, then a generated one will be provided by hashing the 4217ec681f3Smrg * content of the configuration. 4227ec681f3Smrg */ 4237ec681f3Smrguint64_t intel_perf_store_configuration(struct intel_perf_config *perf_cfg, int fd, 4247ec681f3Smrg const struct intel_perf_registers *config, 4257ec681f3Smrg const char *guid); 4267ec681f3Smrg 4277ec681f3Smrg/** Read the slice/unslice frequency from 2 OA reports and store then into 4287ec681f3Smrg * result. 4297ec681f3Smrg */ 4307ec681f3Smrgvoid intel_perf_query_result_read_frequencies(struct intel_perf_query_result *result, 4317ec681f3Smrg const struct intel_device_info *devinfo, 4327ec681f3Smrg const uint32_t *start, 4337ec681f3Smrg const uint32_t *end); 4347ec681f3Smrg 4357ec681f3Smrg/** Store the GT frequency as reported by the RPSTAT register. 4367ec681f3Smrg */ 4377ec681f3Smrgvoid intel_perf_query_result_read_gt_frequency(struct intel_perf_query_result *result, 4387ec681f3Smrg const struct intel_device_info *devinfo, 4397ec681f3Smrg const uint32_t start, 4407ec681f3Smrg const uint32_t end); 4417ec681f3Smrg 4427ec681f3Smrg/** Store PERFCNT registers values. 4437ec681f3Smrg */ 4447ec681f3Smrgvoid intel_perf_query_result_read_perfcnts(struct intel_perf_query_result *result, 4457ec681f3Smrg const struct intel_perf_query_info *query, 4467ec681f3Smrg const uint64_t *start, 4477ec681f3Smrg const uint64_t *end); 4487ec681f3Smrg 4497ec681f3Smrg/** Accumulate the delta between 2 OA reports into result for a given query. 4507ec681f3Smrg */ 4517ec681f3Smrgvoid intel_perf_query_result_accumulate(struct intel_perf_query_result *result, 4527ec681f3Smrg const struct intel_perf_query_info *query, 4537ec681f3Smrg const struct intel_device_info *devinfo, 4547ec681f3Smrg const uint32_t *start, 4557ec681f3Smrg const uint32_t *end); 4567ec681f3Smrg 4577ec681f3Smrg/** Accumulate the delta between 2 snapshots of OA perf registers (layout 4587ec681f3Smrg * should match description specified through intel_perf_query_register_layout). 4597ec681f3Smrg */ 4607ec681f3Smrgvoid intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result, 4617ec681f3Smrg const struct intel_perf_query_info *query, 4627ec681f3Smrg const struct intel_device_info *devinfo, 4637ec681f3Smrg const void *start, 4647ec681f3Smrg const void *end, 4657ec681f3Smrg bool no_oa_accumulate); 4667ec681f3Smrg 4677ec681f3Smrgvoid intel_perf_query_result_clear(struct intel_perf_query_result *result); 4687ec681f3Smrg 4697ec681f3Smrg/** Debug helper printing out query data. 4707ec681f3Smrg */ 4717ec681f3Smrgvoid intel_perf_query_result_print_fields(const struct intel_perf_query_info *query, 4727ec681f3Smrg const struct intel_device_info *devinfo, 4737ec681f3Smrg const void *data); 4747ec681f3Smrg 4757ec681f3Smrgstatic inline size_t 4767ec681f3Smrgintel_perf_query_counter_get_size(const struct intel_perf_query_counter *counter) 4777ec681f3Smrg{ 4787ec681f3Smrg switch (counter->data_type) { 4797ec681f3Smrg case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32: 4807ec681f3Smrg return sizeof(uint32_t); 4817ec681f3Smrg case INTEL_PERF_COUNTER_DATA_TYPE_UINT32: 4827ec681f3Smrg return sizeof(uint32_t); 4837ec681f3Smrg case INTEL_PERF_COUNTER_DATA_TYPE_UINT64: 4847ec681f3Smrg return sizeof(uint64_t); 4857ec681f3Smrg case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT: 4867ec681f3Smrg return sizeof(float); 4877ec681f3Smrg case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE: 4887ec681f3Smrg return sizeof(double); 4897ec681f3Smrg default: 4907ec681f3Smrg unreachable("invalid counter data type"); 4917ec681f3Smrg } 4927ec681f3Smrg} 4937ec681f3Smrg 4947ec681f3Smrgstatic inline struct intel_perf_config * 4957ec681f3Smrgintel_perf_new(void *ctx) 4967ec681f3Smrg{ 4977ec681f3Smrg struct intel_perf_config *perf = rzalloc(ctx, struct intel_perf_config); 4987ec681f3Smrg return perf; 4997ec681f3Smrg} 5007ec681f3Smrg 5017ec681f3Smrg/** Whether we have the ability to hold off preemption on a batch so we don't 5027ec681f3Smrg * have to look at the OA buffer to subtract unrelated workloads off the 5037ec681f3Smrg * values captured through MI_* commands. 5047ec681f3Smrg */ 5057ec681f3Smrgstatic inline bool 5067ec681f3Smrgintel_perf_has_hold_preemption(const struct intel_perf_config *perf) 5077ec681f3Smrg{ 5087ec681f3Smrg return perf->i915_perf_version >= 3; 5097ec681f3Smrg} 5107ec681f3Smrg 5117ec681f3Smrg/** Whether we have the ability to lock EU array power configuration for the 5127ec681f3Smrg * duration of the performance recording. This is useful on Gfx11 where the HW 5137ec681f3Smrg * architecture requires half the EU for particular workloads. 5147ec681f3Smrg */ 5157ec681f3Smrgstatic inline bool 5167ec681f3Smrgintel_perf_has_global_sseu(const struct intel_perf_config *perf) 5177ec681f3Smrg{ 5187ec681f3Smrg return perf->i915_perf_version >= 4; 5197ec681f3Smrg} 5207ec681f3Smrg 5217ec681f3Smrguint32_t intel_perf_get_n_passes(struct intel_perf_config *perf, 5227ec681f3Smrg const uint32_t *counter_indices, 5237ec681f3Smrg uint32_t counter_indices_count, 5247ec681f3Smrg struct intel_perf_query_info **pass_queries); 5257ec681f3Smrgvoid intel_perf_get_counters_passes(struct intel_perf_config *perf, 5267ec681f3Smrg const uint32_t *counter_indices, 5277ec681f3Smrg uint32_t counter_indices_count, 5287ec681f3Smrg struct intel_perf_counter_pass *counter_pass); 5297ec681f3Smrg 5307ec681f3Smrg#ifdef __cplusplus 5317ec681f3Smrg} // extern "C" 5327ec681f3Smrg#endif 5337ec681f3Smrg 5347ec681f3Smrg#endif /* INTEL_PERF_H */ 535