17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2018 Intel Corporation
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
217ec681f3Smrg * IN THE SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
247ec681f3Smrg#ifndef INTEL_PERF_H
257ec681f3Smrg#define INTEL_PERF_H
267ec681f3Smrg
277ec681f3Smrg#include <stdio.h>
287ec681f3Smrg#include <stdbool.h>
297ec681f3Smrg#include <stdint.h>
307ec681f3Smrg#include <string.h>
317ec681f3Smrg
327ec681f3Smrg#if defined(MAJOR_IN_SYSMACROS)
337ec681f3Smrg#include <sys/sysmacros.h>
347ec681f3Smrg#elif defined(MAJOR_IN_MKDEV)
357ec681f3Smrg#include <sys/mkdev.h>
367ec681f3Smrg#endif
377ec681f3Smrg
387ec681f3Smrg#include "util/hash_table.h"
397ec681f3Smrg#include "compiler/glsl/list.h"
407ec681f3Smrg#include "util/ralloc.h"
417ec681f3Smrg
427ec681f3Smrg#include "drm-uapi/i915_drm.h"
437ec681f3Smrg
447ec681f3Smrg#ifdef __cplusplus
457ec681f3Smrgextern "C" {
467ec681f3Smrg#endif
477ec681f3Smrg
487ec681f3Smrgstruct intel_device_info;
497ec681f3Smrg
507ec681f3Smrgstruct intel_perf_config;
517ec681f3Smrgstruct intel_perf_query_info;
527ec681f3Smrg
537ec681f3Smrg#define INTEL_PERF_INVALID_CTX_ID (0xffffffff)
547ec681f3Smrg
/* Semantic type of a performance counter (event count, duration,
 * throughput, ...).
 */
enum intel_perf_counter_type {
   INTEL_PERF_COUNTER_TYPE_EVENT,
   INTEL_PERF_COUNTER_TYPE_DURATION_NORM,
   INTEL_PERF_COUNTER_TYPE_DURATION_RAW,
   INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
   INTEL_PERF_COUNTER_TYPE_RAW,
   INTEL_PERF_COUNTER_TYPE_TIMESTAMP,
};
637ec681f3Smrg
/* Data type in which a counter's value is expressed (see
 * intel_perf_query_counter_get_size() for the corresponding storage sizes).
 */
enum intel_perf_counter_data_type {
   INTEL_PERF_COUNTER_DATA_TYPE_BOOL32,
   INTEL_PERF_COUNTER_DATA_TYPE_UINT32,
   INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
   INTEL_PERF_COUNTER_DATA_TYPE_FLOAT,
   INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE,
};
717ec681f3Smrg
/* Unit in which a counter's value is expressed. */
enum intel_perf_counter_units {
   /* size */
   INTEL_PERF_COUNTER_UNITS_BYTES,

   /* frequency */
   INTEL_PERF_COUNTER_UNITS_HZ,

   /* time */
   INTEL_PERF_COUNTER_UNITS_NS,
   INTEL_PERF_COUNTER_UNITS_US,

   /* quantities */
   INTEL_PERF_COUNTER_UNITS_PIXELS,
   INTEL_PERF_COUNTER_UNITS_TEXELS,
   INTEL_PERF_COUNTER_UNITS_THREADS,
   INTEL_PERF_COUNTER_UNITS_PERCENT,

   /* events */
   INTEL_PERF_COUNTER_UNITS_MESSAGES,
   INTEL_PERF_COUNTER_UNITS_NUMBER,
   INTEL_PERF_COUNTER_UNITS_CYCLES,
   INTEL_PERF_COUNTER_UNITS_EVENTS,
   INTEL_PERF_COUNTER_UNITS_UTILIZATION,

   /* EU <-> L3 cache traffic */
   INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES,
   INTEL_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES,
   INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES,
   INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE,

   INTEL_PERF_COUNTER_UNITS_MAX
};
1047ec681f3Smrg
/* Description of a pipeline statistic counter: the register to read and the
 * numerator/denominator used to scale its raw value.
 */
struct intel_pipeline_stat {
   uint32_t reg;         /* register offset */
   uint32_t numerator;   /* scaling numerator */
   uint32_t denominator; /* scaling denominator */
};
1107ec681f3Smrg
1117ec681f3Smrg/*
1127ec681f3Smrg * The largest OA formats we can use include:
1137ec681f3Smrg * For Haswell:
1147ec681f3Smrg *   1 timestamp, 45 A counters, 8 B counters and 8 C counters.
1157ec681f3Smrg * For Gfx8+
1167ec681f3Smrg *   1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters
1177ec681f3Smrg *
1187ec681f3Smrg * Plus 2 PERF_CNT registers and 1 RPSTAT register.
1197ec681f3Smrg */
1207ec681f3Smrg#define MAX_OA_REPORT_COUNTERS (62 + 2 + 1)
1217ec681f3Smrg
/*
 * We currently allocate only one page for pipeline statistics queries. Here
 * we derive the maximum number of counters for that amount.
 */
1267ec681f3Smrg#define STATS_BO_SIZE               4096
1277ec681f3Smrg#define STATS_BO_END_OFFSET_BYTES   (STATS_BO_SIZE / 2)
1287ec681f3Smrg#define MAX_STAT_COUNTERS           (STATS_BO_END_OFFSET_BYTES / 8)
1297ec681f3Smrg
1307ec681f3Smrg#define I915_PERF_OA_SAMPLE_SIZE (8 +   /* drm_i915_perf_record_header */ \
1317ec681f3Smrg                                  256)  /* OA counter report */
1327ec681f3Smrg
/* Accumulated results of a performance query. */
struct intel_perf_query_result {
   /**
    * Storage for the final accumulated OA counters.
    */
   uint64_t accumulator[MAX_OA_REPORT_COUNTERS];

   /**
    * Hw ID used by the context on which the query was running.
    */
   uint32_t hw_id;

   /**
    * Number of reports accumulated to produce the results.
    */
   uint32_t reports_accumulated;

   /**
    * Frequency in the slices of the GT at the begin and end of the
    * query.
    */
   uint64_t slice_frequency[2];

   /**
    * Frequency in the unslice of the GT at the begin and end of the
    * query.
    */
   uint64_t unslice_frequency[2];

   /**
    * Frequency of the whole GT at the begin and end of the query.
    */
   uint64_t gt_frequency[2];

   /**
    * Timestamp of the query (taken at the beginning).
    */
   uint64_t begin_timestamp;

   /**
    * Whether the query was interrupted by another workload (aka preemption).
    */
   bool query_disjoint;
};
1767ec681f3Smrg
/* Description of a single counter exposed by a query. */
struct intel_perf_query_counter {
   const char *name;        /* human readable name */
   const char *desc;        /* longer description */
   const char *symbol_name; /* identifier usable in code/tools */
   const char *category;    /* grouping category */
   enum intel_perf_counter_type type;
   enum intel_perf_counter_data_type data_type;
   enum intel_perf_counter_units units;
   uint64_t raw_max;        /* maximum raw value of the counter */
   size_t offset;           /* byte offset of this counter's value in the
                             * query's result data
                             */

   union {
      /* Compute the accumulated value as a uint64 (OA counters). */
      uint64_t (*oa_counter_read_uint64)(struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results);
      /* Compute the accumulated value as a float (OA counters). */
      float (*oa_counter_read_float)(struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results);
      /* Register description for pipeline statistics counters. */
      struct intel_pipeline_stat pipeline_stat;
   };
};
1987ec681f3Smrg
/* A single register/value pair used to program the HW for a query. */
struct intel_perf_query_register_prog {
   uint32_t reg; /* register offset */
   uint32_t val; /* value to write */
};
2037ec681f3Smrg
/* Register programming for a given query */
struct intel_perf_registers {
   const struct intel_perf_query_register_prog *flex_regs;
   uint32_t n_flex_regs;      /* number of elements in flex_regs */

   const struct intel_perf_query_register_prog *mux_regs;
   uint32_t n_mux_regs;       /* number of elements in mux_regs */

   const struct intel_perf_query_register_prog *b_counter_regs;
   uint32_t n_b_counter_regs; /* number of elements in b_counter_regs */
};
2157ec681f3Smrg
/* Description of a performance query: the set of counters it exposes and the
 * HW configuration needed to run it.
 */
struct intel_perf_query_info {
   /* Back pointer to the configuration this query belongs to. */
   struct intel_perf_config *perf;

   enum intel_perf_query_type {
      INTEL_PERF_QUERY_TYPE_OA,
      INTEL_PERF_QUERY_TYPE_RAW,
      INTEL_PERF_QUERY_TYPE_PIPELINE,
   } kind;
   const char *name;        /* human readable name */
   const char *symbol_name; /* identifier usable in code/tools */
   const char *guid;        /* GUID of the OA configuration, if any */
   struct intel_perf_query_counter *counters; /* n_counters used out of
                                               * max_counters allocated
                                               */
   int n_counters;
   int max_counters;
   size_t data_size;        /* size of the result data for this query */

   /* OA specific */
   uint64_t oa_metrics_set_id; /* metric set identifier for i915-perf */
   int oa_format;              /* OA report format */

   /* For indexing into the accumulator[] ... */
   int gpu_time_offset;
   int gpu_clock_offset;
   int a_offset;
   int b_offset;
   int c_offset;
   int perfcnt_offset;
   int rpstat_offset;

   /* Register programming to run this query. */
   struct intel_perf_registers config;
};
2477ec681f3Smrg
/* When not using the MI_RPC command, this structure describes the list of
 * register offsets as well as their storage location so that they can be
 * stored through a series of MI_SRM commands and accumulated with
 * intel_perf_query_result_accumulate_fields().
 */
struct intel_perf_query_field_layout {
   /* Alignment for the layout */
   uint32_t alignment;

   /* Size of the whole layout */
   uint32_t size;

   /* Number of elements in the fields array */
   uint32_t n_fields;

   struct intel_perf_query_field {
      /* MMIO location of this register */
      uint16_t mmio_offset;

      /* Location of this register in the storage */
      uint16_t location;

      /* Type of register, for accumulation (see intel_perf_query_info:*_offset
       * fields)
       */
      enum intel_perf_query_field_type {
         INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC,
         INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
         INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT,
         INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B,
         INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C,
      } type;

      /* Index of register in the given type (for instance A31 or B2,
       * etc...)
       */
      uint8_t index;

      /* Size in bytes of this field: 4, 8 or 256 */
      uint16_t size;

      /* If not 0, mask to apply to the register value. */
      uint64_t mask;
   } *fields;
};
2927ec681f3Smrg
/* Cross-query information about a counter. */
struct intel_perf_query_counter_info {
   struct intel_perf_query_counter *counter;

   /* Mask of the queries in which this counter is available. */
   uint64_t query_mask;

   /**
    * Each counter can be a part of many groups, each time at different index.
    * This struct stores one of those locations.
    */
   struct {
      int group_idx; /* query/group number */
      int counter_idx; /* index inside of query/group */
   } location;
};
3077ec681f3Smrg
/* Top level performance query configuration for a device. */
struct intel_perf_config {
   /* Whether i915 has DRM_I915_QUERY_PERF_CONFIG support. */
   bool i915_query_supported;

   /* Version of the i915-perf subsystem, refer to i915_drm.h. */
   int i915_perf_version;

   /* Powergating configuration for running the query. */
   struct drm_i915_gem_context_param_sseu sseu;

   /* Queries available on this device (n_queries elements). */
   struct intel_perf_query_info *queries;
   int n_queries;

   /* Per counter information across all queries (n_counters elements). */
   struct intel_perf_query_counter_info *counter_infos;
   int n_counters;

   /* Layout of the register snapshots captured for queries. */
   struct intel_perf_query_field_layout query_layout;

   /* Variables referenced in the XML meta data for OA performance
    * counters, e.g in the normalization equations.
    *
    * All uint64_t for consistent operand types in generated code
    */
   struct {
      uint64_t timestamp_frequency; /** $GpuTimestampFrequency */
      uint64_t n_eus;               /** $EuCoresTotalCount */
      uint64_t n_eu_slices;         /** $EuSlicesTotalCount */
      uint64_t n_eu_sub_slices;     /** $EuSubslicesTotalCount */
      uint64_t eu_threads_count;    /** $EuThreadsCount */
      uint64_t slice_mask;          /** $SliceMask */
      uint64_t subslice_mask;       /** $SubsliceMask */
      uint64_t gt_min_freq;         /** $GpuMinFrequency */
      uint64_t gt_max_freq;         /** $GpuMaxFrequency */
      uint64_t revision;            /** $SkuRevisionId */
      bool     query_mode;          /** $QueryMode */
   } sys_vars;

   /* OA metric sets, indexed by GUID, as known by Mesa at build time, to
    * cross-reference with the GUIDs of configs advertised by the kernel at
    * runtime
    */
   struct hash_table *oa_metrics_table;

   /* When MDAPI hasn't configured the metric we need to use by the time the
    * query begins, this OA metric is used as a fallback.
    */
   uint64_t fallback_raw_oa_metric;

   /* Whether we have support for this platform. If true && n_queries == 0,
    * this means we will not be able to use i915-perf because it is in
    * paranoid mode.
    */
   bool platform_supported;

   /* Location of the device's sysfs entry. */
   char sysfs_dev_dir[256];

   /* Driver-provided callbacks (buffer-object management and command
    * emission) keeping this code independent of the driver using it.
    */
   struct {
      void *(*bo_alloc)(void *bufmgr, const char *name, uint64_t size);
      void (*bo_unreference)(void *bo);
      void *(*bo_map)(void *ctx, void *bo, unsigned flags);
      void (*bo_unmap)(void *bo);
      bool (*batch_references)(void *batch, void *bo);
      void (*bo_wait_rendering)(void *bo);
      int (*bo_busy)(void *bo);
      void (*emit_stall_at_pixel_scoreboard)(void *ctx);
      void (*emit_mi_report_perf_count)(void *ctx,
                                        void *bo,
                                        uint32_t offset_in_bytes,
                                        uint32_t report_id);
      void (*batchbuffer_flush)(void *ctx,
                                const char *file, int line);
      void (*store_register_mem)(void *ctx, void *bo, uint32_t reg, uint32_t reg_size, uint32_t offset);

   } vtbl;
};
3847ec681f3Smrg
/* A (query, counter) pair read during a particular pass of a multi-pass
 * counter collection (see intel_perf_get_counters_passes()).
 */
struct intel_perf_counter_pass {
   struct intel_perf_query_info *query;
   struct intel_perf_query_counter *counter;
   uint32_t pass; /* index of the pass in which this counter is read */
};
3907ec681f3Smrg
3917ec681f3Smrg/** Initialize the intel_perf_config object for a given device.
3927ec681f3Smrg *
3937ec681f3Smrg *    include_pipeline_statistics : Whether to add a pipeline statistic query
3947ec681f3Smrg *                                  intel_perf_query_info object
3957ec681f3Smrg *
3967ec681f3Smrg *    use_register_snapshots : Whether the queries should include counters
3977ec681f3Smrg *                             that rely on register snapshots using command
3987ec681f3Smrg *                             streamer instructions (not possible when using
3997ec681f3Smrg *                             only the OA buffer data).
4007ec681f3Smrg */
4017ec681f3Smrgvoid intel_perf_init_metrics(struct intel_perf_config *perf_cfg,
4027ec681f3Smrg                             const struct intel_device_info *devinfo,
4037ec681f3Smrg                             int drm_fd,
4047ec681f3Smrg                             bool include_pipeline_statistics,
4057ec681f3Smrg                             bool use_register_snapshots);
4067ec681f3Smrg
4077ec681f3Smrg/** Query i915 for a metric id using guid.
4087ec681f3Smrg */
4097ec681f3Smrgbool intel_perf_load_metric_id(struct intel_perf_config *perf_cfg,
4107ec681f3Smrg                               const char *guid,
4117ec681f3Smrg                               uint64_t *metric_id);
4127ec681f3Smrg
/** Load a configuration's content from i915 using a guid.
 */
4157ec681f3Smrgstruct intel_perf_registers *intel_perf_load_configuration(struct intel_perf_config *perf_cfg,
4167ec681f3Smrg                                                           int fd, const char *guid);
4177ec681f3Smrg
4187ec681f3Smrg/** Store a configuration into i915 using guid and return a new metric id.
4197ec681f3Smrg *
4207ec681f3Smrg * If guid is NULL, then a generated one will be provided by hashing the
4217ec681f3Smrg * content of the configuration.
4227ec681f3Smrg */
4237ec681f3Smrguint64_t intel_perf_store_configuration(struct intel_perf_config *perf_cfg, int fd,
4247ec681f3Smrg                                        const struct intel_perf_registers *config,
4257ec681f3Smrg                                        const char *guid);
4267ec681f3Smrg
/** Read the slice/unslice frequency from 2 OA reports and store them into
 *  result.
 */
4307ec681f3Smrgvoid intel_perf_query_result_read_frequencies(struct intel_perf_query_result *result,
4317ec681f3Smrg                                              const struct intel_device_info *devinfo,
4327ec681f3Smrg                                              const uint32_t *start,
4337ec681f3Smrg                                              const uint32_t *end);
4347ec681f3Smrg
4357ec681f3Smrg/** Store the GT frequency as reported by the RPSTAT register.
4367ec681f3Smrg */
4377ec681f3Smrgvoid intel_perf_query_result_read_gt_frequency(struct intel_perf_query_result *result,
4387ec681f3Smrg                                               const struct intel_device_info *devinfo,
4397ec681f3Smrg                                               const uint32_t start,
4407ec681f3Smrg                                               const uint32_t end);
4417ec681f3Smrg
4427ec681f3Smrg/** Store PERFCNT registers values.
4437ec681f3Smrg */
4447ec681f3Smrgvoid intel_perf_query_result_read_perfcnts(struct intel_perf_query_result *result,
4457ec681f3Smrg                                           const struct intel_perf_query_info *query,
4467ec681f3Smrg                                           const uint64_t *start,
4477ec681f3Smrg                                           const uint64_t *end);
4487ec681f3Smrg
4497ec681f3Smrg/** Accumulate the delta between 2 OA reports into result for a given query.
4507ec681f3Smrg */
4517ec681f3Smrgvoid intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
4527ec681f3Smrg                                        const struct intel_perf_query_info *query,
4537ec681f3Smrg                                        const struct intel_device_info *devinfo,
4547ec681f3Smrg                                        const uint32_t *start,
4557ec681f3Smrg                                        const uint32_t *end);
4567ec681f3Smrg
/** Accumulate the delta between 2 snapshots of OA perf registers (layout
 * should match description specified through intel_perf_query_field_layout).
 */
4607ec681f3Smrgvoid intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result,
4617ec681f3Smrg                                               const struct intel_perf_query_info *query,
4627ec681f3Smrg                                               const struct intel_device_info *devinfo,
4637ec681f3Smrg                                               const void *start,
4647ec681f3Smrg                                               const void *end,
4657ec681f3Smrg                                               bool no_oa_accumulate);
4667ec681f3Smrg
4677ec681f3Smrgvoid intel_perf_query_result_clear(struct intel_perf_query_result *result);
4687ec681f3Smrg
4697ec681f3Smrg/** Debug helper printing out query data.
4707ec681f3Smrg */
4717ec681f3Smrgvoid intel_perf_query_result_print_fields(const struct intel_perf_query_info *query,
4727ec681f3Smrg                                          const struct intel_device_info *devinfo,
4737ec681f3Smrg                                          const void *data);
4747ec681f3Smrg
4757ec681f3Smrgstatic inline size_t
4767ec681f3Smrgintel_perf_query_counter_get_size(const struct intel_perf_query_counter *counter)
4777ec681f3Smrg{
4787ec681f3Smrg   switch (counter->data_type) {
4797ec681f3Smrg   case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
4807ec681f3Smrg      return sizeof(uint32_t);
4817ec681f3Smrg   case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
4827ec681f3Smrg      return sizeof(uint32_t);
4837ec681f3Smrg   case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
4847ec681f3Smrg      return sizeof(uint64_t);
4857ec681f3Smrg   case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
4867ec681f3Smrg      return sizeof(float);
4877ec681f3Smrg   case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:
4887ec681f3Smrg      return sizeof(double);
4897ec681f3Smrg   default:
4907ec681f3Smrg      unreachable("invalid counter data type");
4917ec681f3Smrg   }
4927ec681f3Smrg}
4937ec681f3Smrg
4947ec681f3Smrgstatic inline struct intel_perf_config *
4957ec681f3Smrgintel_perf_new(void *ctx)
4967ec681f3Smrg{
4977ec681f3Smrg   struct intel_perf_config *perf = rzalloc(ctx, struct intel_perf_config);
4987ec681f3Smrg   return perf;
4997ec681f3Smrg}
5007ec681f3Smrg
5017ec681f3Smrg/** Whether we have the ability to hold off preemption on a batch so we don't
5027ec681f3Smrg * have to look at the OA buffer to subtract unrelated workloads off the
5037ec681f3Smrg * values captured through MI_* commands.
5047ec681f3Smrg */
5057ec681f3Smrgstatic inline bool
5067ec681f3Smrgintel_perf_has_hold_preemption(const struct intel_perf_config *perf)
5077ec681f3Smrg{
5087ec681f3Smrg   return perf->i915_perf_version >= 3;
5097ec681f3Smrg}
5107ec681f3Smrg
5117ec681f3Smrg/** Whether we have the ability to lock EU array power configuration for the
5127ec681f3Smrg * duration of the performance recording. This is useful on Gfx11 where the HW
5137ec681f3Smrg * architecture requires half the EU for particular workloads.
5147ec681f3Smrg */
5157ec681f3Smrgstatic inline bool
5167ec681f3Smrgintel_perf_has_global_sseu(const struct intel_perf_config *perf)
5177ec681f3Smrg{
5187ec681f3Smrg   return perf->i915_perf_version >= 4;
5197ec681f3Smrg}
5207ec681f3Smrg
5217ec681f3Smrguint32_t intel_perf_get_n_passes(struct intel_perf_config *perf,
5227ec681f3Smrg                                 const uint32_t *counter_indices,
5237ec681f3Smrg                                 uint32_t counter_indices_count,
5247ec681f3Smrg                                 struct intel_perf_query_info **pass_queries);
5257ec681f3Smrgvoid intel_perf_get_counters_passes(struct intel_perf_config *perf,
5267ec681f3Smrg                                    const uint32_t *counter_indices,
5277ec681f3Smrg                                    uint32_t counter_indices_count,
5287ec681f3Smrg                                    struct intel_perf_counter_pass *counter_pass);
5297ec681f3Smrg
5307ec681f3Smrg#ifdef __cplusplus
5317ec681f3Smrg} // extern "C"
5327ec681f3Smrg#endif
5337ec681f3Smrg
5347ec681f3Smrg#endif /* INTEL_PERF_H */
535