1/* 2 * Copyright © 2018 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#ifndef GEN_PERF_H 25#define GEN_PERF_H 26 27#include <stdio.h> 28#include <stdint.h> 29#include <string.h> 30 31#if defined(MAJOR_IN_SYSMACROS) 32#include <sys/sysmacros.h> 33#elif defined(MAJOR_IN_MKDEV) 34#include <sys/mkdev.h> 35#endif 36 37#include "util/hash_table.h" 38#include "util/ralloc.h" 39 40struct gen_device_info; 41 42struct gen_perf; 43struct gen_perf_query_info; 44 45enum gen_perf_counter_type { 46 GEN_PERF_COUNTER_TYPE_EVENT, 47 GEN_PERF_COUNTER_TYPE_DURATION_NORM, 48 GEN_PERF_COUNTER_TYPE_DURATION_RAW, 49 GEN_PERF_COUNTER_TYPE_THROUGHPUT, 50 GEN_PERF_COUNTER_TYPE_RAW, 51 GEN_PERF_COUNTER_TYPE_TIMESTAMP, 52}; 53 54enum gen_perf_counter_data_type { 55 GEN_PERF_COUNTER_DATA_TYPE_BOOL32, 56 GEN_PERF_COUNTER_DATA_TYPE_UINT32, 57 GEN_PERF_COUNTER_DATA_TYPE_UINT64, 58 GEN_PERF_COUNTER_DATA_TYPE_FLOAT, 59 GEN_PERF_COUNTER_DATA_TYPE_DOUBLE, 60}; 61 62struct gen_pipeline_stat { 63 uint32_t reg; 64 uint32_t numerator; 65 uint32_t denominator; 66}; 67 68/* 69 * The largest OA formats we can use include: 70 * For Haswell: 71 * 1 timestamp, 45 A counters, 8 B counters and 8 C counters. 72 * For Gen8+ 73 * 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters 74 */ 75#define MAX_OA_REPORT_COUNTERS 62 76 77struct gen_perf_query_result { 78 /** 79 * Storage for the final accumulated OA counters. 80 */ 81 uint64_t accumulator[MAX_OA_REPORT_COUNTERS]; 82 83 /** 84 * Hw ID used by the context on which the query was running. 85 */ 86 uint32_t hw_id; 87 88 /** 89 * Number of reports accumulated to produce the results. 90 */ 91 uint32_t reports_accumulated; 92 93 /** 94 * Frequency in the slices of the GT at the begin and end of the 95 * query. 96 */ 97 uint64_t slice_frequency[2]; 98 99 /** 100 * Frequency in the unslice of the GT at the begin and end of the 101 * query. 102 */ 103 uint64_t unslice_frequency[2]; 104}; 105 106struct gen_perf_query_counter { 107 const char *name; 108 const char *desc; 109 enum gen_perf_counter_type type; 110 enum gen_perf_counter_data_type data_type; 111 uint64_t raw_max; 112 size_t offset; 113 114 union { 115 uint64_t (*oa_counter_read_uint64)(struct gen_perf *perf, 116 const struct gen_perf_query_info *query, 117 const uint64_t *accumulator); 118 float (*oa_counter_read_float)(struct gen_perf *perf, 119 const struct gen_perf_query_info *query, 120 const uint64_t *accumulator); 121 struct gen_pipeline_stat pipeline_stat; 122 }; 123}; 124 125struct gen_perf_query_register_prog { 126 uint32_t reg; 127 uint32_t val; 128}; 129 130struct gen_perf_query_info { 131 enum gen_perf_query_type { 132 GEN_PERF_QUERY_TYPE_OA, 133 GEN_PERF_QUERY_TYPE_RAW, 134 GEN_PERF_QUERY_TYPE_PIPELINE, 135 } kind; 136 const char *name; 137 const char *guid; 138 struct gen_perf_query_counter *counters; 139 int n_counters; 140 int max_counters; 141 size_t data_size; 142 143 /* OA specific */ 144 uint64_t oa_metrics_set_id; 145 int oa_format; 146 147 /* For indexing into the accumulator[] ... */ 148 int gpu_time_offset; 149 int gpu_clock_offset; 150 int a_offset; 151 int b_offset; 152 int c_offset; 153 154 /* Register programming for a given query */ 155 struct gen_perf_query_register_prog *flex_regs; 156 uint32_t n_flex_regs; 157 158 struct gen_perf_query_register_prog *mux_regs; 159 uint32_t n_mux_regs; 160 161 struct gen_perf_query_register_prog *b_counter_regs; 162 uint32_t n_b_counter_regs; 163}; 164 165struct gen_perf { 166 struct gen_perf_query_info *queries; 167 int n_queries; 168 169 /* Variables referenced in the XML meta data for OA performance 170 * counters, e.g in the normalization equations. 171 * 172 * All uint64_t for consistent operand types in generated code 173 */ 174 struct { 175 uint64_t timestamp_frequency; /** $GpuTimestampFrequency */ 176 uint64_t n_eus; /** $EuCoresTotalCount */ 177 uint64_t n_eu_slices; /** $EuSlicesTotalCount */ 178 uint64_t n_eu_sub_slices; /** $EuSubslicesTotalCount */ 179 uint64_t eu_threads_count; /** $EuThreadsCount */ 180 uint64_t slice_mask; /** $SliceMask */ 181 uint64_t subslice_mask; /** $SubsliceMask */ 182 uint64_t gt_min_freq; /** $GpuMinFrequency */ 183 uint64_t gt_max_freq; /** $GpuMaxFrequency */ 184 uint64_t revision; /** $SkuRevisionId */ 185 } sys_vars; 186 187 /* OA metric sets, indexed by GUID, as know by Mesa at build time, to 188 * cross-reference with the GUIDs of configs advertised by the kernel at 189 * runtime 190 */ 191 struct hash_table *oa_metrics_table; 192 193 /* Location of the device's sysfs entry. */ 194 char sysfs_dev_dir[256]; 195 196 int (*ioctl)(int, unsigned long, void *); 197}; 198 199static inline size_t 200gen_perf_query_counter_get_size(const struct gen_perf_query_counter *counter) 201{ 202 switch (counter->data_type) { 203 case GEN_PERF_COUNTER_DATA_TYPE_BOOL32: 204 return sizeof(uint32_t); 205 case GEN_PERF_COUNTER_DATA_TYPE_UINT32: 206 return sizeof(uint32_t); 207 case GEN_PERF_COUNTER_DATA_TYPE_UINT64: 208 return sizeof(uint64_t); 209 case GEN_PERF_COUNTER_DATA_TYPE_FLOAT: 210 return sizeof(float); 211 case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE: 212 return sizeof(double); 213 default: 214 unreachable("invalid counter data type"); 215 } 216} 217 218static inline struct gen_perf_query_info * 219gen_perf_query_append_query_info(struct gen_perf *perf, int max_counters) 220{ 221 struct gen_perf_query_info *query; 222 223 perf->queries = reralloc(perf, perf->queries, 224 struct gen_perf_query_info, 225 ++perf->n_queries); 226 query = &perf->queries[perf->n_queries - 1]; 227 memset(query, 0, sizeof(*query)); 228 229 if (max_counters > 0) { 230 query->max_counters = max_counters; 231 query->counters = 232 rzalloc_array(perf, struct gen_perf_query_counter, max_counters); 233 } 234 235 return query; 236} 237 238static inline void 239gen_perf_query_info_add_stat_reg(struct gen_perf_query_info *query, 240 uint32_t reg, 241 uint32_t numerator, 242 uint32_t denominator, 243 const char *name, 244 const char *description) 245{ 246 struct gen_perf_query_counter *counter; 247 248 assert(query->n_counters < query->max_counters); 249 250 counter = &query->counters[query->n_counters]; 251 counter->name = name; 252 counter->desc = description; 253 counter->type = GEN_PERF_COUNTER_TYPE_RAW; 254 counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64; 255 counter->offset = sizeof(uint64_t) * query->n_counters; 256 counter->pipeline_stat.reg = reg; 257 counter->pipeline_stat.numerator = numerator; 258 counter->pipeline_stat.denominator = denominator; 259 260 query->n_counters++; 261} 262 263static inline void 264gen_perf_query_info_add_basic_stat_reg(struct gen_perf_query_info *query, 265 uint32_t reg, const char *name) 266{ 267 gen_perf_query_info_add_stat_reg(query, reg, 1, 1, name, name); 268} 269 270static inline struct gen_perf * 271gen_perf_new(void *ctx, int (*ioctl_cb)(int, unsigned long, void *)) 272{ 273 struct gen_perf *perf = rzalloc(ctx, struct gen_perf); 274 275 perf->ioctl = ioctl_cb; 276 277 return perf; 278} 279 280bool gen_perf_load_oa_metrics(struct gen_perf *perf, int fd, 281 const struct gen_device_info *devinfo); 282bool gen_perf_load_metric_id(struct gen_perf *perf, const char *guid, 283 uint64_t *metric_id); 284 285void gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result, 286 const struct gen_device_info *devinfo, 287 const uint32_t *start, 288 const uint32_t *end); 289void gen_perf_query_result_accumulate(struct gen_perf_query_result *result, 290 const struct gen_perf_query_info *query, 291 const uint32_t *start, 292 const uint32_t *end); 293void gen_perf_query_result_clear(struct gen_perf_query_result *result); 294 295 296#endif /* GEN_PERF_H */ 297