1/* 2 * Copyright © 2019 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 * DEALINGS IN THE SOFTWARE. 21 */ 22 23#include "crocus_monitor.h" 24 25#include <xf86drm.h> 26 27#include "crocus_screen.h" 28#include "crocus_context.h" 29 30#include "perf/intel_perf.h" 31#include "perf/intel_perf_query.h" 32#include "perf/intel_perf_regs.h" 33 34struct crocus_monitor_object { 35 int num_active_counters; 36 int *active_counters; 37 38 size_t result_size; 39 unsigned char *result_buffer; 40 41 struct intel_perf_query_object *query; 42}; 43 44int 45crocus_get_monitor_info(struct pipe_screen *pscreen, unsigned index, 46 struct pipe_driver_query_info *info) 47{ 48 const struct crocus_screen *screen = (struct crocus_screen *)pscreen; 49 assert(screen->monitor_cfg); 50 if (!screen->monitor_cfg) 51 return 0; 52 53 const struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg; 54 55 if (!info) { 56 /* return the number of metrics */ 57 return monitor_cfg->num_counters; 58 } 59 60 const struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg; 61 const int group = monitor_cfg->counters[index].group; 62 const int counter_index = monitor_cfg->counters[index].counter; 63 struct intel_perf_query_counter *counter = 64 &perf_cfg->queries[group].counters[counter_index]; 65 66 info->group_id = group; 67 info->name = counter->name; 68 info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index; 69 70 if (counter->type == INTEL_PERF_COUNTER_TYPE_THROUGHPUT) 71 info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE; 72 else 73 info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE; 74 switch (counter->data_type) { 75 case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32: 76 case INTEL_PERF_COUNTER_DATA_TYPE_UINT32: 77 info->type = PIPE_DRIVER_QUERY_TYPE_UINT; 78 info->max_value.u32 = 0; 79 break; 80 case INTEL_PERF_COUNTER_DATA_TYPE_UINT64: 81 info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; 82 info->max_value.u64 = 0; 83 break; 84 case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT: 85 case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE: 86 info->type = PIPE_DRIVER_QUERY_TYPE_FLOAT; 87 info->max_value.u64 = -1; 88 break; 89 default: 90 assert(false); 91 break; 92 } 93 94 /* indicates that this is an OA query, not a pipeline statistics query */ 95 info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH; 96 return 1; 97} 98 99typedef void (*bo_unreference_t)(void *); 100typedef void *(*bo_map_t)(void *, void *, unsigned flags); 101typedef void (*bo_unmap_t)(void *); 102typedef void (*emit_mi_report_t)(void *, void *, uint32_t, uint32_t); 103typedef void (*emit_mi_flush_t)(void *); 104typedef void (*capture_frequency_stat_register_t)(void *, void *, 105 uint32_t ); 106typedef void (*store_register_mem64_t)(void *ctx, void *bo, 107 uint32_t reg, uint32_t offset); 108typedef bool (*batch_references_t)(void *batch, void *bo); 109typedef void (*bo_wait_rendering_t)(void *bo); 110typedef int (*bo_busy_t)(void *bo); 111 112static void * 113crocus_oa_bo_alloc(void *bufmgr, const char *name, uint64_t size) 114{ 115 return crocus_bo_alloc(bufmgr, name, size); 116} 117 118#if 0 119static void 120crocus_monitor_emit_mi_flush(struct crocus_context *ice) 121{ 122 const int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH | 123 PIPE_CONTROL_INSTRUCTION_INVALIDATE | 124 PIPE_CONTROL_CONST_CACHE_INVALIDATE | 125 PIPE_CONTROL_DATA_CACHE_FLUSH | 126 PIPE_CONTROL_DEPTH_CACHE_FLUSH | 127 PIPE_CONTROL_VF_CACHE_INVALIDATE | 128 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | 129 PIPE_CONTROL_CS_STALL; 130 crocus_emit_pipe_control_flush(&ice->batches[CROCUS_BATCH_RENDER], 131 "OA metrics", flags); 132} 133#endif 134 135static void 136crocus_monitor_emit_mi_report_perf_count(void *c, 137 void *bo, 138 uint32_t offset_in_bytes, 139 uint32_t report_id) 140{ 141 struct crocus_context *ice = c; 142 struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER]; 143 struct crocus_screen *screen = batch->screen; 144 screen->vtbl.emit_mi_report_perf_count(batch, bo, offset_in_bytes, report_id); 145} 146 147static void 148crocus_monitor_batchbuffer_flush(void *c, const char *file, int line) 149{ 150 struct crocus_context *ice = c; 151 _crocus_batch_flush(&ice->batches[CROCUS_BATCH_RENDER], __FILE__, __LINE__); 152} 153 154#if 0 155static void 156crocus_monitor_capture_frequency_stat_register(void *ctx, 157 void *bo, 158 uint32_t bo_offset) 159{ 160 struct crocus_context *ice = ctx; 161 struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER]; 162 ice->vtbl.store_register_mem32(batch, GEN9_RPSTAT0, bo, bo_offset, false); 163} 164 165static void 166crocus_monitor_store_register_mem64(void *ctx, void *bo, 167 uint32_t reg, uint32_t offset) 168{ 169 struct crocus_context *ice = ctx; 170 struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER]; 171 ice->vtbl.store_register_mem64(batch, reg, bo, offset, false); 172} 173#endif 174 175static bool 176crocus_monitor_init_metrics(struct crocus_screen *screen) 177{ 178 struct crocus_monitor_config *monitor_cfg = 179 rzalloc(screen, struct crocus_monitor_config); 180 struct intel_perf_config *perf_cfg = NULL; 181 if (unlikely(!monitor_cfg)) 182 goto allocation_error; 183 perf_cfg = intel_perf_new(monitor_cfg); 184 if (unlikely(!perf_cfg)) 185 goto allocation_error; 186 187 monitor_cfg->perf_cfg = perf_cfg; 188 189 perf_cfg->vtbl.bo_alloc = crocus_oa_bo_alloc; 190 perf_cfg->vtbl.bo_unreference = (bo_unreference_t)crocus_bo_unreference; 191 perf_cfg->vtbl.bo_map = (bo_map_t)crocus_bo_map; 192 perf_cfg->vtbl.bo_unmap = (bo_unmap_t)crocus_bo_unmap; 193 194 perf_cfg->vtbl.emit_mi_report_perf_count = 195 (emit_mi_report_t)crocus_monitor_emit_mi_report_perf_count; 196 perf_cfg->vtbl.batchbuffer_flush = crocus_monitor_batchbuffer_flush; 197 perf_cfg->vtbl.batch_references = (batch_references_t)crocus_batch_references; 198 perf_cfg->vtbl.bo_wait_rendering = 199 (bo_wait_rendering_t)crocus_bo_wait_rendering; 200 perf_cfg->vtbl.bo_busy = (bo_busy_t)crocus_bo_busy; 201 202 intel_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd, false, false); 203 screen->monitor_cfg = monitor_cfg; 204 205 /* a gallium "group" is equivalent to a gen "query" 206 * a gallium "query" is equivalent to a gen "query_counter" 207 * 208 * Each gen_query supports a specific number of query_counters. To 209 * allocate the array of crocus_monitor_counter, we need an upper bound 210 * (ignoring duplicate query_counters). 211 */ 212 int gen_query_counters_count = 0; 213 for (int gen_query_id = 0; 214 gen_query_id < perf_cfg->n_queries; 215 ++gen_query_id) { 216 gen_query_counters_count += perf_cfg->queries[gen_query_id].n_counters; 217 } 218 219 monitor_cfg->counters = rzalloc_size(monitor_cfg, 220 sizeof(struct crocus_monitor_counter) * 221 gen_query_counters_count); 222 if (unlikely(!monitor_cfg->counters)) 223 goto allocation_error; 224 225 int crocus_monitor_id = 0; 226 for (int group = 0; group < perf_cfg->n_queries; ++group) { 227 for (int counter = 0; 228 counter < perf_cfg->queries[group].n_counters; 229 ++counter) { 230 /* Check previously identified metrics to filter out duplicates. The 231 * user is not helped by having the same metric available in several 232 * groups. (n^2 algorithm). 233 */ 234 bool duplicate = false; 235 for (int existing_group = 0; 236 existing_group < group && !duplicate; 237 ++existing_group) { 238 for (int existing_counter = 0; 239 existing_counter < perf_cfg->queries[existing_group].n_counters && !duplicate; 240 ++existing_counter) { 241 const char *current_name = 242 perf_cfg->queries[group].counters[counter].name; 243 const char *existing_name = 244 perf_cfg->queries[existing_group].counters[existing_counter].name; 245 if (strcmp(current_name, existing_name) == 0) { 246 duplicate = true; 247 } 248 } 249 } 250 if (duplicate) 251 continue; 252 monitor_cfg->counters[crocus_monitor_id].group = group; 253 monitor_cfg->counters[crocus_monitor_id].counter = counter; 254 ++crocus_monitor_id; 255 } 256 } 257 monitor_cfg->num_counters = crocus_monitor_id; 258 return monitor_cfg->num_counters; 259 260allocation_error: 261 if (monitor_cfg) 262 free(monitor_cfg->counters); 263 free(perf_cfg); 264 free(monitor_cfg); 265 return false; 266} 267 268int 269crocus_get_monitor_group_info(struct pipe_screen *pscreen, 270 unsigned group_index, 271 struct pipe_driver_query_group_info *info) 272{ 273 struct crocus_screen *screen = (struct crocus_screen *)pscreen; 274 if (!screen->monitor_cfg) { 275 if (!crocus_monitor_init_metrics(screen)) 276 return 0; 277 } 278 279 const struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg; 280 const struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg; 281 282 if (!info) { 283 /* return the count that can be queried */ 284 return perf_cfg->n_queries; 285 } 286 287 if (group_index >= perf_cfg->n_queries) { 288 /* out of range */ 289 return 0; 290 } 291 292 struct intel_perf_query_info *query = &perf_cfg->queries[group_index]; 293 294 info->name = query->name; 295 info->max_active_queries = query->n_counters; 296 info->num_queries = query->n_counters; 297 298 return 1; 299} 300 301static void 302crocus_init_monitor_ctx(struct crocus_context *ice) 303{ 304 struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen; 305 struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg; 306 307 ice->perf_ctx = intel_perf_new_context(ice); 308 if (unlikely(!ice->perf_ctx)) 309 return; 310 311 struct intel_perf_context *perf_ctx = ice->perf_ctx; 312 struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg; 313 intel_perf_init_context(perf_ctx, 314 perf_cfg, 315 ice, 316 ice, 317 screen->bufmgr, 318 &screen->devinfo, 319 ice->batches[CROCUS_BATCH_RENDER].hw_ctx_id, 320 screen->fd); 321} 322 323/* entry point for GenPerfMonitorsAMD */ 324struct crocus_monitor_object * 325crocus_create_monitor_object(struct crocus_context *ice, 326 unsigned num_queries, 327 unsigned *query_types) 328{ 329 struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen; 330 struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg; 331 struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg; 332 struct intel_perf_query_object *query_obj = NULL; 333 334 /* initialize perf context if this has not already been done. This 335 * function is the first entry point that carries the gl context. 336 */ 337 if (ice->perf_ctx == NULL) { 338 crocus_init_monitor_ctx(ice); 339 } 340 struct intel_perf_context *perf_ctx = ice->perf_ctx; 341 342 assert(num_queries > 0); 343 int query_index = query_types[0] - PIPE_QUERY_DRIVER_SPECIFIC; 344 assert(query_index <= monitor_cfg->num_counters); 345 const int group = monitor_cfg->counters[query_index].group; 346 347 struct crocus_monitor_object *monitor = 348 calloc(1, sizeof(struct crocus_monitor_object)); 349 if (unlikely(!monitor)) 350 goto allocation_failure; 351 352 monitor->num_active_counters = num_queries; 353 monitor->active_counters = calloc(num_queries, sizeof(int)); 354 if (unlikely(!monitor->active_counters)) 355 goto allocation_failure; 356 357 for (int i = 0; i < num_queries; ++i) { 358 unsigned current_query = query_types[i]; 359 unsigned current_query_index = current_query - PIPE_QUERY_DRIVER_SPECIFIC; 360 361 /* all queries must be in the same group */ 362 assert(current_query_index <= monitor_cfg->num_counters); 363 assert(monitor_cfg->counters[current_query_index].group == group); 364 monitor->active_counters[i] = 365 monitor_cfg->counters[current_query_index].counter; 366 } 367 368 /* create the intel_perf_query */ 369 query_obj = intel_perf_new_query(perf_ctx, group); 370 if (unlikely(!query_obj)) 371 goto allocation_failure; 372 373 monitor->query = query_obj; 374 monitor->result_size = perf_cfg->queries[group].data_size; 375 monitor->result_buffer = calloc(1, monitor->result_size); 376 if (unlikely(!monitor->result_buffer)) 377 goto allocation_failure; 378 379 return monitor; 380 381allocation_failure: 382 if (monitor) { 383 free(monitor->active_counters); 384 free(monitor->result_buffer); 385 } 386 free(query_obj); 387 free(monitor); 388 return NULL; 389} 390 391void 392crocus_destroy_monitor_object(struct pipe_context *ctx, 393 struct crocus_monitor_object *monitor) 394{ 395 struct crocus_context *ice = (struct crocus_context *)ctx; 396 397 intel_perf_delete_query(ice->perf_ctx, monitor->query); 398 free(monitor->result_buffer); 399 monitor->result_buffer = NULL; 400 free(monitor->active_counters); 401 monitor->active_counters = NULL; 402 free(monitor); 403} 404 405bool 406crocus_begin_monitor(struct pipe_context *ctx, 407 struct crocus_monitor_object *monitor) 408{ 409 struct crocus_context *ice = (void *) ctx; 410 struct intel_perf_context *perf_ctx = ice->perf_ctx; 411 412 return intel_perf_begin_query(perf_ctx, monitor->query); 413} 414 415bool 416crocus_end_monitor(struct pipe_context *ctx, 417 struct crocus_monitor_object *monitor) 418{ 419 struct crocus_context *ice = (void *) ctx; 420 struct intel_perf_context *perf_ctx = ice->perf_ctx; 421 422 intel_perf_end_query(perf_ctx, monitor->query); 423 return true; 424} 425 426bool 427crocus_get_monitor_result(struct pipe_context *ctx, 428 struct crocus_monitor_object *monitor, 429 bool wait, 430 union pipe_numeric_type_union *result) 431{ 432 struct crocus_context *ice = (void *) ctx; 433 struct intel_perf_context *perf_ctx = ice->perf_ctx; 434 struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER]; 435 436 bool monitor_ready = 437 intel_perf_is_query_ready(perf_ctx, monitor->query, batch); 438 439 if (!monitor_ready) { 440 if (!wait) 441 return false; 442 intel_perf_wait_query(perf_ctx, monitor->query, batch); 443 } 444 445 assert(intel_perf_is_query_ready(perf_ctx, monitor->query, batch)); 446 447 unsigned bytes_written; 448 intel_perf_get_query_data(perf_ctx, monitor->query, batch, 449 monitor->result_size, 450 (unsigned*) monitor->result_buffer, 451 &bytes_written); 452 if (bytes_written != monitor->result_size) 453 return false; 454 455 /* copy metrics into the batch result */ 456 for (int i = 0; i < monitor->num_active_counters; ++i) { 457 int current_counter = monitor->active_counters[i]; 458 const struct intel_perf_query_info *info = 459 intel_perf_query_info(monitor->query); 460 const struct intel_perf_query_counter *counter = 461 &info->counters[current_counter]; 462 assert(intel_perf_query_counter_get_size(counter)); 463 switch (counter->data_type) { 464 case INTEL_PERF_COUNTER_DATA_TYPE_UINT64: 465 result[i].u64 = *(uint64_t*)(monitor->result_buffer + counter->offset); 466 break; 467 case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT: 468 result[i].f = *(float*)(monitor->result_buffer + counter->offset); 469 break; 470 case INTEL_PERF_COUNTER_DATA_TYPE_UINT32: 471 case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32: 472 result[i].u64 = *(uint32_t*)(monitor->result_buffer + counter->offset); 473 break; 474 case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE: { 475 double v = *(double*)(monitor->result_buffer + counter->offset); 476 result[i].f = v; 477 break; 478 } 479 default: 480 unreachable("unexpected counter data type"); 481 } 482 } 483 return true; 484} 485