anv_perf.c revision 7ec681f3
/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#include "anv_private.h"
#include "vk_util.h"

#include "perf/intel_perf.h"
#include "perf/intel_perf_mdapi.h"

#include "util/mesa-sha1.h"

/* Probe the kernel's i915 perf support and, on success, attach an
 * intel_perf_config to the physical device (device->perf).  Leaves
 * device->perf NULL when performance queries are unavailable.  Also
 * precomputes device->n_perf_query_commands, the number of batch commands
 * needed to implement a single performance query.
 */
void
anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
{
   const struct intel_device_info *devinfo = &device->info;

   device->perf = NULL;

   /* We need self modifying batches. The i915 parser prevents it on
    * Gfx7.5 :( maybe one day.
    */
   if (devinfo->ver < 8)
      return;

   struct intel_perf_config *perf = intel_perf_new(NULL);

   /* Only register snapshots are requested here, not pipeline statistics
    * (see the inline boolean comments below).
    */
   intel_perf_init_metrics(perf, &device->info, fd,
                           false /* pipeline statistics */,
                           true /* register snapshots */);

   if (!perf->n_queries) {
      /* The platform is supported but no metrics were enumerated, which is
       * what happens when the paranoid sysctl blocks non-root access; warn
       * once per process.
       */
      if (perf->platform_supported) {
         static bool warned_once = false;

         if (!warned_once) {
            mesa_logw("Performance support disabled, "
                      "consider sysctl dev.i915.perf_stream_paranoid=0\n");
            warned_once = true;
         }
      }
      goto err;
   }

   /* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
    * perf revision 2.
    */
   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      if (!intel_perf_has_hold_preemption(perf))
         goto err;
   }

   device->perf = perf;

   /* Compute the number of commands we need to implement a performance
    * query.
    */
   const struct intel_perf_query_field_layout *layout = &perf->query_layout;
   device->n_perf_query_commands = 0;
   for (uint32_t f = 0; f < layout->n_fields; f++) {
      struct intel_perf_query_field *field = &layout->fields[f];

      switch (field->type) {
      case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:
         /* One MI_REPORT_PERF_COUNT per snapshot. */
         device->n_perf_query_commands++;
         break;
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
         /* One SRM (store-register-memory) command per 4-byte register. */
         device->n_perf_query_commands += field->size / 4;
         break;
      }
   }
   device->n_perf_query_commands *= 2; /* Begin & End */
   device->n_perf_query_commands += 1; /* availability */

   return;

 err:
   ralloc_free(perf);
}

/* Per-logical-device perf state init: no i915 perf stream is open yet. */
void
anv_device_perf_init(struct anv_device *device)
{
   device->perf_fd = -1;
}

/* Open an i915 perf stream on the device's DRM fd for the given metric set.
 * Returns the stream fd, or a negative value on failure (the raw ioctl
 * return value).
 */
static int
anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
{
   /* Properties are (key, value) pairs, hence the * 2. */
   uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];
   struct drm_i915_perf_open_param param;
   int p = 0, stream_fd;

   properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;
   properties[p++] = true;

   properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;
   properties[p++] = metric_id;

   /* OA report format differs between Gfx7 and Gfx8+. */
   properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
   properties[p++] = device->info.ver >= 8 ?
                     I915_OA_FORMAT_A32u40_A4u32_B8_C8 :
                     I915_OA_FORMAT_A45_B8_C8;

   properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
   properties[p++] = 31; /* slowest sampling period */

   /* Scope the stream to this Vulkan device's hardware context. */
   properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;
   properties[p++] = device->context_id;

   properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION;
   properties[p++] = true;

   /* If global SSEU is available, pin it to the default. This will ensure on
    * Gfx11 for instance we use the full EU array. Initially when perf was
    * enabled we would use only half on Gfx11 because of functional
    * requirements.
    */
   if (intel_perf_has_global_sseu(device->physical->perf)) {
      properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
      properties[p++] = (uintptr_t) &device->physical->perf->sseu;
   }

   memset(&param, 0, sizeof(param));
   param.flags = 0;
   param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;
   param.properties_ptr = (uintptr_t)properties;
   param.num_properties = p / 2;

   stream_fd = intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, &param);
   return stream_fd;
}

/* VK_INTEL_performance_query */
VkResult anv_InitializePerformanceApiINTEL(
    VkDevice                                    _device,
    const VkInitializePerformanceApiInfoINTEL*  pInitializeInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device->physical->perf)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   /* Not much to do here */
   return VK_SUCCESS;
}

/* Report implementation parameters for VK_INTEL_performance_query. */
VkResult anv_GetPerformanceParameterINTEL(
    VkDevice                                    _device,
    VkPerformanceParameterTypeINTEL             parameter,
    VkPerformanceValueINTEL*                    pValue)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device->physical->perf)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   VkResult result = VK_SUCCESS;
   switch (parameter) {
   case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL:
      pValue->type = VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL;
      pValue->data.valueBool = VK_TRUE;
      break;

   case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL:
      pValue->type = VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL;
      /* NOTE(review): 25 valid marker bits — presumably a hardware register
       * width limit; confirm against the stream-marker emission code.
       */
      pValue->data.value32 = 25;
      break;

   default:
      result = VK_ERROR_FEATURE_NOT_PRESENT;
      break;
   }

   return result;
}

/* Record the marker value on the command buffer; the value is emitted into
 * the stream elsewhere (not visible in this file).
 */
VkResult anv_CmdSetPerformanceMarkerINTEL(
    VkCommandBuffer                             commandBuffer,
    const VkPerformanceMarkerInfoINTEL*         pMarkerInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   cmd_buffer->intel_perf_marker = pMarkerInfo->marker;

   return VK_SUCCESS;
}

/* Load the MDAPI register configuration and register it with the kernel.
 * The returned handle owns both the ralloc'ed register_config and the
 * kernel-side config (config_id), released in
 * anv_ReleasePerformanceConfigurationINTEL.
 */
VkResult anv_AcquirePerformanceConfigurationINTEL(
    VkDevice                                    _device,
    const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo,
    VkPerformanceConfigurationINTEL*            pConfiguration)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_performance_configuration_intel *config;

   config = vk_object_alloc(&device->vk, NULL, sizeof(*config),
                            VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL);
   if (!config)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      config->register_config =
         intel_perf_load_configuration(device->physical->perf, device->fd,
                                       INTEL_PERF_QUERY_GUID_MDAPI);
      if (!config->register_config) {
         vk_object_free(&device->vk, NULL, config);
         return VK_INCOMPLETE;
      }

      /* Registers the config with the kernel; a non-negative return is the
       * kernel metric-set id for this config.
       */
      int ret =
         intel_perf_store_configuration(device->physical->perf, device->fd,
                                        config->register_config, NULL /* guid */);
      if (ret < 0) {
         ralloc_free(config->register_config);
         vk_object_free(&device->vk, NULL, config);
         return VK_INCOMPLETE;
      }

      config->config_id = ret;
   }

   *pConfiguration = anv_performance_configuration_intel_to_handle(config);

   return VK_SUCCESS;
}

/* Tear down a configuration created by
 * anv_AcquirePerformanceConfigurationINTEL: remove the kernel config, free
 * the loaded register config and the handle itself.
 */
VkResult anv_ReleasePerformanceConfigurationINTEL(
    VkDevice                                    _device,
    VkPerformanceConfigurationINTEL             _configuration)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG))
      intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config->config_id);

   ralloc_free(config->register_config);

   vk_object_free(&device->vk, NULL, config);

   return VK_SUCCESS;
}

/* Apply a configuration to the device's perf stream, opening the stream on
 * first use and switching the metric set with I915_PERF_IOCTL_CONFIG
 * afterwards.  A failed config switch marks the device lost.
 */
VkResult anv_QueueSetPerformanceConfigurationINTEL(
    VkQueue                                     _queue,
    VkPerformanceConfigurationINTEL             _configuration)
{
   ANV_FROM_HANDLE(anv_queue, queue, _queue);
   ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
   struct anv_device *device = queue->device;

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      if (device->perf_fd < 0) {
         device->perf_fd = anv_device_perf_open(device, config->config_id);
         if (device->perf_fd < 0)
            return VK_ERROR_INITIALIZATION_FAILED;
      } else {
         int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
                               (void *)(uintptr_t) config->config_id);
         if (ret < 0)
            return anv_device_set_lost(device, "i915-perf config failed: %m");
      }
   }

   return VK_SUCCESS;
}

/* Close the perf stream fd if one was opened via
 * anv_QueueSetPerformanceConfigurationINTEL.
 */
void anv_UninitializePerformanceApiINTEL(
    VkDevice                                    _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (device->perf_fd >= 0) {
      close(device->perf_fd);
      device->perf_fd = -1;
   }
}

/* VK_KHR_performance_query */

/* Map intel_perf counter units onto the Vulkan unit enum.  Units with no
 * Vulkan equivalent fall back to GENERIC.
 */
static const VkPerformanceCounterUnitKHR
intel_perf_counter_unit_to_vk_unit[] = {
   [INTEL_PERF_COUNTER_UNITS_BYTES]                                = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
   [INTEL_PERF_COUNTER_UNITS_HZ]                                   = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
   [INTEL_PERF_COUNTER_UNITS_NS]                                   = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
   /* Vulkan has no microsecond unit; values are converted to ns in
    * anv_perf_write_pass_results (x1000).
    */
   [INTEL_PERF_COUNTER_UNITS_US]                                   = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, /* todo */
   [INTEL_PERF_COUNTER_UNITS_PIXELS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_TEXELS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_THREADS]                              = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_PERCENT]                              = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
   [INTEL_PERF_COUNTER_UNITS_MESSAGES]                             = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_NUMBER]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_CYCLES]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EVENTS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_UTILIZATION]                          = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES]           = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES]        = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE]           = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
};

/* Map intel_perf counter storage types onto the Vulkan storage enum.
 * BOOL32 is reported as UINT32 storage.
 */
static const VkPerformanceCounterStorageKHR
intel_perf_counter_data_type_to_vk_storage[] = {
   [INTEL_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
   [INTEL_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
   [INTEL_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
   [INTEL_PERF_COUNTER_DATA_TYPE_FLOAT]  = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
   [INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
};

/* Enumerate the available performance counters and their descriptions.
 * Follows the standard Vulkan two-call outarray idiom; with a NULL perf
 * config (no kernel support) zero counters are reported.  Counter UUIDs are
 * derived from a SHA1 of the counter's symbol name.
 */
VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t                                    queueFamilyIndex,
    uint32_t*                                   pCounterCount,
    VkPerformanceCounterKHR*                    pCounters,
    VkPerformanceCounterDescriptionKHR*         pCounterDescriptions)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   struct intel_perf_config *perf = pdevice->perf;

   /* Both output arrays share the same application-provided count; keep a
    * copy so each outarray tracks its own capacity.
    */
   uint32_t desc_count = *pCounterCount;

   VK_OUTARRAY_MAKE(out, pCounters, pCounterCount);
   VK_OUTARRAY_MAKE(out_desc, pCounterDescriptions, &desc_count);

   for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {
      const struct intel_perf_query_counter *intel_counter = perf->counter_infos[c].counter;

      vk_outarray_append(&out, counter) {
         counter->unit = intel_perf_counter_unit_to_vk_unit[intel_counter->units];
         counter->scope = VK_QUERY_SCOPE_COMMAND_KHR;
         counter->storage = intel_perf_counter_data_type_to_vk_storage[intel_counter->data_type];

         unsigned char sha1_result[20];
         _mesa_sha1_compute(intel_counter->symbol_name,
                            strlen(intel_counter->symbol_name),
                            sha1_result);
         /* UUID is 16 bytes, i.e. the first 16 bytes of the 20-byte SHA1. */
         memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
      }

      vk_outarray_append(&out_desc, desc) {
         desc->flags = 0; /* None so far. */
         snprintf(desc->name, sizeof(desc->name), "%s", intel_counter->name);
         snprintf(desc->category, sizeof(desc->category), "%s", intel_counter->category);
         snprintf(desc->description, sizeof(desc->description), "%s", intel_counter->desc);
      }
   }

   return vk_outarray_status(&out);
}

/* Report the number of passes (command buffer submissions) needed to read
 * the requested set of counters.
 */
void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
    VkPhysicalDevice                            physicalDevice,
    const VkQueryPoolPerformanceCreateInfoKHR*  pPerformanceQueryCreateInfo,
    uint32_t*                                   pNumPasses)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   struct intel_perf_config *perf = pdevice->perf;

   if (!perf) {
      *pNumPasses = 0;
      return;
   }

   *pNumPasses = intel_perf_get_n_passes(perf,
                                         pPerformanceQueryCreateInfo->pCounterIndices,
                                         pPerformanceQueryCreateInfo->counterIndexCount,
                                         NULL);
}

/* Open the perf stream for VK_KHR_performance_query use, with the first
 * metric set as the initial configuration.  Per the spec, failure to
 * acquire the lock is reported as VK_TIMEOUT.
 */
VkResult anv_AcquireProfilingLockKHR(
    VkDevice                                    _device,
    const VkAcquireProfilingLockInfoKHR*        pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct intel_perf_config *perf = device->physical->perf;
   struct intel_perf_query_info *first_metric_set = &perf->queries[0];
   int fd = -1;

   assert(device->perf_fd == -1);

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      fd = anv_device_perf_open(device, first_metric_set->oa_metrics_set_id);
      if (fd < 0)
         return VK_TIMEOUT;
   }

   device->perf_fd = fd;
   return VK_SUCCESS;
}

/* Counterpart of anv_AcquireProfilingLockKHR: close the perf stream. */
void anv_ReleaseProfilingLockKHR(
    VkDevice                                    _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
      assert(device->perf_fd >= 0);
      close(device->perf_fd);
   }
   device->perf_fd = -1;
}

/* Convert the accumulated raw results of one query pass into the
 * VkPerformanceCounterResultKHR array handed back to the application.
 * Only counters belonging to `pass` are written; other entries of
 * `results` are left untouched for the other passes.
 */
void
anv_perf_write_pass_results(struct intel_perf_config *perf,
                            struct anv_query_pool *pool, uint32_t pass,
                            const struct intel_perf_query_result *accumulated_results,
                            union VkPerformanceCounterResultKHR *results)
{
   for (uint32_t c = 0; c < pool->n_counters; c++) {
      const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c];

      if (counter_pass->pass != pass)
         continue;

      switch (pool->pass_query[pass]->kind) {
      case INTEL_PERF_QUERY_TYPE_PIPELINE: {
         /* Pipeline statistics are plain uint64 accumulator slots. */
         assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
         uint32_t accu_offset = counter_pass->counter->offset / sizeof(uint64_t);
         results[c].uint64 = accumulated_results->accumulator[accu_offset];
         break;
      }

      case INTEL_PERF_QUERY_TYPE_OA:
      case INTEL_PERF_QUERY_TYPE_RAW:
         /* OA counters are computed through per-counter read callbacks. */
         switch (counter_pass->counter->data_type) {
         case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
            results[c].uint64 =
               counter_pass->counter->oa_counter_read_uint64(perf,
                                                             counter_pass->query,
                                                             accumulated_results);
            break;
         case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
            results[c].float32 =
               counter_pass->counter->oa_counter_read_float(perf,
                                                            counter_pass->query,
                                                            accumulated_results);
            break;
         default:
            /* So far we aren't using uint32, double or bool32... */
            unreachable("unexpected counter data type");
         }
         break;

      default:
         unreachable("invalid query type");
      }

      /* The Vulkan extension only has nanoseconds as a unit */
      if (counter_pass->counter->units == INTEL_PERF_COUNTER_UNITS_US) {
         assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
         results[c].uint64 *= 1000;
      }
   }
}