1/* 2 * Copyright © 2018 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include <dirent.h> 25 26#include <sys/types.h> 27#include <sys/stat.h> 28#include <fcntl.h> 29#include <unistd.h> 30#include <errno.h> 31 32#include <drm-uapi/i915_drm.h> 33 34#include "gen_perf.h" 35#include "perf/gen_perf_metrics.h" 36 37#include "dev/gen_debug.h" 38#include "dev/gen_device_info.h" 39#include "util/bitscan.h" 40 41#define FILE_DEBUG_FLAG DEBUG_PERFMON 42 43static bool 44get_sysfs_dev_dir(struct gen_perf *perf, int fd) 45{ 46 struct stat sb; 47 int min, maj; 48 DIR *drmdir; 49 struct dirent *drm_entry; 50 int len; 51 52 perf->sysfs_dev_dir[0] = '\0'; 53 54 if (fstat(fd, &sb)) { 55 DBG("Failed to stat DRM fd\n"); 56 return false; 57 } 58 59 maj = major(sb.st_rdev); 60 min = minor(sb.st_rdev); 61 62 if (!S_ISCHR(sb.st_mode)) { 63 DBG("DRM fd is not a character device as expected\n"); 64 return false; 65 } 66 67 len = snprintf(perf->sysfs_dev_dir, 68 sizeof(perf->sysfs_dev_dir), 69 "/sys/dev/char/%d:%d/device/drm", maj, min); 70 if (len < 0 || len >= sizeof(perf->sysfs_dev_dir)) { 71 DBG("Failed to concatenate sysfs path to drm device\n"); 72 return false; 73 } 74 75 drmdir = opendir(perf->sysfs_dev_dir); 76 if (!drmdir) { 77 DBG("Failed to open %s: %m\n", perf->sysfs_dev_dir); 78 return false; 79 } 80 81 while ((drm_entry = readdir(drmdir))) { 82 if ((drm_entry->d_type == DT_DIR || 83 drm_entry->d_type == DT_LNK) && 84 strncmp(drm_entry->d_name, "card", 4) == 0) 85 { 86 len = snprintf(perf->sysfs_dev_dir, 87 sizeof(perf->sysfs_dev_dir), 88 "/sys/dev/char/%d:%d/device/drm/%s", 89 maj, min, drm_entry->d_name); 90 closedir(drmdir); 91 if (len < 0 || len >= sizeof(perf->sysfs_dev_dir)) 92 return false; 93 else 94 return true; 95 } 96 } 97 98 closedir(drmdir); 99 100 DBG("Failed to find cardX directory under /sys/dev/char/%d:%d/device/drm\n", 101 maj, min); 102 103 return false; 104} 105 106static bool 107read_file_uint64(const char *file, uint64_t *val) 108{ 109 char buf[32]; 110 int fd, n; 111 112 fd = open(file, 0); 113 if (fd < 0) 114 return false; 115 while ((n = read(fd, buf, sizeof (buf) - 1)) < 0 && 116 errno == EINTR); 117 close(fd); 118 if (n < 0) 119 return false; 120 121 buf[n] = '\0'; 122 *val = strtoull(buf, NULL, 0); 123 124 return true; 125} 126 127static bool 128read_sysfs_drm_device_file_uint64(struct gen_perf *perf, 129 const char *file, 130 uint64_t *value) 131{ 132 char buf[512]; 133 int len; 134 135 len = snprintf(buf, sizeof(buf), "%s/%s", perf->sysfs_dev_dir, file); 136 if (len < 0 || len >= sizeof(buf)) { 137 DBG("Failed to concatenate sys filename to read u64 from\n"); 138 return false; 139 } 140 141 return read_file_uint64(buf, value); 142} 143 144static void 145register_oa_config(struct gen_perf *perf, 146 const struct gen_perf_query_info *query, 147 uint64_t config_id) 148{ 149 struct gen_perf_query_info *registred_query = 150 gen_perf_query_append_query_info(perf, 0); 151 152 *registred_query = *query; 153 registred_query->oa_metrics_set_id = config_id; 154 DBG("metric set registred: id = %" PRIu64", guid = %s\n", 155 registred_query->oa_metrics_set_id, query->guid); 156} 157 158static void 159enumerate_sysfs_metrics(struct gen_perf *perf) 160{ 161 DIR *metricsdir = NULL; 162 struct dirent *metric_entry; 163 char buf[256]; 164 int len; 165 166 len = snprintf(buf, sizeof(buf), "%s/metrics", perf->sysfs_dev_dir); 167 if (len < 0 || len >= sizeof(buf)) { 168 DBG("Failed to concatenate path to sysfs metrics/ directory\n"); 169 return; 170 } 171 172 metricsdir = opendir(buf); 173 if (!metricsdir) { 174 DBG("Failed to open %s: %m\n", buf); 175 return; 176 } 177 178 while ((metric_entry = readdir(metricsdir))) { 179 struct hash_entry *entry; 180 181 if ((metric_entry->d_type != DT_DIR && 182 metric_entry->d_type != DT_LNK) || 183 metric_entry->d_name[0] == '.') 184 continue; 185 186 DBG("metric set: %s\n", metric_entry->d_name); 187 entry = _mesa_hash_table_search(perf->oa_metrics_table, 188 metric_entry->d_name); 189 if (entry) { 190 uint64_t id; 191 192 len = snprintf(buf, sizeof(buf), "%s/metrics/%s/id", 193 perf->sysfs_dev_dir, metric_entry->d_name); 194 if (len < 0 || len >= sizeof(buf)) { 195 DBG("Failed to concatenate path to sysfs metric id file\n"); 196 continue; 197 } 198 199 if (!read_file_uint64(buf, &id)) { 200 DBG("Failed to read metric set id from %s: %m", buf); 201 continue; 202 } 203 204 register_oa_config(perf, (const struct gen_perf_query_info *)entry->data, id); 205 } else 206 DBG("metric set not known by mesa (skipping)\n"); 207 } 208 209 closedir(metricsdir); 210} 211 212static bool 213kernel_has_dynamic_config_support(struct gen_perf *perf, int fd) 214{ 215 uint64_t invalid_config_id = UINT64_MAX; 216 217 return perf->ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, 218 &invalid_config_id) < 0 && errno == ENOENT; 219} 220 221bool 222gen_perf_load_metric_id(struct gen_perf *perf, const char *guid, 223 uint64_t *metric_id) 224{ 225 char config_path[280]; 226 227 snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id", 228 perf->sysfs_dev_dir, guid); 229 230 /* Don't recreate already loaded configs. */ 231 return read_file_uint64(config_path, metric_id); 232} 233 234static void 235init_oa_configs(struct gen_perf *perf, int fd) 236{ 237 hash_table_foreach(perf->oa_metrics_table, entry) { 238 const struct gen_perf_query_info *query = entry->data; 239 struct drm_i915_perf_oa_config config; 240 uint64_t config_id; 241 int ret; 242 243 if (gen_perf_load_metric_id(perf, query->guid, &config_id)) { 244 DBG("metric set: %s (already loaded)\n", query->guid); 245 register_oa_config(perf, query, config_id); 246 continue; 247 } 248 249 memset(&config, 0, sizeof(config)); 250 251 memcpy(config.uuid, query->guid, sizeof(config.uuid)); 252 253 config.n_mux_regs = query->n_mux_regs; 254 config.mux_regs_ptr = (uintptr_t) query->mux_regs; 255 256 config.n_boolean_regs = query->n_b_counter_regs; 257 config.boolean_regs_ptr = (uintptr_t) query->b_counter_regs; 258 259 config.n_flex_regs = query->n_flex_regs; 260 config.flex_regs_ptr = (uintptr_t) query->flex_regs; 261 262 ret = perf->ioctl(fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config); 263 if (ret < 0) { 264 DBG("Failed to load \"%s\" (%s) metrics set in kernel: %s\n", 265 query->name, query->guid, strerror(errno)); 266 continue; 267 } 268 269 register_oa_config(perf, query, ret); 270 DBG("metric set: %s (added)\n", query->guid); 271 } 272} 273 274static void 275compute_topology_builtins(struct gen_perf *perf, 276 const struct gen_device_info *devinfo) 277{ 278 perf->sys_vars.slice_mask = devinfo->slice_masks; 279 perf->sys_vars.n_eu_slices = devinfo->num_slices; 280 281 for (int i = 0; i < sizeof(devinfo->subslice_masks[i]); i++) { 282 perf->sys_vars.n_eu_sub_slices += 283 __builtin_popcount(devinfo->subslice_masks[i]); 284 } 285 286 for (int i = 0; i < sizeof(devinfo->eu_masks); i++) 287 perf->sys_vars.n_eus += __builtin_popcount(devinfo->eu_masks[i]); 288 289 perf->sys_vars.eu_threads_count = devinfo->num_thread_per_eu; 290 291 /* The subslice mask builtin contains bits for all slices. Prior to Gen11 292 * it had groups of 3bits for each slice, on Gen11 it's 8bits for each 293 * slice. 294 * 295 * Ideally equations would be updated to have a slice/subslice query 296 * function/operator. 297 */ 298 perf->sys_vars.subslice_mask = 0; 299 300 int bits_per_subslice = devinfo->gen == 11 ? 8 : 3; 301 302 for (int s = 0; s < util_last_bit(devinfo->slice_masks); s++) { 303 for (int ss = 0; ss < (devinfo->subslice_slice_stride * 8); ss++) { 304 if (gen_device_info_subslice_available(devinfo, s, ss)) 305 perf->sys_vars.subslice_mask |= 1ULL << (s * bits_per_subslice + ss); 306 } 307 } 308} 309 310static bool 311init_oa_sys_vars(struct gen_perf *perf, const struct gen_device_info *devinfo) 312{ 313 uint64_t min_freq_mhz = 0, max_freq_mhz = 0; 314 315 if (!read_sysfs_drm_device_file_uint64(perf, "gt_min_freq_mhz", &min_freq_mhz)) 316 return false; 317 318 if (!read_sysfs_drm_device_file_uint64(perf, "gt_max_freq_mhz", &max_freq_mhz)) 319 return false; 320 321 memset(&perf->sys_vars, 0, sizeof(perf->sys_vars)); 322 perf->sys_vars.gt_min_freq = min_freq_mhz * 1000000; 323 perf->sys_vars.gt_max_freq = max_freq_mhz * 1000000; 324 perf->sys_vars.timestamp_frequency = devinfo->timestamp_frequency; 325 perf->sys_vars.revision = devinfo->revision; 326 compute_topology_builtins(perf, devinfo); 327 328 return true; 329} 330 331typedef void (*perf_register_oa_queries_t)(struct gen_perf *); 332 333static perf_register_oa_queries_t 334get_register_queries_function(const struct gen_device_info *devinfo) 335{ 336 if (devinfo->is_haswell) 337 return gen_oa_register_queries_hsw; 338 if (devinfo->is_cherryview) 339 return gen_oa_register_queries_chv; 340 if (devinfo->is_broadwell) 341 return gen_oa_register_queries_bdw; 342 if (devinfo->is_broxton) 343 return gen_oa_register_queries_bxt; 344 if (devinfo->is_skylake) { 345 if (devinfo->gt == 2) 346 return gen_oa_register_queries_sklgt2; 347 if (devinfo->gt == 3) 348 return gen_oa_register_queries_sklgt3; 349 if (devinfo->gt == 4) 350 return gen_oa_register_queries_sklgt4; 351 } 352 if (devinfo->is_kabylake) { 353 if (devinfo->gt == 2) 354 return gen_oa_register_queries_kblgt2; 355 if (devinfo->gt == 3) 356 return gen_oa_register_queries_kblgt3; 357 } 358 if (devinfo->is_geminilake) 359 return gen_oa_register_queries_glk; 360 if (devinfo->is_coffeelake) { 361 if (devinfo->gt == 2) 362 return gen_oa_register_queries_cflgt2; 363 if (devinfo->gt == 3) 364 return gen_oa_register_queries_cflgt3; 365 } 366 if (devinfo->is_cannonlake) 367 return gen_oa_register_queries_cnl; 368 if (devinfo->gen == 11) 369 return gen_oa_register_queries_icl; 370 371 return NULL; 372} 373 374bool 375gen_perf_load_oa_metrics(struct gen_perf *perf, int fd, 376 const struct gen_device_info *devinfo) 377{ 378 perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo); 379 bool i915_perf_oa_available = false; 380 struct stat sb; 381 382 /* The existence of this sysctl parameter implies the kernel supports 383 * the i915 perf interface. 384 */ 385 if (stat("/proc/sys/dev/i915/perf_stream_paranoid", &sb) == 0) { 386 387 /* If _paranoid == 1 then on Gen8+ we won't be able to access OA 388 * metrics unless running as root. 389 */ 390 if (devinfo->is_haswell) 391 i915_perf_oa_available = true; 392 else { 393 uint64_t paranoid = 1; 394 395 read_file_uint64("/proc/sys/dev/i915/perf_stream_paranoid", ¶noid); 396 397 if (paranoid == 0 || geteuid() == 0) 398 i915_perf_oa_available = true; 399 } 400 } 401 402 if (!i915_perf_oa_available || 403 !oa_register || 404 !get_sysfs_dev_dir(perf, fd) || 405 !init_oa_sys_vars(perf, devinfo)) 406 return false; 407 408 perf->oa_metrics_table = 409 _mesa_hash_table_create(perf, _mesa_key_hash_string, 410 _mesa_key_string_equal); 411 412 /* Index all the metric sets mesa knows about before looking to see what 413 * the kernel is advertising. 414 */ 415 oa_register(perf); 416 417 if (likely((INTEL_DEBUG & DEBUG_NO_OACONFIG) == 0) && 418 kernel_has_dynamic_config_support(perf, fd)) 419 init_oa_configs(perf, fd); 420 else 421 enumerate_sysfs_metrics(perf); 422 423 return true; 424} 425 426/* Accumulate 32bits OA counters */ 427static inline void 428accumulate_uint32(const uint32_t *report0, 429 const uint32_t *report1, 430 uint64_t *accumulator) 431{ 432 *accumulator += (uint32_t)(*report1 - *report0); 433} 434 435/* Accumulate 40bits OA counters */ 436static inline void 437accumulate_uint40(int a_index, 438 const uint32_t *report0, 439 const uint32_t *report1, 440 uint64_t *accumulator) 441{ 442 const uint8_t *high_bytes0 = (uint8_t *)(report0 + 40); 443 const uint8_t *high_bytes1 = (uint8_t *)(report1 + 40); 444 uint64_t high0 = (uint64_t)(high_bytes0[a_index]) << 32; 445 uint64_t high1 = (uint64_t)(high_bytes1[a_index]) << 32; 446 uint64_t value0 = report0[a_index + 4] | high0; 447 uint64_t value1 = report1[a_index + 4] | high1; 448 uint64_t delta; 449 450 if (value0 > value1) 451 delta = (1ULL << 40) + value1 - value0; 452 else 453 delta = value1 - value0; 454 455 *accumulator += delta; 456} 457 458static void 459gen8_read_report_clock_ratios(const uint32_t *report, 460 uint64_t *slice_freq_hz, 461 uint64_t *unslice_freq_hz) 462{ 463 /* The lower 16bits of the RPT_ID field of the OA reports contains a 464 * snapshot of the bits coming from the RP_FREQ_NORMAL register and is 465 * divided this way : 466 * 467 * RPT_ID[31:25]: RP_FREQ_NORMAL[20:14] (low squashed_slice_clock_frequency) 468 * RPT_ID[10:9]: RP_FREQ_NORMAL[22:21] (high squashed_slice_clock_frequency) 469 * RPT_ID[8:0]: RP_FREQ_NORMAL[31:23] (squashed_unslice_clock_frequency) 470 * 471 * RP_FREQ_NORMAL[31:23]: Software Unslice Ratio Request 472 * Multiple of 33.33MHz 2xclk (16 MHz 1xclk) 473 * 474 * RP_FREQ_NORMAL[22:14]: Software Slice Ratio Request 475 * Multiple of 33.33MHz 2xclk (16 MHz 1xclk) 476 */ 477 478 uint32_t unslice_freq = report[0] & 0x1ff; 479 uint32_t slice_freq_low = (report[0] >> 25) & 0x7f; 480 uint32_t slice_freq_high = (report[0] >> 9) & 0x3; 481 uint32_t slice_freq = slice_freq_low | (slice_freq_high << 7); 482 483 *slice_freq_hz = slice_freq * 16666667ULL; 484 *unslice_freq_hz = unslice_freq * 16666667ULL; 485} 486 487void 488gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result, 489 const struct gen_device_info *devinfo, 490 const uint32_t *start, 491 const uint32_t *end) 492{ 493 /* Slice/Unslice frequency is only available in the OA reports when the 494 * "Disable OA reports due to clock ratio change" field in 495 * OA_DEBUG_REGISTER is set to 1. This is how the kernel programs this 496 * global register (see drivers/gpu/drm/i915/i915_perf.c) 497 * 498 * Documentation says this should be available on Gen9+ but experimentation 499 * shows that Gen8 reports similar values, so we enable it there too. 500 */ 501 if (devinfo->gen < 8) 502 return; 503 504 gen8_read_report_clock_ratios(start, 505 &result->slice_frequency[0], 506 &result->unslice_frequency[0]); 507 gen8_read_report_clock_ratios(end, 508 &result->slice_frequency[1], 509 &result->unslice_frequency[1]); 510} 511 512void 513gen_perf_query_result_accumulate(struct gen_perf_query_result *result, 514 const struct gen_perf_query_info *query, 515 const uint32_t *start, 516 const uint32_t *end) 517{ 518 int i, idx = 0; 519 520 result->hw_id = start[2]; 521 result->reports_accumulated++; 522 523 switch (query->oa_format) { 524 case I915_OA_FORMAT_A32u40_A4u32_B8_C8: 525 accumulate_uint32(start + 1, end + 1, result->accumulator + idx++); /* timestamp */ 526 accumulate_uint32(start + 3, end + 3, result->accumulator + idx++); /* clock */ 527 528 /* 32x 40bit A counters... */ 529 for (i = 0; i < 32; i++) 530 accumulate_uint40(i, start, end, result->accumulator + idx++); 531 532 /* 4x 32bit A counters... */ 533 for (i = 0; i < 4; i++) 534 accumulate_uint32(start + 36 + i, end + 36 + i, result->accumulator + idx++); 535 536 /* 8x 32bit B counters + 8x 32bit C counters... */ 537 for (i = 0; i < 16; i++) 538 accumulate_uint32(start + 48 + i, end + 48 + i, result->accumulator + idx++); 539 break; 540 541 case I915_OA_FORMAT_A45_B8_C8: 542 accumulate_uint32(start + 1, end + 1, result->accumulator); /* timestamp */ 543 544 for (i = 0; i < 61; i++) 545 accumulate_uint32(start + 3 + i, end + 3 + i, result->accumulator + 1 + i); 546 break; 547 548 default: 549 unreachable("Can't accumulate OA counters in unknown format"); 550 } 551 552} 553 554void 555gen_perf_query_result_clear(struct gen_perf_query_result *result) 556{ 557 memset(result, 0, sizeof(*result)); 558 result->hw_id = 0xffffffff; /* invalid */ 559} 560