/*
 * Copyright © 2009 Corbin Simpson
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

#include "radeon_drm_bo.h"
#include "radeon_drm_cs.h"
#include "radeon_drm_public.h"

#include "util/u_cpu_detect.h"
#include "util/u_memory.h"
#include "util/u_hash_table.h"

#include <xf86drm.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <radeon_surface.h>

/* Process-wide table mapping a DRM fd to its winsys instance, so that two
 * screens opened on the same device share one winsys.  All reads and writes
 * of fd_tab (including creation and teardown) are guarded by fd_tab_mutex. */
static struct util_hash_table *fd_tab = NULL;
static mtx_t fd_tab_mutex = _MTX_INITIALIZER_NP;

/* Enable/disable feature access for one command stream.
 * If enable == true, return true on success.
 * Otherwise, return false.
50 * 51 * We basically do the same thing kernel does, because we have to deal 52 * with multiple contexts (here command streams) backed by one winsys. */ 53static bool radeon_set_fd_access(struct radeon_drm_cs *applier, 54 struct radeon_drm_cs **owner, 55 mtx_t *mutex, 56 unsigned request, const char *request_name, 57 bool enable) 58{ 59 struct drm_radeon_info info; 60 unsigned value = enable ? 1 : 0; 61 62 memset(&info, 0, sizeof(info)); 63 64 mtx_lock(&*mutex); 65 66 /* Early exit if we are sure the request will fail. */ 67 if (enable) { 68 if (*owner) { 69 mtx_unlock(&*mutex); 70 return false; 71 } 72 } else { 73 if (*owner != applier) { 74 mtx_unlock(&*mutex); 75 return false; 76 } 77 } 78 79 /* Pass through the request to the kernel. */ 80 info.value = (unsigned long)&value; 81 info.request = request; 82 if (drmCommandWriteRead(applier->ws->fd, DRM_RADEON_INFO, 83 &info, sizeof(info)) != 0) { 84 mtx_unlock(&*mutex); 85 return false; 86 } 87 88 /* Update the rights in the winsys. */ 89 if (enable) { 90 if (value) { 91 *owner = applier; 92 mtx_unlock(&*mutex); 93 return true; 94 } 95 } else { 96 *owner = NULL; 97 } 98 99 mtx_unlock(&*mutex); 100 return false; 101} 102 103static bool radeon_get_drm_value(int fd, unsigned request, 104 const char *errname, uint32_t *out) 105{ 106 struct drm_radeon_info info; 107 int retval; 108 109 memset(&info, 0, sizeof(info)); 110 111 info.value = (unsigned long)out; 112 info.request = request; 113 114 retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); 115 if (retval) { 116 if (errname) { 117 fprintf(stderr, "radeon: Failed to get %s, error number %d\n", 118 errname, retval); 119 } 120 return false; 121 } 122 return true; 123} 124 125/* Helper function to do the ioctls needed for setup and init. 
*/
/* Query everything the winsys needs to know about the device: DRM version,
 * PCI ID / chip family, memory sizes, ring availability, tiling layout and
 * kernel feature flags.  Returns false if the kernel or device is
 * unsupported, leaving an explanation on stderr where possible. */
static bool do_winsys_init(struct radeon_drm_winsys *ws)
{
    struct drm_radeon_gem_info gem_info;
    int retval;
    drmVersionPtr version;

    memset(&gem_info, 0, sizeof(gem_info));

    /* We do things in a specific order here.
     *
     * DRM version first. We need to be sure we're running on a KMS chipset.
     * This is also for some features.
     *
     * Then, the PCI ID. This is essential and should return usable numbers
     * for all Radeons. If this fails, we probably got handed an FD for some
     * non-Radeon card.
     *
     * The GEM info is actually bogus on the kernel side, as well as our side
     * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because
     * we don't actually use the info for anything yet.
     *
     * The GB and Z pipe requests should always succeed, but they might not
     * return sensical values for all chipsets, but that's alright because
     * the pipe drivers already know that.
     */

    /* Get DRM version. */
    /* NOTE(review): drmGetVersion() can return NULL on failure; the result
     * is dereferenced without a check here — confirm whether a NULL fd can
     * reach this path. */
    version = drmGetVersion(ws->fd);
    if (version->version_major != 2 ||
        version->version_minor < 12) {
        fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is "
                "only compatible with 2.12.0 (kernel 3.2) or later.\n",
                __FUNCTION__,
                version->version_major,
                version->version_minor,
                version->version_patchlevel);
        drmFreeVersion(version);
        return false;
    }

    ws->info.drm_major = version->version_major;
    ws->info.drm_minor = version->version_minor;
    ws->info.drm_patchlevel = version->version_patchlevel;
    drmFreeVersion(version);

    /* Get PCI ID. */
    if (!radeon_get_drm_value(ws->fd, RADEON_INFO_DEVICE_ID, "PCI ID",
                              &ws->info.pci_id))
        return false;

    /* Check PCI ID.  The generated case labels map every known PCI ID to a
     * chip family and a driver generation (R300/R600/SI). */
    switch (ws->info.pci_id) {
#define CHIPSET(pci_id, name, cfamily) case pci_id: ws->info.family = CHIP_##cfamily; ws->gen = DRV_R300; break;
#include "pci_ids/r300_pci_ids.h"
#undef CHIPSET

#define CHIPSET(pci_id, name, cfamily) case pci_id: ws->info.family = CHIP_##cfamily; ws->gen = DRV_R600; break;
#include "pci_ids/r600_pci_ids.h"
#undef CHIPSET

#define CHIPSET(pci_id, cfamily) \
    case pci_id: \
        ws->info.family = CHIP_##cfamily; \
        ws->info.name = #cfamily; \
        ws->gen = DRV_SI; \
        break;
#include "pci_ids/radeonsi_pci_ids.h"
#undef CHIPSET

    default:
        fprintf(stderr, "radeon: Invalid PCI ID.\n");
        return false;
    }

    /* Map the chip family onto the coarser chip_class used by the drivers. */
    switch (ws->info.family) {
    default:
    case CHIP_UNKNOWN:
        fprintf(stderr, "radeon: Unknown family.\n");
        return false;
    case CHIP_R300:
    case CHIP_R350:
    case CHIP_RV350:
    case CHIP_RV370:
    case CHIP_RV380:
    case CHIP_RS400:
    case CHIP_RC410:
    case CHIP_RS480:
        ws->info.chip_class = R300;
        break;
    case CHIP_R420:     /* R4xx-based cores. */
    case CHIP_R423:
    case CHIP_R430:
    case CHIP_R480:
    case CHIP_R481:
    case CHIP_RV410:
    case CHIP_RS600:
    case CHIP_RS690:
    case CHIP_RS740:
        ws->info.chip_class = R400;
        break;
    case CHIP_RV515:    /* R5xx-based cores. */
    case CHIP_R520:
    case CHIP_RV530:
    case CHIP_R580:
    case CHIP_RV560:
    case CHIP_RV570:
        ws->info.chip_class = R500;
        break;
    case CHIP_R600:
    case CHIP_RV610:
    case CHIP_RV630:
    case CHIP_RV670:
    case CHIP_RV620:
    case CHIP_RV635:
    case CHIP_RS780:
    case CHIP_RS880:
        ws->info.chip_class = R600;
        break;
    case CHIP_RV770:
    case CHIP_RV730:
    case CHIP_RV710:
    case CHIP_RV740:
        ws->info.chip_class = R700;
        break;
    case CHIP_CEDAR:
    case CHIP_REDWOOD:
    case CHIP_JUNIPER:
    case CHIP_CYPRESS:
    case CHIP_HEMLOCK:
    case CHIP_PALM:
    case CHIP_SUMO:
    case CHIP_SUMO2:
    case CHIP_BARTS:
    case CHIP_TURKS:
    case CHIP_CAICOS:
        ws->info.chip_class = EVERGREEN;
        break;
    case CHIP_CAYMAN:
    case CHIP_ARUBA:
        ws->info.chip_class = CAYMAN;
        break;
    case CHIP_TAHITI:
    case CHIP_PITCAIRN:
    case CHIP_VERDE:
    case CHIP_OLAND:
    case CHIP_HAINAN:
        ws->info.chip_class = SI;
        break;
    case CHIP_BONAIRE:
    case CHIP_KAVERI:
    case CHIP_KABINI:
    case CHIP_HAWAII:
    case CHIP_MULLINS:
        ws->info.chip_class = CIK;
        break;
    }

    /* Set which chips don't have dedicated VRAM. */
    switch (ws->info.family) {
    case CHIP_RS400:
    case CHIP_RC410:
    case CHIP_RS480:
    case CHIP_RS600:
    case CHIP_RS690:
    case CHIP_RS740:
    case CHIP_RS780:
    case CHIP_RS880:
    case CHIP_PALM:
    case CHIP_SUMO:
    case CHIP_SUMO2:
    case CHIP_ARUBA:
    case CHIP_KAVERI:
    case CHIP_KABINI:
    case CHIP_MULLINS:
        ws->info.has_dedicated_vram = false;
        break;

    default:
        ws->info.has_dedicated_vram = true;
    }

    /* Check for dma */
    ws->info.num_sdma_rings = 0;
    /* DMA is disabled on R700. There is IB corruption and hangs. */
    if (ws->info.chip_class >= EVERGREEN && ws->info.drm_minor >= 27) {
        ws->info.num_sdma_rings = 1;
    }

    /* Check for UVD and VCE.  RADEON_INFO_RING_WORKING takes the ring id as
     * input and writes back a working/not-working flag. */
    ws->info.has_hw_decode = false;
    ws->info.vce_fw_version = 0x00000000;
    if (ws->info.drm_minor >= 32) {
        uint32_t value = RADEON_CS_RING_UVD;
        if (radeon_get_drm_value(ws->fd, RADEON_INFO_RING_WORKING,
                                 "UVD Ring working", &value))
            ws->info.has_hw_decode = value;

        value = RADEON_CS_RING_VCE;
        if (radeon_get_drm_value(ws->fd, RADEON_INFO_RING_WORKING,
                                 NULL, &value) && value) {

            if (radeon_get_drm_value(ws->fd, RADEON_INFO_VCE_FW_VERSION,
                                     "VCE FW version", &value))
                ws->info.vce_fw_version = value;
        }
    }

    /* Check for userptr support. */
    {
        struct drm_radeon_gem_userptr args = {0};

        /* If the ioctl doesn't exist, -EINVAL is returned.
         *
         * If the ioctl exists, it should return -EACCES
         * if RADEON_GEM_USERPTR_READONLY or RADEON_GEM_USERPTR_REGISTER
         * aren't set.
         */
        ws->info.has_userptr =
            drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
                                &args, sizeof(args)) == -EACCES;
    }

    /* Get GEM info. */
    retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO,
                                 &gem_info, sizeof(gem_info));
    if (retval) {
        fprintf(stderr, "radeon: Failed to get MM info, error number %d\n",
                retval);
        return false;
    }
    ws->info.gart_size = gem_info.gart_size;
    ws->info.vram_size = gem_info.vram_size;
    ws->info.vram_vis_size = gem_info.vram_visible;
    /* Older versions of the kernel driver reported incorrect values, and
     * didn't support more than 256MB of visible VRAM anyway
     */
    if (ws->info.drm_minor < 49)
        ws->info.vram_vis_size = MIN2(ws->info.vram_vis_size, 256*1024*1024);

    /* Radeon allocates all buffers contiguously, which makes large allocations
     * unlikely to succeed.  Cap the limit at 70% of the relevant heap. */
    if (ws->info.has_dedicated_vram)
        ws->info.max_alloc_size = ws->info.vram_size * 0.7;
    else
        ws->info.max_alloc_size = ws->info.gart_size * 0.7;

    if (ws->info.drm_minor < 40)
        ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 256*1024*1024);
    /* Both 32-bit and 64-bit address spaces only have 4GB. */
    ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 3ull*1024*1024*1024);

    /* Get max clock frequency info and convert it to MHz */
    radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL,
                         &ws->info.max_shader_clock);
    ws->info.max_shader_clock /= 1000;

    ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);

    /* Generation-specific queries. */
    if (ws->gen == DRV_R300) {
        if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_GB_PIPES,
                                  "GB pipe count",
                                  &ws->info.r300_num_gb_pipes))
            return false;

        if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_Z_PIPES,
                                  "Z pipe count",
                                  &ws->info.r300_num_z_pipes))
            return false;
    }
    else if (ws->gen >= DRV_R600) {
        uint32_t tiling_config = 0;

        if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS,
                                  "num backends",
                                  &ws->info.num_render_backends))
            return false;

        /* get the GPU counter frequency, failure is not fatal */
        radeon_get_drm_value(ws->fd, RADEON_INFO_CLOCK_CRYSTAL_FREQ, NULL,
                             &ws->info.clock_crystal_freq);

        radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL,
                             &tiling_config);

        /* Decode bank count / pipe interleave from the tiling config word;
         * the bitfield layout differs between pre-Evergreen and Evergreen+. */
        ws->info.r600_num_banks =
            ws->info.chip_class >= EVERGREEN ?
            4 << ((tiling_config & 0xf0) >> 4) :
            4 << ((tiling_config & 0x30) >> 4);

        ws->info.pipe_interleave_bytes =
            ws->info.chip_class >= EVERGREEN ?
            256 << ((tiling_config & 0xf00) >> 8) :
            256 << ((tiling_config & 0xc0) >> 6);

        if (!ws->info.pipe_interleave_bytes)
            ws->info.pipe_interleave_bytes =
                ws->info.chip_class >= EVERGREEN ? 512 : 256;

        radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL,
                             &ws->info.num_tile_pipes);

        /* "num_tiles_pipes" must be equal to the number of pipes (Px) in the
         * pipe config field of the GB_TILE_MODE array. Only one card (Tahiti)
         * reports a different value (12). Fix it by setting what's in the
         * GB_TILE_MODE array (8).
         */
        if (ws->gen == DRV_SI && ws->info.num_tile_pipes == 12)
            ws->info.num_tile_pipes = 8;

        if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL,
                                 &ws->info.r600_gb_backend_map))
            ws->info.r600_gb_backend_map_valid = true;

        /* Default value. */
        ws->info.enabled_rb_mask = u_bit_consecutive(0, ws->info.num_render_backends);
        /*
         * This fails (silently) on non-GCN or older kernels, overwriting the
         * default enabled_rb_mask with the result of the last query.
         */
        if (ws->gen >= DRV_SI)
            radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL,
                                 &ws->info.enabled_rb_mask);

        /* GPUVM is only usable when the VA range and max IB size queries
         * both succeed. */
        ws->info.r600_has_virtual_memory = false;
        if (ws->info.drm_minor >= 13) {
            uint32_t ib_vm_max_size;

            ws->info.r600_has_virtual_memory = true;
            if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
                                      &ws->va_start))
                ws->info.r600_has_virtual_memory = false;
            if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
                                      &ib_vm_max_size))
                ws->info.r600_has_virtual_memory = false;
            radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL,
                                 &ws->va_unmap_working);
        }
        /* On R600-class hardware, VA is opt-in via the RADEON_VA env var. */
        if (ws->gen == DRV_R600 && !debug_get_bool_option("RADEON_VA", false))
            ws->info.r600_has_virtual_memory = false;
    }

    /* Get max pipes, this is only needed for compute shaders. All evergreen+
     * chips have at least 2 pipes, so we use 2 as a default. */
    ws->info.r600_max_quad_pipes = 2;
    radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_PIPES, NULL,
                         &ws->info.r600_max_quad_pipes);

    /* All GPUs have at least one compute unit */
    ws->info.num_good_compute_units = 1;
    radeon_get_drm_value(ws->fd, RADEON_INFO_ACTIVE_CU_COUNT, NULL,
                         &ws->info.num_good_compute_units);

    radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SE, NULL,
                         &ws->info.max_se);

    /* Per-family TCC (L2 cache block) counts; 0 for chips where the count
     * is not listed here. */
    switch (ws->info.family) {
    case CHIP_HAINAN:
    case CHIP_KABINI:
    case CHIP_MULLINS:
        ws->info.num_tcc_blocks = 2;
        break;
    case CHIP_VERDE:
    case CHIP_OLAND:
    case CHIP_BONAIRE:
    case CHIP_KAVERI:
        ws->info.num_tcc_blocks = 4;
        break;
    case CHIP_PITCAIRN:
        ws->info.num_tcc_blocks = 8;
        break;
    case CHIP_TAHITI:
        ws->info.num_tcc_blocks = 12;
        break;
    case CHIP_HAWAII:
        ws->info.num_tcc_blocks = 16;
        break;
    default:
        ws->info.num_tcc_blocks = 0;
        break;
    }

    /* Fall back to hardcoded shader-engine counts when the kernel query
     * returned 0. */
    if (!ws->info.max_se) {
        switch (ws->info.family) {
        default:
            ws->info.max_se = 1;
            break;
        case CHIP_CYPRESS:
        case CHIP_HEMLOCK:
        case CHIP_BARTS:
        case CHIP_CAYMAN:
        case CHIP_TAHITI:
        case CHIP_PITCAIRN:
        case CHIP_BONAIRE:
            ws->info.max_se = 2;
            break;
        case CHIP_HAWAII:
            ws->info.max_se = 4;
            break;
        }
    }

    radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL,
                         &ws->info.max_sh_per_se);
    if (ws->gen == DRV_SI) {
        ws->info.num_good_cu_per_sh = ws->info.num_good_compute_units /
                                      (ws->info.max_se * ws->info.max_sh_per_se);
    }

    radeon_get_drm_value(ws->fd, RADEON_INFO_ACCEL_WORKING2, NULL,
                         &ws->accel_working2);
    if (ws->info.family == CHIP_HAWAII && ws->accel_working2 < 2) {
        fprintf(stderr, "radeon: GPU acceleration for Hawaii disabled, "
                "returned accel_working2 value %u is smaller than 2. "
                "Please install a newer kernel.\n",
                ws->accel_working2);
        return false;
    }

    if (ws->info.chip_class == CIK) {
        if (!radeon_get_drm_value(ws->fd, RADEON_INFO_CIK_MACROTILE_MODE_ARRAY, NULL,
                                  ws->info.cik_macrotile_mode_array)) {
            fprintf(stderr, "radeon: Kernel 3.13 is required for CIK support.\n");
            return false;
        }
    }

    if (ws->info.chip_class >= SI) {
        if (!radeon_get_drm_value(ws->fd, RADEON_INFO_SI_TILE_MODE_ARRAY, NULL,
                                  ws->info.si_tile_mode_array)) {
            fprintf(stderr, "radeon: Kernel 3.10 is required for SI support.\n");
            return false;
        }
    }

    /* Hawaii with old firmware needs type2 nop packet.
     * accel_working2 with value 3 indicates the new firmware.
     */
    ws->info.gfx_ib_pad_with_type2 = ws->info.chip_class <= SI ||
                                     (ws->info.family == CHIP_HAWAII &&
                                      ws->accel_working2 < 3);
    ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */
    ws->info.ib_start_alignment = 4096;
    ws->info.kernel_flushes_hdp_before_ib = ws->info.drm_minor >= 40;
    /* HTILE is broken with 1D tiling on old kernels and CIK. */
    ws->info.htile_cmask_support_1d_tiling = ws->info.chip_class != CIK ||
                                             ws->info.drm_minor >= 38;
    ws->info.si_TA_CS_BC_BASE_ADDR_allowed = ws->info.drm_minor >= 48;
    ws->info.has_bo_metadata = false;
    ws->info.has_gpu_reset_status_query = false;
    ws->info.has_gpu_reset_counter_query = ws->info.drm_minor >= 43;
    ws->info.has_eqaa_surface_allocator = false;
    ws->info.has_format_bc1_through_bc7 = ws->info.drm_minor >= 31;
    ws->info.kernel_flushes_tc_l2_after_ib = true;
    /* Old kernels disallowed register writes via COPY_DATA
     * that are used for indirect compute dispatches. */
    ws->info.has_indirect_compute_dispatch = ws->info.chip_class == CIK ||
                                             (ws->info.chip_class == SI &&
                                              ws->info.drm_minor >= 45);
    /* SI doesn't support unaligned loads. */
    ws->info.has_unaligned_shader_loads = ws->info.chip_class == CIK &&
                                          ws->info.drm_minor >= 50;
    ws->info.has_sparse_vm_mappings = false;
    /* 2D tiling on CIK is supported since DRM 2.35.0 */
    ws->info.has_2d_tiling = ws->info.chip_class <= SI || ws->info.drm_minor >= 35;
    ws->info.has_read_registers_query = ws->info.drm_minor >= 42;
    ws->info.max_alignment = 1024*1024;

    /* Extra buffer-placement validation, enabled via R600_DEBUG=check_vm. */
    ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL;

    return true;
}

/* Tear down a winsys: stop the CS thread, release allocators, hash tables,
 * mutexes and the duplicated fd.  Mirrors the setup order used in
 * radeon_drm_winsys_create. */
static void radeon_winsys_destroy(struct radeon_winsys *rws)
{
    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;

    if (util_queue_is_initialized(&ws->cs_queue))
        util_queue_destroy(&ws->cs_queue);

    mtx_destroy(&ws->hyperz_owner_mutex);
    mtx_destroy(&ws->cmask_owner_mutex);

    /* The slab allocator only exists when GPUVM is available (see create). */
    if (ws->info.r600_has_virtual_memory)
        pb_slabs_deinit(&ws->bo_slabs);
    pb_cache_deinit(&ws->bo_cache);

    if (ws->gen >= DRV_R600) {
        radeon_surface_manager_free(ws->surf_man);
    }

    util_hash_table_destroy(ws->bo_names);
    util_hash_table_destroy(ws->bo_handles);
    util_hash_table_destroy(ws->bo_vas);
    mtx_destroy(&ws->bo_handles_mutex);
    mtx_destroy(&ws->vm32.mutex);
    mtx_destroy(&ws->vm64.mutex);
    mtx_destroy(&ws->bo_fence_lock);

    if (ws->fd >= 0)
        close(ws->fd);

    FREE(rws);
}

/* Copy the cached device info out to the caller. */
static void radeon_query_info(struct radeon_winsys *rws,
                              struct radeon_info *info)
{
    *info = ((struct radeon_drm_winsys *)rws)->info;
}

/* Request or release exclusive access to an fd-wide hardware feature
 * (Hyper-Z / CMASK) for one command stream. */
static bool radeon_cs_request_feature(struct radeon_cmdbuf *rcs,
                                      enum radeon_feature_id fid,
                                      bool enable)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    switch (fid) {
    case RADEON_FID_R300_HYPERZ_ACCESS:
        return radeon_set_fd_access(cs, &cs->ws->hyperz_owner,
                                    &cs->ws->hyperz_owner_mutex,
                                    RADEON_INFO_WANT_HYPERZ, "Hyper-Z",
                                    enable);

    case RADEON_FID_R300_CMASK_ACCESS:
        return radeon_set_fd_access(cs, &cs->ws->cmask_owner,
                                    &cs->ws->cmask_owner_mutex,
                                    RADEON_INFO_WANT_CMASK, "AA optimizations",
                                    enable);
    }
    return false;
}

/* Return one runtime statistic, either from winsys counters or via a
 * DRM_RADEON_INFO query.  Unknown/unsupported values return 0. */
static uint64_t radeon_query_value(struct radeon_winsys *rws,
                                   enum radeon_value_id value)
{
    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
    uint64_t retval = 0;

    switch (value) {
    case RADEON_REQUESTED_VRAM_MEMORY:
        return ws->allocated_vram;
    case RADEON_REQUESTED_GTT_MEMORY:
        return ws->allocated_gtt;
    case RADEON_MAPPED_VRAM:
        return ws->mapped_vram;
    case RADEON_MAPPED_GTT:
        return ws->mapped_gtt;
    case RADEON_BUFFER_WAIT_TIME_NS:
        return ws->buffer_wait_time;
    case RADEON_NUM_MAPPED_BUFFERS:
        return ws->num_mapped_buffers;
    case RADEON_TIMESTAMP:
        if (ws->info.drm_minor < 20 || ws->gen < DRV_R600) {
            assert(0);
            return 0;
        }

        /* NOTE(review): retval is 64-bit but passed through the 32-bit
         * helper; presumably the kernel writes the full 64-bit timestamp
         * through the pointer — confirm against the radeon DRM UAPI. */
        radeon_get_drm_value(ws->fd, RADEON_INFO_TIMESTAMP, "timestamp",
                             (uint32_t*)&retval);
        return retval;
    case RADEON_NUM_GFX_IBS:
        return ws->num_gfx_IBs;
    case RADEON_NUM_SDMA_IBS:
        return ws->num_sdma_IBs;
    case RADEON_NUM_BYTES_MOVED:
        radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BYTES_MOVED,
                             "num-bytes-moved", (uint32_t*)&retval);
        return retval;
    case RADEON_NUM_EVICTIONS:
    case RADEON_NUM_VRAM_CPU_PAGE_FAULTS:
    case RADEON_VRAM_VIS_USAGE:
    case RADEON_GFX_BO_LIST_COUNTER:
    case RADEON_GFX_IB_SIZE_COUNTER:
        return 0; /* unimplemented */
    case RADEON_VRAM_USAGE:
        radeon_get_drm_value(ws->fd, RADEON_INFO_VRAM_USAGE,
                             "vram-usage", (uint32_t*)&retval);
        return retval;
    case RADEON_GTT_USAGE:
        radeon_get_drm_value(ws->fd, RADEON_INFO_GTT_USAGE,
                             "gtt-usage", (uint32_t*)&retval);
        return retval;
    case RADEON_GPU_TEMPERATURE:
        radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_TEMP,
                             "gpu-temp", (uint32_t*)&retval);
        return retval;
    case RADEON_CURRENT_SCLK:
        radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_SCLK,
                             "current-gpu-sclk", (uint32_t*)&retval);
        return retval;
    case RADEON_CURRENT_MCLK:
        radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_MCLK,
                             "current-gpu-mclk", (uint32_t*)&retval);
        return retval;
    case RADEON_GPU_RESET_COUNTER:
        radeon_get_drm_value(ws->fd, RADEON_INFO_GPU_RESET_COUNTER,
                             "gpu-reset-counter", (uint32_t*)&retval);
        return retval;
    case RADEON_CS_THREAD_TIME:
        return util_queue_get_thread_time_nano(&ws->cs_queue, 0);
    }
    return 0;
}

/* Read num_registers consecutive 32-bit MMIO registers starting at
 * reg_offset into out[].  Returns false on the first failed read. */
static bool radeon_read_registers(struct radeon_winsys *rws,
                                  unsigned reg_offset,
                                  unsigned num_registers, uint32_t *out)
{
    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
    unsigned i;

    for (i = 0; i < num_registers; i++) {
        /* The register offset is passed in and the value is returned in
         * the same variable. */
        uint32_t reg = reg_offset + i*4;

        if (!radeon_get_drm_value(ws->fd, RADEON_INFO_READ_REG, NULL, &reg))
            return false;
        out[i] = reg;
    }
    return true;
}

/* Hash an fd by the identity of the file it refers to, so that dup'd fds
 * for the same device hash equal. */
static unsigned hash_fd(void *key)
{
    int fd = pointer_to_intptr(key);
    struct stat stat;
    fstat(fd, &stat);

    return stat.st_dev ^ stat.st_ino ^ stat.st_rdev;
}

/* Two fds compare equal when they reference the same underlying file
 * (same device, inode and rdev).  Returns 0 on equality, per the
 * util_hash_table contract. */
static int compare_fd(void *key1, void *key2)
{
    int fd1 = pointer_to_intptr(key1);
    int fd2 = pointer_to_intptr(key2);
    struct stat stat1, stat2;
    fstat(fd1, &stat1);
    fstat(fd2, &stat2);

    return stat1.st_dev != stat2.st_dev ||
           stat1.st_ino != stat2.st_ino ||
           stat1.st_rdev != stat2.st_rdev;
}

DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", true)

/* Drop one reference; returns true when this was the last reference and
 * the caller should destroy the winsys. */
static bool radeon_winsys_unref(struct radeon_winsys *ws)
{
    struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws;
    bool destroy;

    /* When the reference counter drops to zero, remove the fd from the table.
     * This must happen while the mutex is locked, so that
     * radeon_drm_winsys_create in another thread doesn't get the winsys
     * from the table when the counter drops to 0. */
    mtx_lock(&fd_tab_mutex);

    destroy = pipe_reference(&rws->reference, NULL);
    if (destroy && fd_tab) {
        util_hash_table_remove(fd_tab, intptr_to_pointer(rws->fd));
        /* Free the table itself once the last winsys is gone. */
        if (util_hash_table_count(fd_tab) == 0) {
            util_hash_table_destroy(fd_tab);
            fd_tab = NULL;
        }
    }

    mtx_unlock(&fd_tab_mutex);
    return destroy;
}

#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))

/* Identity hash for integer handles stored as pointers. */
static unsigned handle_hash(void *key)
{
    return PTR_TO_UINT(key);
}

/* Equality for integer handles; 0 means equal (util_hash_table contract). */
static int handle_compare(void *key1, void *key2)
{
    return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
}

/* Pin the CS submission thread to the given L3 cache group, if the CS
 * thread exists. */
static void radeon_pin_threads_to_L3_cache(struct radeon_winsys *ws,
                                           unsigned cache)
{
    struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws;

    if (util_queue_is_initialized(&rws->cs_queue)) {
        util_pin_thread_to_L3(rws->cs_queue.threads[0], cache,
                              util_cpu_caps.cores_per_L3);
    }
}

/* Create (or look up and reference) the winsys for a DRM fd and create the
 * driver screen on top of it.  fd_tab_mutex is held for the whole creation
 * so concurrent callers never observe a half-initialized winsys.
 * Returns NULL on failure. */
PUBLIC struct radeon_winsys *
radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
                         radeon_screen_create_t screen_create)
{
    struct radeon_drm_winsys *ws;

    mtx_lock(&fd_tab_mutex);
    if (!fd_tab) {
        fd_tab = util_hash_table_create(hash_fd, compare_fd);
    }

    /* Reuse an existing winsys if this fd refers to the same device. */
    ws = util_hash_table_get(fd_tab, intptr_to_pointer(fd));
    if (ws) {
        pipe_reference(NULL, &ws->reference);
        mtx_unlock(&fd_tab_mutex);
        return &ws->base;
    }

    ws = CALLOC_STRUCT(radeon_drm_winsys);
    if (!ws) {
        mtx_unlock(&fd_tab_mutex);
        return NULL;
    }

    /* Own a private dup of the fd so the caller may close theirs.
     * NOTE(review): fcntl can return -1 here; the error paths guard
     * close() with (ws->fd >= 0) but do_winsys_init would still ioctl
     * on -1 — confirm this is acceptable. */
    ws->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);

    if (!do_winsys_init(ws))
        goto fail1;

    pb_cache_init(&ws->bo_cache, RADEON_MAX_CACHED_HEAPS,
                  500000, ws->check_vm ? 1.0f : 2.0f, 0,
                  MIN2(ws->info.vram_size, ws->info.gart_size),
                  radeon_bo_destroy,
                  radeon_bo_can_reclaim);

    if (ws->info.r600_has_virtual_memory) {
        /* There is no fundamental obstacle to using slab buffer allocation
         * without GPUVM, but enabling it requires making sure that the drivers
         * honor the address offset.
         */
        if (!pb_slabs_init(&ws->bo_slabs,
                           RADEON_SLAB_MIN_SIZE_LOG2, RADEON_SLAB_MAX_SIZE_LOG2,
                           RADEON_MAX_SLAB_HEAPS,
                           ws,
                           radeon_bo_can_reclaim_slab,
                           radeon_bo_slab_alloc,
                           radeon_bo_slab_free))
            goto fail_cache;

        ws->info.min_alloc_size = 1 << RADEON_SLAB_MIN_SIZE_LOG2;
    } else {
        ws->info.min_alloc_size = ws->info.gart_page_size;
    }

    if (ws->gen >= DRV_R600) {
        ws->surf_man = radeon_surface_manager_new(ws->fd);
        if (!ws->surf_man)
            goto fail_slab;
    }

    /* init reference */
    pipe_reference_init(&ws->reference, 1);

    /* Set functions. */
    ws->base.unref = radeon_winsys_unref;
    ws->base.destroy = radeon_winsys_destroy;
    ws->base.query_info = radeon_query_info;
    ws->base.pin_threads_to_L3_cache = radeon_pin_threads_to_L3_cache;
    ws->base.cs_request_feature = radeon_cs_request_feature;
    ws->base.query_value = radeon_query_value;
    ws->base.read_registers = radeon_read_registers;

    radeon_drm_bo_init_functions(ws);
    radeon_drm_cs_init_functions(ws);
    radeon_surface_init_functions(ws);

    (void) mtx_init(&ws->hyperz_owner_mutex, mtx_plain);
    (void) mtx_init(&ws->cmask_owner_mutex, mtx_plain);

    ws->bo_names = util_hash_table_create(handle_hash, handle_compare);
    ws->bo_handles = util_hash_table_create(handle_hash, handle_compare);
    ws->bo_vas = util_hash_table_create(handle_hash, handle_compare);
    (void) mtx_init(&ws->bo_handles_mutex, mtx_plain);
    (void) mtx_init(&ws->vm32.mutex, mtx_plain);
    (void) mtx_init(&ws->vm64.mutex, mtx_plain);
    (void) mtx_init(&ws->bo_fence_lock, mtx_plain);
    list_inithead(&ws->vm32.holes);
    list_inithead(&ws->vm64.holes);

    /* The kernel currently returns 8MB. Make sure this doesn't change. */
    if (ws->va_start > 8 * 1024 * 1024) {
        /* Not enough 32-bit address space. */
        radeon_winsys_destroy(&ws->base);
        mtx_unlock(&fd_tab_mutex);
        return NULL;
    }

    ws->vm32.start = ws->va_start;
    ws->vm32.end = 1ull << 32;

    /* The maximum is 8GB of virtual address space limited by the kernel.
     * It's obviously not enough for bigger cards, like Hawaiis with 4GB
     * and 8GB of physical memory and 4GB of GART.
     *
     * Older kernels set the limit to 4GB, which is even worse, so they only
     * have 32-bit address space.
     */
    if (ws->info.drm_minor >= 41) {
        ws->vm64.start = 1ull << 32;
        ws->vm64.end = 1ull << 33;
    }

    /* TTM aligns the BO size to the CPU page size */
    ws->info.gart_page_size = sysconf(_SC_PAGESIZE);

    /* Only spawn the CS thread on multi-core machines, unless disabled
     * via RADEON_THREAD=false. */
    if (ws->num_cpus > 1 && debug_get_option_thread())
        util_queue_init(&ws->cs_queue, "rcs", 8, 1, 0);

    /* Create the screen at the end. The winsys must be initialized
     * completely.
     *
     * Alternatively, we could create the screen based on "ws->gen"
     * and link all drivers into one binary blob. */
    ws->base.screen = screen_create(&ws->base, config);
    if (!ws->base.screen) {
        radeon_winsys_destroy(&ws->base);
        mtx_unlock(&fd_tab_mutex);
        return NULL;
    }

    util_hash_table_set(fd_tab, intptr_to_pointer(ws->fd), ws);

    /* We must unlock the mutex once the winsys is fully initialized, so that
     * other threads attempting to create the winsys from the same fd will
     * get a fully initialized winsys and not just half-way initialized. */
    mtx_unlock(&fd_tab_mutex);

    return &ws->base;

fail_slab:
    if (ws->info.r600_has_virtual_memory)
        pb_slabs_deinit(&ws->bo_slabs);
fail_cache:
    pb_cache_deinit(&ws->bo_cache);
fail1:
    mtx_unlock(&fd_tab_mutex);
    /* surf_man is NULL on the fail1 path (calloc'd struct); this also
     * serves the fail_slab path where it may have been created. */
    if (ws->surf_man)
        radeon_surface_manager_free(ws->surf_man);
    if (ws->fd >= 0)
        close(ws->fd);

    FREE(ws);
    return NULL;
}