/*
 * Copyright © 2009 Corbin Simpson
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

#include "radeon_drm_bo.h"
#include "radeon_drm_cs.h"
#include "radeon_drm_public.h"

#include "util/os_file.h"
#include "util/u_cpu_detect.h"
#include "util/u_memory.h"
#include "util/u_hash_table.h"
#include "util/u_pointer.h"

#include <xf86drm.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <radeon_surface.h>

/* Process-wide map from device fd to its single winsys instance, so that
 * multiple screens opened on the same device share one winsys. Guarded by
 * fd_tab_mutex (see radeon_drm_winsys_create / radeon_winsys_unref). */
static struct hash_table *fd_tab = NULL;
static mtx_t fd_tab_mutex = _MTX_INITIALIZER_NP;

/* Enable/disable feature access for one command stream.
 * If enable == true, return true on success.
 * Otherwise, return false.
 *
 * We basically do the same thing kernel does, because we have to deal
 * with multiple contexts (here command streams) backed by one winsys. */
static bool radeon_set_fd_access(struct radeon_drm_cs *applier,
                                 struct radeon_drm_cs **owner,
                                 mtx_t *mutex,
                                 unsigned request, const char *request_name,
                                 bool enable)
{
   struct drm_radeon_info info;
   unsigned value = enable ? 1 : 0;

   memset(&info, 0, sizeof(info));

   /* *owner and the kernel-side state must be updated atomically together. */
   mtx_lock(&*mutex);

   /* Early exit if we are sure the request will fail. */
   if (enable) {
      /* Someone else already owns the feature. */
      if (*owner) {
         mtx_unlock(&*mutex);
         return false;
      }
   } else {
      /* Only the current owner may release the feature. */
      if (*owner != applier) {
         mtx_unlock(&*mutex);
         return false;
      }
   }

   /* Pass through the request to the kernel. */
   info.value = (unsigned long)&value;
   info.request = request;
   if (drmCommandWriteRead(applier->ws->fd, DRM_RADEON_INFO,
                           &info, sizeof(info)) != 0) {
      mtx_unlock(&*mutex);
      return false;
   }

   /* Update the rights in the winsys. */
   if (enable) {
      /* The kernel writes back whether the feature was actually granted. */
      if (value) {
         *owner = applier;
         mtx_unlock(&*mutex);
         return true;
      }
   } else {
      *owner = NULL;
   }

   /* Disable requests (and failed enables) always report false by design;
    * see the contract in the comment above the function. */
   mtx_unlock(&*mutex);
   return false;
}

/* Query one 32-bit value from the kernel via DRM_RADEON_INFO.
 *
 * fd:      the device file descriptor
 * request: a RADEON_INFO_* query id
 * errname: human-readable name printed on failure, or NULL to fail silently
 * out:     receives the value; also used as the input for queries that
 *          take a parameter (the kernel reads/writes through this pointer)
 *
 * Returns false (and leaves *out untouched on pure reads) if the ioctl
 * fails. */
static bool radeon_get_drm_value(int fd, unsigned request,
                                 const char *errname, uint32_t *out)
{
   struct drm_radeon_info info;
   int retval;

   memset(&info, 0, sizeof(info));

   info.value = (unsigned long)out;
   info.request = request;

   retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
   if (retval) {
      if (errname) {
         fprintf(stderr, "radeon: Failed to get %s, error number %d\n",
                 errname, retval);
      }
      return false;
   }
   return true;
}

/* Helper function to do the ioctls needed for setup and init.
*/ 128static bool do_winsys_init(struct radeon_drm_winsys *ws) 129{ 130 struct drm_radeon_gem_info gem_info; 131 int retval; 132 drmVersionPtr version; 133 134 memset(&gem_info, 0, sizeof(gem_info)); 135 136 /* We do things in a specific order here. 137 * 138 * DRM version first. We need to be sure we're running on a KMS chipset. 139 * This is also for some features. 140 * 141 * Then, the PCI ID. This is essential and should return usable numbers 142 * for all Radeons. If this fails, we probably got handed an FD for some 143 * non-Radeon card. 144 * 145 * The GEM info is actually bogus on the kernel side, as well as our side 146 * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because 147 * we don't actually use the info for anything yet. 148 * 149 * The GB and Z pipe requests should always succeed, but they might not 150 * return sensical values for all chipsets, but that's alright because 151 * the pipe drivers already know that. 152 */ 153 154 /* Get DRM version. */ 155 version = drmGetVersion(ws->fd); 156 if (version->version_major != 2 || 157 version->version_minor < 12) { 158 fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " 159 "only compatible with 2.12.0 (kernel 3.2) or later.\n", 160 __FUNCTION__, 161 version->version_major, 162 version->version_minor, 163 version->version_patchlevel); 164 drmFreeVersion(version); 165 return false; 166 } 167 168 ws->info.drm_major = version->version_major; 169 ws->info.drm_minor = version->version_minor; 170 ws->info.drm_patchlevel = version->version_patchlevel; 171 ws->info.is_amdgpu = false; 172 drmFreeVersion(version); 173 174 /* Get PCI ID. */ 175 if (!radeon_get_drm_value(ws->fd, RADEON_INFO_DEVICE_ID, "PCI ID", 176 &ws->info.pci_id)) 177 return false; 178 179 /* Check PCI ID. 
*/ 180 switch (ws->info.pci_id) { 181#define CHIPSET(pci_id, name, cfamily) case pci_id: ws->info.family = CHIP_##cfamily; ws->gen = DRV_R300; break; 182#include "pci_ids/r300_pci_ids.h" 183#undef CHIPSET 184 185#define CHIPSET(pci_id, name, cfamily) case pci_id: ws->info.family = CHIP_##cfamily; ws->gen = DRV_R600; break; 186#include "pci_ids/r600_pci_ids.h" 187#undef CHIPSET 188 189#define CHIPSET(pci_id, cfamily) \ 190 case pci_id: \ 191 ws->info.family = CHIP_##cfamily; \ 192 ws->info.name = #cfamily; \ 193 ws->gen = DRV_SI; \ 194 break; 195#include "pci_ids/radeonsi_pci_ids.h" 196#undef CHIPSET 197 198 default: 199 fprintf(stderr, "radeon: Invalid PCI ID.\n"); 200 return false; 201 } 202 203 switch (ws->info.family) { 204 default: 205 case CHIP_UNKNOWN: 206 fprintf(stderr, "radeon: Unknown family.\n"); 207 return false; 208 case CHIP_R300: 209 case CHIP_R350: 210 case CHIP_RV350: 211 case CHIP_RV370: 212 case CHIP_RV380: 213 case CHIP_RS400: 214 case CHIP_RC410: 215 case CHIP_RS480: 216 ws->info.chip_class = R300; 217 break; 218 case CHIP_R420: /* R4xx-based cores. */ 219 case CHIP_R423: 220 case CHIP_R430: 221 case CHIP_R480: 222 case CHIP_R481: 223 case CHIP_RV410: 224 case CHIP_RS600: 225 case CHIP_RS690: 226 case CHIP_RS740: 227 ws->info.chip_class = R400; 228 break; 229 case CHIP_RV515: /* R5xx-based cores. 
*/ 230 case CHIP_R520: 231 case CHIP_RV530: 232 case CHIP_R580: 233 case CHIP_RV560: 234 case CHIP_RV570: 235 ws->info.chip_class = R500; 236 break; 237 case CHIP_R600: 238 case CHIP_RV610: 239 case CHIP_RV630: 240 case CHIP_RV670: 241 case CHIP_RV620: 242 case CHIP_RV635: 243 case CHIP_RS780: 244 case CHIP_RS880: 245 ws->info.chip_class = R600; 246 break; 247 case CHIP_RV770: 248 case CHIP_RV730: 249 case CHIP_RV710: 250 case CHIP_RV740: 251 ws->info.chip_class = R700; 252 break; 253 case CHIP_CEDAR: 254 case CHIP_REDWOOD: 255 case CHIP_JUNIPER: 256 case CHIP_CYPRESS: 257 case CHIP_HEMLOCK: 258 case CHIP_PALM: 259 case CHIP_SUMO: 260 case CHIP_SUMO2: 261 case CHIP_BARTS: 262 case CHIP_TURKS: 263 case CHIP_CAICOS: 264 ws->info.chip_class = EVERGREEN; 265 break; 266 case CHIP_CAYMAN: 267 case CHIP_ARUBA: 268 ws->info.chip_class = CAYMAN; 269 break; 270 case CHIP_TAHITI: 271 case CHIP_PITCAIRN: 272 case CHIP_VERDE: 273 case CHIP_OLAND: 274 case CHIP_HAINAN: 275 ws->info.chip_class = GFX6; 276 break; 277 case CHIP_BONAIRE: 278 case CHIP_KAVERI: 279 case CHIP_KABINI: 280 case CHIP_HAWAII: 281 ws->info.chip_class = GFX7; 282 break; 283 } 284 285 /* Set which chips don't have dedicated VRAM. */ 286 switch (ws->info.family) { 287 case CHIP_RS400: 288 case CHIP_RC410: 289 case CHIP_RS480: 290 case CHIP_RS600: 291 case CHIP_RS690: 292 case CHIP_RS740: 293 case CHIP_RS780: 294 case CHIP_RS880: 295 case CHIP_PALM: 296 case CHIP_SUMO: 297 case CHIP_SUMO2: 298 case CHIP_ARUBA: 299 case CHIP_KAVERI: 300 case CHIP_KABINI: 301 ws->info.has_dedicated_vram = false; 302 break; 303 304 default: 305 ws->info.has_dedicated_vram = true; 306 } 307 308 ws->info.num_rings[RING_GFX] = 1; 309 /* Check for dma */ 310 ws->info.num_rings[RING_DMA] = 0; 311 /* DMA is disabled on R700. There is IB corruption and hangs. 
*/ 312 if (ws->info.chip_class >= EVERGREEN && ws->info.drm_minor >= 27) { 313 ws->info.num_rings[RING_DMA] = 1; 314 } 315 316 /* Check for UVD and VCE */ 317 ws->info.has_video_hw.uvd_decode = false; 318 ws->info.has_video_hw.vce_encode = false; 319 ws->info.vce_fw_version = 0x00000000; 320 if (ws->info.drm_minor >= 32) { 321 uint32_t value = RADEON_CS_RING_UVD; 322 if (radeon_get_drm_value(ws->fd, RADEON_INFO_RING_WORKING, 323 "UVD Ring working", &value)) { 324 ws->info.has_video_hw.uvd_decode = value; 325 ws->info.num_rings[RING_UVD] = 1; 326 } 327 328 value = RADEON_CS_RING_VCE; 329 if (radeon_get_drm_value(ws->fd, RADEON_INFO_RING_WORKING, 330 NULL, &value) && value) { 331 332 if (radeon_get_drm_value(ws->fd, RADEON_INFO_VCE_FW_VERSION, 333 "VCE FW version", &value)) { 334 ws->info.vce_fw_version = value; 335 ws->info.num_rings[RING_VCE] = 1; 336 ws->info.has_video_hw.vce_encode = true; 337 } 338 } 339 } 340 341 /* Check for userptr support. */ 342 { 343 struct drm_radeon_gem_userptr args = {0}; 344 345 /* If the ioctl doesn't exist, -EINVAL is returned. 346 * 347 * If the ioctl exists, it should return -EACCES 348 * if RADEON_GEM_USERPTR_READONLY or RADEON_GEM_USERPTR_REGISTER 349 * aren't set. 350 */ 351 ws->info.has_userptr = 352 drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR, 353 &args, sizeof(args)) == -EACCES; 354 } 355 356 /* Get GEM info. 
*/ 357 retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO, 358 &gem_info, sizeof(gem_info)); 359 if (retval) { 360 fprintf(stderr, "radeon: Failed to get MM info, error number %d\n", 361 retval); 362 return false; 363 } 364 ws->info.gart_size = gem_info.gart_size; 365 ws->info.vram_size = gem_info.vram_size; 366 ws->info.vram_vis_size = gem_info.vram_visible; 367 /* Older versions of the kernel driver reported incorrect values, and 368 * didn't support more than 256MB of visible VRAM anyway 369 */ 370 if (ws->info.drm_minor < 49) 371 ws->info.vram_vis_size = MIN2(ws->info.vram_vis_size, 256*1024*1024); 372 373 ws->info.gart_size_kb = DIV_ROUND_UP(ws->info.gart_size, 1024); 374 ws->info.vram_size_kb = DIV_ROUND_UP(ws->info.vram_size, 1024); 375 376 /* Radeon allocates all buffers contiguously, which makes large allocations 377 * unlikely to succeed. */ 378 if (ws->info.has_dedicated_vram) 379 ws->info.max_alloc_size = ws->info.vram_size * 0.7; 380 else 381 ws->info.max_alloc_size = ws->info.gart_size * 0.7; 382 383 if (ws->info.drm_minor < 40) 384 ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 256*1024*1024); 385 /* Both 32-bit and 64-bit address spaces only have 4GB. */ 386 ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 3ull*1024*1024*1024); 387 388 /* Get max clock frequency info and convert it to MHz */ 389 radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL, 390 &ws->info.max_shader_clock); 391 ws->info.max_shader_clock /= 1000; 392 393 ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN); 394 395 /* Generation-specific queries. 
*/ 396 if (ws->gen == DRV_R300) { 397 if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_GB_PIPES, 398 "GB pipe count", 399 &ws->info.r300_num_gb_pipes)) 400 return false; 401 402 if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_Z_PIPES, 403 "Z pipe count", 404 &ws->info.r300_num_z_pipes)) 405 return false; 406 } 407 else if (ws->gen >= DRV_R600) { 408 uint32_t tiling_config = 0; 409 410 if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS, 411 "num backends", 412 &ws->info.max_render_backends)) 413 return false; 414 415 /* get the GPU counter frequency, failure is not fatal */ 416 radeon_get_drm_value(ws->fd, RADEON_INFO_CLOCK_CRYSTAL_FREQ, NULL, 417 &ws->info.clock_crystal_freq); 418 419 radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL, 420 &tiling_config); 421 422 ws->info.r600_num_banks = 423 ws->info.chip_class >= EVERGREEN ? 424 4 << ((tiling_config & 0xf0) >> 4) : 425 4 << ((tiling_config & 0x30) >> 4); 426 427 ws->info.pipe_interleave_bytes = 428 ws->info.chip_class >= EVERGREEN ? 429 256 << ((tiling_config & 0xf00) >> 8) : 430 256 << ((tiling_config & 0xc0) >> 6); 431 432 if (!ws->info.pipe_interleave_bytes) 433 ws->info.pipe_interleave_bytes = 434 ws->info.chip_class >= EVERGREEN ? 512 : 256; 435 436 radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL, 437 &ws->info.num_tile_pipes); 438 439 /* "num_tiles_pipes" must be equal to the number of pipes (Px) in the 440 * pipe config field of the GB_TILE_MODE array. Only one card (Tahiti) 441 * reports a different value (12). Fix it by setting what's in the 442 * GB_TILE_MODE array (8). 443 */ 444 if (ws->gen == DRV_SI && ws->info.num_tile_pipes == 12) 445 ws->info.num_tile_pipes = 8; 446 447 if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL, 448 &ws->info.r600_gb_backend_map)) 449 ws->info.r600_gb_backend_map_valid = true; 450 451 /* Default value. 
*/ 452 ws->info.enabled_rb_mask = u_bit_consecutive(0, ws->info.max_render_backends); 453 /* 454 * This fails (silently) on non-GCN or older kernels, overwriting the 455 * default enabled_rb_mask with the result of the last query. 456 */ 457 if (ws->gen >= DRV_SI) 458 radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL, 459 &ws->info.enabled_rb_mask); 460 461 ws->info.r600_has_virtual_memory = false; 462 if (ws->info.drm_minor >= 13) { 463 uint32_t ib_vm_max_size; 464 465 ws->info.r600_has_virtual_memory = true; 466 if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL, 467 &ws->va_start)) 468 ws->info.r600_has_virtual_memory = false; 469 if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL, 470 &ib_vm_max_size)) 471 ws->info.r600_has_virtual_memory = false; 472 radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL, 473 &ws->va_unmap_working); 474 } 475 if (ws->gen == DRV_R600 && !debug_get_bool_option("RADEON_VA", false)) 476 ws->info.r600_has_virtual_memory = false; 477 } 478 479 /* Get max pipes, this is only needed for compute shaders. All evergreen+ 480 * chips have at least 2 pipes, so we use 2 as a default. 
*/ 481 ws->info.r600_max_quad_pipes = 2; 482 radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_PIPES, NULL, 483 &ws->info.r600_max_quad_pipes); 484 485 /* All GPUs have at least one compute unit */ 486 ws->info.num_good_compute_units = 1; 487 radeon_get_drm_value(ws->fd, RADEON_INFO_ACTIVE_CU_COUNT, NULL, 488 &ws->info.num_good_compute_units); 489 490 radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SE, NULL, 491 &ws->info.max_se); 492 493 switch (ws->info.family) { 494 case CHIP_HAINAN: 495 case CHIP_KABINI: 496 ws->info.max_tcc_blocks = 2; 497 break; 498 case CHIP_VERDE: 499 case CHIP_OLAND: 500 case CHIP_BONAIRE: 501 case CHIP_KAVERI: 502 ws->info.max_tcc_blocks = 4; 503 break; 504 case CHIP_PITCAIRN: 505 ws->info.max_tcc_blocks = 8; 506 break; 507 case CHIP_TAHITI: 508 ws->info.max_tcc_blocks = 12; 509 break; 510 case CHIP_HAWAII: 511 ws->info.max_tcc_blocks = 16; 512 break; 513 default: 514 ws->info.max_tcc_blocks = 0; 515 break; 516 } 517 518 if (!ws->info.max_se) { 519 switch (ws->info.family) { 520 default: 521 ws->info.max_se = 1; 522 break; 523 case CHIP_CYPRESS: 524 case CHIP_HEMLOCK: 525 case CHIP_BARTS: 526 case CHIP_CAYMAN: 527 case CHIP_TAHITI: 528 case CHIP_PITCAIRN: 529 case CHIP_BONAIRE: 530 ws->info.max_se = 2; 531 break; 532 case CHIP_HAWAII: 533 ws->info.max_se = 4; 534 break; 535 } 536 } 537 538 ws->info.num_se = ws->info.max_se; 539 540 radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL, 541 &ws->info.max_sa_per_se); 542 if (ws->gen == DRV_SI) { 543 ws->info.max_good_cu_per_sa = 544 ws->info.min_good_cu_per_sa = ws->info.num_good_compute_units / 545 (ws->info.max_se * ws->info.max_sa_per_se); 546 } 547 548 radeon_get_drm_value(ws->fd, RADEON_INFO_ACCEL_WORKING2, NULL, 549 &ws->accel_working2); 550 if (ws->info.family == CHIP_HAWAII && ws->accel_working2 < 2) { 551 fprintf(stderr, "radeon: GPU acceleration for Hawaii disabled, " 552 "returned accel_working2 value %u is smaller than 2. 
" 553 "Please install a newer kernel.\n", 554 ws->accel_working2); 555 return false; 556 } 557 558 if (ws->info.chip_class == GFX7) { 559 if (!radeon_get_drm_value(ws->fd, RADEON_INFO_CIK_MACROTILE_MODE_ARRAY, NULL, 560 ws->info.cik_macrotile_mode_array)) { 561 fprintf(stderr, "radeon: Kernel 3.13 is required for Sea Islands support.\n"); 562 return false; 563 } 564 } 565 566 if (ws->info.chip_class >= GFX6) { 567 if (!radeon_get_drm_value(ws->fd, RADEON_INFO_SI_TILE_MODE_ARRAY, NULL, 568 ws->info.si_tile_mode_array)) { 569 fprintf(stderr, "radeon: Kernel 3.10 is required for Southern Islands support.\n"); 570 return false; 571 } 572 } 573 574 /* Hawaii with old firmware needs type2 nop packet. 575 * accel_working2 with value 3 indicates the new firmware. 576 */ 577 ws->info.gfx_ib_pad_with_type2 = ws->info.chip_class <= GFX6 || 578 (ws->info.family == CHIP_HAWAII && 579 ws->accel_working2 < 3); 580 ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */ 581 ws->info.ib_alignment = 4096; 582 ws->info.kernel_flushes_hdp_before_ib = ws->info.drm_minor >= 40; 583 /* HTILE is broken with 1D tiling on old kernels and GFX7. */ 584 ws->info.htile_cmask_support_1d_tiling = ws->info.chip_class != GFX7 || 585 ws->info.drm_minor >= 38; 586 ws->info.si_TA_CS_BC_BASE_ADDR_allowed = ws->info.drm_minor >= 48; 587 ws->info.has_bo_metadata = false; 588 ws->info.has_gpu_reset_status_query = ws->info.drm_minor >= 43; 589 ws->info.has_eqaa_surface_allocator = false; 590 ws->info.has_format_bc1_through_bc7 = ws->info.drm_minor >= 31; 591 ws->info.kernel_flushes_tc_l2_after_ib = true; 592 /* Old kernels disallowed register writes via COPY_DATA 593 * that are used for indirect compute dispatches. */ 594 ws->info.has_indirect_compute_dispatch = ws->info.chip_class == GFX7 || 595 (ws->info.chip_class == GFX6 && 596 ws->info.drm_minor >= 45); 597 /* GFX6 doesn't support unaligned loads. 
*/ 598 ws->info.has_unaligned_shader_loads = ws->info.chip_class == GFX7 && 599 ws->info.drm_minor >= 50; 600 ws->info.has_sparse_vm_mappings = false; 601 /* 2D tiling on GFX7 is supported since DRM 2.35.0 */ 602 ws->info.has_2d_tiling = ws->info.chip_class <= GFX6 || ws->info.drm_minor >= 35; 603 ws->info.has_read_registers_query = ws->info.drm_minor >= 42; 604 ws->info.max_alignment = 1024*1024; 605 ws->info.has_graphics = true; 606 ws->info.cpdma_prefetch_writes_memory = true; 607 ws->info.max_wave64_per_simd = 10; 608 ws->info.num_physical_sgprs_per_simd = 512; 609 ws->info.num_physical_wave64_vgprs_per_simd = 256; 610 ws->info.has_3d_cube_border_color_mipmap = true; 611 612 ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL || 613 strstr(debug_get_option("AMD_DEBUG", ""), "check_vm") != NULL; 614 ws->noop_cs = debug_get_bool_option("RADEON_NOOP", false); 615 616 return true; 617} 618 619static void radeon_winsys_destroy(struct radeon_winsys *rws) 620{ 621 struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; 622 623 if (util_queue_is_initialized(&ws->cs_queue)) 624 util_queue_destroy(&ws->cs_queue); 625 626 mtx_destroy(&ws->hyperz_owner_mutex); 627 mtx_destroy(&ws->cmask_owner_mutex); 628 629 if (ws->info.r600_has_virtual_memory) 630 pb_slabs_deinit(&ws->bo_slabs); 631 pb_cache_deinit(&ws->bo_cache); 632 633 if (ws->gen >= DRV_R600) { 634 radeon_surface_manager_free(ws->surf_man); 635 } 636 637 _mesa_hash_table_destroy(ws->bo_names, NULL); 638 _mesa_hash_table_destroy(ws->bo_handles, NULL); 639 _mesa_hash_table_u64_destroy(ws->bo_vas); 640 mtx_destroy(&ws->bo_handles_mutex); 641 mtx_destroy(&ws->vm32.mutex); 642 mtx_destroy(&ws->vm64.mutex); 643 mtx_destroy(&ws->bo_fence_lock); 644 645 if (ws->fd >= 0) 646 close(ws->fd); 647 648 FREE(rws); 649} 650 651static void radeon_query_info(struct radeon_winsys *rws, 652 struct radeon_info *info, 653 bool enable_smart_access_memory, 654 bool disable_smart_access_memory) 655{ 656 *info 
= ((struct radeon_drm_winsys *)rws)->info; 657} 658 659static bool radeon_cs_request_feature(struct radeon_cmdbuf *rcs, 660 enum radeon_feature_id fid, 661 bool enable) 662{ 663 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 664 665 switch (fid) { 666 case RADEON_FID_R300_HYPERZ_ACCESS: 667 return radeon_set_fd_access(cs, &cs->ws->hyperz_owner, 668 &cs->ws->hyperz_owner_mutex, 669 RADEON_INFO_WANT_HYPERZ, "Hyper-Z", 670 enable); 671 672 case RADEON_FID_R300_CMASK_ACCESS: 673 return radeon_set_fd_access(cs, &cs->ws->cmask_owner, 674 &cs->ws->cmask_owner_mutex, 675 RADEON_INFO_WANT_CMASK, "AA optimizations", 676 enable); 677 } 678 return false; 679} 680 681uint32_t radeon_drm_get_gpu_reset_counter(struct radeon_drm_winsys *ws) 682{ 683 uint64_t retval = 0; 684 685 if (!ws->info.has_gpu_reset_status_query) 686 return 0; 687 688 radeon_get_drm_value(ws->fd, RADEON_INFO_GPU_RESET_COUNTER, 689 "gpu-reset-counter", (uint32_t*)&retval); 690 return retval; 691} 692 693static uint64_t radeon_query_value(struct radeon_winsys *rws, 694 enum radeon_value_id value) 695{ 696 struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; 697 uint64_t retval = 0; 698 699 switch (value) { 700 case RADEON_REQUESTED_VRAM_MEMORY: 701 return ws->allocated_vram; 702 case RADEON_REQUESTED_GTT_MEMORY: 703 return ws->allocated_gtt; 704 case RADEON_MAPPED_VRAM: 705 return ws->mapped_vram; 706 case RADEON_MAPPED_GTT: 707 return ws->mapped_gtt; 708 case RADEON_BUFFER_WAIT_TIME_NS: 709 return ws->buffer_wait_time; 710 case RADEON_NUM_MAPPED_BUFFERS: 711 return ws->num_mapped_buffers; 712 case RADEON_TIMESTAMP: 713 if (ws->info.drm_minor < 20 || ws->gen < DRV_R600) { 714 assert(0); 715 return 0; 716 } 717 718 radeon_get_drm_value(ws->fd, RADEON_INFO_TIMESTAMP, "timestamp", 719 (uint32_t*)&retval); 720 return retval; 721 case RADEON_NUM_GFX_IBS: 722 return ws->num_gfx_IBs; 723 case RADEON_NUM_SDMA_IBS: 724 return ws->num_sdma_IBs; 725 case RADEON_NUM_BYTES_MOVED: 726 radeon_get_drm_value(ws->fd, 
RADEON_INFO_NUM_BYTES_MOVED, 727 "num-bytes-moved", (uint32_t*)&retval); 728 return retval; 729 case RADEON_NUM_EVICTIONS: 730 case RADEON_NUM_VRAM_CPU_PAGE_FAULTS: 731 case RADEON_VRAM_VIS_USAGE: 732 case RADEON_GFX_BO_LIST_COUNTER: 733 case RADEON_GFX_IB_SIZE_COUNTER: 734 case RADEON_SLAB_WASTED_VRAM: 735 case RADEON_SLAB_WASTED_GTT: 736 return 0; /* unimplemented */ 737 case RADEON_VRAM_USAGE: 738 radeon_get_drm_value(ws->fd, RADEON_INFO_VRAM_USAGE, 739 "vram-usage", (uint32_t*)&retval); 740 return retval; 741 case RADEON_GTT_USAGE: 742 radeon_get_drm_value(ws->fd, RADEON_INFO_GTT_USAGE, 743 "gtt-usage", (uint32_t*)&retval); 744 return retval; 745 case RADEON_GPU_TEMPERATURE: 746 radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_TEMP, 747 "gpu-temp", (uint32_t*)&retval); 748 return retval; 749 case RADEON_CURRENT_SCLK: 750 radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_SCLK, 751 "current-gpu-sclk", (uint32_t*)&retval); 752 return retval; 753 case RADEON_CURRENT_MCLK: 754 radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_MCLK, 755 "current-gpu-mclk", (uint32_t*)&retval); 756 return retval; 757 case RADEON_CS_THREAD_TIME: 758 return util_queue_get_thread_time_nano(&ws->cs_queue, 0); 759 } 760 return 0; 761} 762 763static bool radeon_read_registers(struct radeon_winsys *rws, 764 unsigned reg_offset, 765 unsigned num_registers, uint32_t *out) 766{ 767 struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; 768 unsigned i; 769 770 for (i = 0; i < num_registers; i++) { 771 uint32_t reg = reg_offset + i*4; 772 773 if (!radeon_get_drm_value(ws->fd, RADEON_INFO_READ_REG, NULL, ®)) 774 return false; 775 out[i] = reg; 776 } 777 return true; 778} 779 780DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", true) 781 782static bool radeon_winsys_unref(struct radeon_winsys *ws) 783{ 784 struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws; 785 bool destroy; 786 787 /* When the reference counter drops to zero, remove the fd from the table. 
788 * This must happen while the mutex is locked, so that 789 * radeon_drm_winsys_create in another thread doesn't get the winsys 790 * from the table when the counter drops to 0. */ 791 mtx_lock(&fd_tab_mutex); 792 793 destroy = pipe_reference(&rws->reference, NULL); 794 if (destroy && fd_tab) { 795 _mesa_hash_table_remove_key(fd_tab, intptr_to_pointer(rws->fd)); 796 if (_mesa_hash_table_num_entries(fd_tab) == 0) { 797 _mesa_hash_table_destroy(fd_tab, NULL); 798 fd_tab = NULL; 799 } 800 } 801 802 mtx_unlock(&fd_tab_mutex); 803 return destroy; 804} 805 806static void radeon_pin_threads_to_L3_cache(struct radeon_winsys *ws, 807 unsigned cache) 808{ 809 struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws; 810 811 if (util_queue_is_initialized(&rws->cs_queue)) { 812 util_set_thread_affinity(rws->cs_queue.threads[0], 813 util_get_cpu_caps()->L3_affinity_mask[cache], 814 NULL, util_get_cpu_caps()->num_cpu_mask_bits); 815 } 816} 817 818static bool radeon_cs_is_secure(struct radeon_cmdbuf* cs) 819{ 820 return false; 821} 822 823PUBLIC struct radeon_winsys * 824radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config, 825 radeon_screen_create_t screen_create) 826{ 827 struct radeon_drm_winsys *ws; 828 829 mtx_lock(&fd_tab_mutex); 830 if (!fd_tab) { 831 fd_tab = util_hash_table_create_fd_keys(); 832 } 833 834 ws = util_hash_table_get(fd_tab, intptr_to_pointer(fd)); 835 if (ws) { 836 pipe_reference(NULL, &ws->reference); 837 mtx_unlock(&fd_tab_mutex); 838 return &ws->base; 839 } 840 841 ws = CALLOC_STRUCT(radeon_drm_winsys); 842 if (!ws) { 843 mtx_unlock(&fd_tab_mutex); 844 return NULL; 845 } 846 847 ws->fd = os_dupfd_cloexec(fd); 848 849 if (!do_winsys_init(ws)) 850 goto fail1; 851 852 pb_cache_init(&ws->bo_cache, RADEON_MAX_CACHED_HEAPS, 853 500000, ws->check_vm ? 
                 1.0f : 2.0f, 0,
                 MIN2(ws->info.vram_size, ws->info.gart_size), NULL,
                 radeon_bo_destroy,
                 radeon_bo_can_reclaim);

   if (ws->info.r600_has_virtual_memory) {
      /* There is no fundamental obstacle to using slab buffer allocation
       * without GPUVM, but enabling it requires making sure that the drivers
       * honor the address offset.
       */
      if (!pb_slabs_init(&ws->bo_slabs,
                         RADEON_SLAB_MIN_SIZE_LOG2, RADEON_SLAB_MAX_SIZE_LOG2,
                         RADEON_MAX_SLAB_HEAPS, false,
                         ws,
                         radeon_bo_can_reclaim_slab,
                         radeon_bo_slab_alloc,
                         radeon_bo_slab_free))
         goto fail_cache;

      ws->info.min_alloc_size = 1 << RADEON_SLAB_MIN_SIZE_LOG2;
   } else {
      ws->info.min_alloc_size = ws->info.gart_page_size;
   }

   if (ws->gen >= DRV_R600) {
      ws->surf_man = radeon_surface_manager_new(ws->fd);
      if (!ws->surf_man)
         goto fail_slab;
   }

   /* init reference */
   pipe_reference_init(&ws->reference, 1);

   /* Set functions. */
   ws->base.unref = radeon_winsys_unref;
   ws->base.destroy = radeon_winsys_destroy;
   ws->base.query_info = radeon_query_info;
   ws->base.pin_threads_to_L3_cache = radeon_pin_threads_to_L3_cache;
   ws->base.cs_request_feature = radeon_cs_request_feature;
   ws->base.query_value = radeon_query_value;
   ws->base.read_registers = radeon_read_registers;
   ws->base.cs_is_secure = radeon_cs_is_secure;

   radeon_drm_bo_init_functions(ws);
   radeon_drm_cs_init_functions(ws);
   radeon_surface_init_functions(ws);

   (void) mtx_init(&ws->hyperz_owner_mutex, mtx_plain);
   (void) mtx_init(&ws->cmask_owner_mutex, mtx_plain);

   ws->bo_names = util_hash_table_create_ptr_keys();
   ws->bo_handles = util_hash_table_create_ptr_keys();
   ws->bo_vas = _mesa_hash_table_u64_create(NULL);
   (void) mtx_init(&ws->bo_handles_mutex, mtx_plain);
   (void) mtx_init(&ws->vm32.mutex, mtx_plain);
   (void) mtx_init(&ws->vm64.mutex, mtx_plain);
   (void) mtx_init(&ws->bo_fence_lock, mtx_plain);
   list_inithead(&ws->vm32.holes);
   list_inithead(&ws->vm64.holes);

   /* The kernel currently returns 8MB. Make sure this doesn't change. */
   if (ws->va_start > 8 * 1024 * 1024) {
      /* Not enough 32-bit address space. */
      radeon_winsys_destroy(&ws->base);
      mtx_unlock(&fd_tab_mutex);
      return NULL;
   }

   ws->vm32.start = ws->va_start;
   ws->vm32.end = 1ull << 32;

   /* The maximum is 8GB of virtual address space limited by the kernel.
    * It's obviously not enough for bigger cards, like Hawaiis with 4GB
    * and 8GB of physical memory and 4GB of GART.
    *
    * Older kernels set the limit to 4GB, which is even worse, so they only
    * have 32-bit address space.
    */
   if (ws->info.drm_minor >= 41) {
      ws->vm64.start = 1ull << 32;
      ws->vm64.end = 1ull << 33;
   }

   /* TTM aligns the BO size to the CPU page size */
   ws->info.gart_page_size = sysconf(_SC_PAGESIZE);
   ws->info.pte_fragment_size = 64 * 1024; /* GPUVM page size */

   /* Offload CS submission to a worker thread unless RADEON_THREAD=false
    * or the machine has only one CPU. */
   if (ws->num_cpus > 1 && debug_get_option_thread())
      util_queue_init(&ws->cs_queue, "rcs", 8, 1, 0, NULL);

   /* Create the screen at the end. The winsys must be initialized
    * completely.
    *
    * Alternatively, we could create the screen based on "ws->gen"
    * and link all drivers into one binary blob. */
   ws->base.screen = screen_create(&ws->base, config);
   if (!ws->base.screen) {
      radeon_winsys_destroy(&ws->base);
      mtx_unlock(&fd_tab_mutex);
      return NULL;
   }

   _mesa_hash_table_insert(fd_tab, intptr_to_pointer(ws->fd), ws);

   /* We must unlock the mutex once the winsys is fully initialized, so that
    * other threads attempting to create the winsys from the same fd will
    * get a fully initialized winsys and not just half-way initialized. */
   mtx_unlock(&fd_tab_mutex);

   return &ws->base;

fail_slab:
   /* surf_man is NULL on this path, so fail1's cleanup skips it. */
   if (ws->info.r600_has_virtual_memory)
      pb_slabs_deinit(&ws->bo_slabs);
fail_cache:
   pb_cache_deinit(&ws->bo_cache);
fail1:
   mtx_unlock(&fd_tab_mutex);
   if (ws->surf_man)
      radeon_surface_manager_free(ws->surf_man);
   if (ws->fd >= 0)
      close(ws->fd);

   FREE(ws);
   return NULL;
}