1/* 2 * Copyright © 2009 Corbin Simpson <MostAwesomeDude@gmail.com> 3 * Copyright © 2009 Joakim Sindholt <opensource@zhasha.com> 4 * Copyright © 2011 Marek Olšák <maraeo@gmail.com> 5 * Copyright © 2015 Advanced Micro Devices, Inc. 6 * All Rights Reserved. 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining 9 * a copy of this software and associated documentation files (the 10 * "Software"), to deal in the Software without restriction, including 11 * without limitation the rights to use, copy, modify, merge, publish, 12 * distribute, sub license, and/or sell copies of the Software, and to 13 * permit persons to whom the Software is furnished to do so, subject to 14 * the following conditions: 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 18 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS 20 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 23 * USE OR OTHER DEALINGS IN THE SOFTWARE. 24 * 25 * The above copyright notice and this permission notice (including the 26 * next paragraph) shall be included in all copies or substantial portions 27 * of the Software. 28 */ 29 30#include "amdgpu_cs.h" 31#include "amdgpu_public.h" 32 33#include "util/os_file.h" 34#include "util/os_misc.h" 35#include "util/u_cpu_detect.h" 36#include "util/u_hash_table.h" 37#include "util/hash_table.h" 38#include "util/xmlconfig.h" 39#include "drm-uapi/amdgpu_drm.h" 40#include <xf86drm.h> 41#include <stdio.h> 42#include <sys/stat.h> 43#include <fcntl.h> 44#include "ac_llvm_util.h" 45#include "sid.h" 46 47static struct hash_table *dev_tab = NULL; 48static simple_mtx_t dev_tab_mutex = _SIMPLE_MTX_INITIALIZER_NP; 49 50#if DEBUG 51DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false) 52#endif 53 54static void handle_env_var_force_family(struct amdgpu_winsys *ws) 55{ 56 const char *family = debug_get_option("SI_FORCE_FAMILY", NULL); 57 unsigned i; 58 59 if (!family) 60 return; 61 62 for (i = CHIP_TAHITI; i < CHIP_LAST; i++) { 63 if (!strcmp(family, ac_get_llvm_processor_name(i))) { 64 /* Override family and chip_class. */ 65 ws->info.family = i; 66 ws->info.name = "GCN-NOOP"; 67 68 if (i >= CHIP_SIENNA_CICHLID) 69 ws->info.chip_class = GFX10_3; 70 else if (i >= CHIP_NAVI10) 71 ws->info.chip_class = GFX10; 72 else if (i >= CHIP_VEGA10) 73 ws->info.chip_class = GFX9; 74 else if (i >= CHIP_TONGA) 75 ws->info.chip_class = GFX8; 76 else if (i >= CHIP_BONAIRE) 77 ws->info.chip_class = GFX7; 78 else 79 ws->info.chip_class = GFX6; 80 81 /* Don't submit any IBs. */ 82 setenv("RADEON_NOOP", "1", 1); 83 return; 84 } 85 } 86 87 fprintf(stderr, "radeonsi: Unknown family: %s\n", family); 88 exit(1); 89} 90 91/* Helper function to do the ioctls needed for setup and init. */ 92static bool do_winsys_init(struct amdgpu_winsys *ws, 93 const struct pipe_screen_config *config, 94 int fd) 95{ 96 if (!ac_query_gpu_info(fd, ws->dev, &ws->info, &ws->amdinfo)) 97 goto fail; 98 99 /* TODO: Enable this once the kernel handles it efficiently. */ 100 if (ws->info.has_dedicated_vram) 101 ws->info.has_local_buffers = false; 102 103 handle_env_var_force_family(ws); 104 105 ws->addrlib = ac_addrlib_create(&ws->info, &ws->info.max_alignment); 106 if (!ws->addrlib) { 107 fprintf(stderr, "amdgpu: Cannot create addrlib.\n"); 108 goto fail; 109 } 110 111 ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL || 112 strstr(debug_get_option("AMD_DEBUG", ""), "check_vm") != NULL; 113 ws->noop_cs = debug_get_bool_option("RADEON_NOOP", false); 114#if DEBUG 115 ws->debug_all_bos = debug_get_option_all_bos(); 116#endif 117 ws->reserve_vmid = strstr(debug_get_option("R600_DEBUG", ""), "reserve_vmid") != NULL || 118 strstr(debug_get_option("AMD_DEBUG", ""), "reserve_vmid") != NULL; 119 ws->zero_all_vram_allocs = strstr(debug_get_option("R600_DEBUG", ""), "zerovram") != NULL || 120 driQueryOptionb(config->options, "radeonsi_zerovram"); 121 122 return true; 123 124fail: 125 amdgpu_device_deinitialize(ws->dev); 126 ws->dev = NULL; 127 return false; 128} 129 130static void do_winsys_deinit(struct amdgpu_winsys *ws) 131{ 132 if (ws->reserve_vmid) 133 amdgpu_vm_unreserve_vmid(ws->dev, 0); 134 135 if (util_queue_is_initialized(&ws->cs_queue)) 136 util_queue_destroy(&ws->cs_queue); 137 138 simple_mtx_destroy(&ws->bo_fence_lock); 139 for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { 140 if (ws->bo_slabs[i].groups) 141 pb_slabs_deinit(&ws->bo_slabs[i]); 142 } 143 pb_cache_deinit(&ws->bo_cache); 144 _mesa_hash_table_destroy(ws->bo_export_table, NULL); 145 simple_mtx_destroy(&ws->sws_list_lock); 146#if DEBUG 147 simple_mtx_destroy(&ws->global_bo_list_lock); 148#endif 149 simple_mtx_destroy(&ws->bo_export_table_lock); 150 151 ac_addrlib_destroy(ws->addrlib); 152 amdgpu_device_deinitialize(ws->dev); 153 FREE(ws); 154} 155 156static void amdgpu_winsys_destroy(struct radeon_winsys *rws) 157{ 158 struct amdgpu_screen_winsys *sws = amdgpu_screen_winsys(rws); 159 struct amdgpu_winsys *ws = sws->aws; 160 bool destroy; 161 162 /* When the reference counter drops to zero, remove the device pointer 163 * from the table. 164 * This must happen while the mutex is locked, so that 165 * amdgpu_winsys_create in another thread doesn't get the winsys 166 * from the table when the counter drops to 0. 167 */ 168 simple_mtx_lock(&dev_tab_mutex); 169 170 destroy = pipe_reference(&ws->reference, NULL); 171 if (destroy && dev_tab) { 172 _mesa_hash_table_remove_key(dev_tab, ws->dev); 173 if (_mesa_hash_table_num_entries(dev_tab) == 0) { 174 _mesa_hash_table_destroy(dev_tab, NULL); 175 dev_tab = NULL; 176 } 177 } 178 179 simple_mtx_unlock(&dev_tab_mutex); 180 181 if (destroy) 182 do_winsys_deinit(ws); 183 184 close(sws->fd); 185 FREE(rws); 186} 187 188static void amdgpu_winsys_query_info(struct radeon_winsys *rws, 189 struct radeon_info *info, 190 bool enable_smart_access_memory, 191 bool disable_smart_access_memory) 192{ 193 struct amdgpu_winsys *ws = amdgpu_winsys(rws); 194 195 if (disable_smart_access_memory) 196 ws->info.smart_access_memory = false; 197 else if (enable_smart_access_memory && ws->info.all_vram_visible) 198 ws->info.smart_access_memory = true; 199 200 *info = ws->info; 201} 202 203static bool amdgpu_cs_request_feature(struct radeon_cmdbuf *rcs, 204 enum radeon_feature_id fid, 205 bool enable) 206{ 207 return false; 208} 209 210static uint64_t amdgpu_query_value(struct radeon_winsys *rws, 211 enum radeon_value_id value) 212{ 213 struct amdgpu_winsys *ws = amdgpu_winsys(rws); 214 struct amdgpu_heap_info heap; 215 uint64_t retval = 0; 216 217 switch (value) { 218 case RADEON_REQUESTED_VRAM_MEMORY: 219 return ws->allocated_vram; 220 case RADEON_REQUESTED_GTT_MEMORY: 221 return ws->allocated_gtt; 222 case RADEON_MAPPED_VRAM: 223 return ws->mapped_vram; 224 case RADEON_MAPPED_GTT: 225 return ws->mapped_gtt; 226 case RADEON_SLAB_WASTED_VRAM: 227 return ws->slab_wasted_vram; 228 case RADEON_SLAB_WASTED_GTT: 229 return ws->slab_wasted_gtt; 230 case RADEON_BUFFER_WAIT_TIME_NS: 231 return ws->buffer_wait_time; 232 case RADEON_NUM_MAPPED_BUFFERS: 233 return ws->num_mapped_buffers; 234 case RADEON_TIMESTAMP: 235 amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval); 236 return retval; 237 case RADEON_NUM_GFX_IBS: 238 return ws->num_gfx_IBs; 239 case RADEON_NUM_SDMA_IBS: 240 return ws->num_sdma_IBs; 241 case RADEON_GFX_BO_LIST_COUNTER: 242 return ws->gfx_bo_list_counter; 243 case RADEON_GFX_IB_SIZE_COUNTER: 244 return ws->gfx_ib_size_counter; 245 case RADEON_NUM_BYTES_MOVED: 246 amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_BYTES_MOVED, 8, &retval); 247 return retval; 248 case RADEON_NUM_EVICTIONS: 249 amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_EVICTIONS, 8, &retval); 250 return retval; 251 case RADEON_NUM_VRAM_CPU_PAGE_FAULTS: 252 amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS, 8, &retval); 253 return retval; 254 case RADEON_VRAM_USAGE: 255 amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &heap); 256 return heap.heap_usage; 257 case RADEON_VRAM_VIS_USAGE: 258 amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 259 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &heap); 260 return heap.heap_usage; 261 case RADEON_GTT_USAGE: 262 amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &heap); 263 return heap.heap_usage; 264 case RADEON_GPU_TEMPERATURE: 265 amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GPU_TEMP, 4, &retval); 266 return retval; 267 case RADEON_CURRENT_SCLK: 268 amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_SCLK, 4, &retval); 269 return retval; 270 case RADEON_CURRENT_MCLK: 271 amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_MCLK, 4, &retval); 272 return retval; 273 case RADEON_CS_THREAD_TIME: 274 return util_queue_get_thread_time_nano(&ws->cs_queue, 0); 275 } 276 return 0; 277} 278 279static bool amdgpu_read_registers(struct radeon_winsys *rws, 280 unsigned reg_offset, 281 unsigned num_registers, uint32_t *out) 282{ 283 struct amdgpu_winsys *ws = amdgpu_winsys(rws); 284 285 return amdgpu_read_mm_registers(ws->dev, reg_offset / 4, num_registers, 286 0xffffffff, 0, out) == 0; 287} 288 289static bool amdgpu_winsys_unref(struct radeon_winsys *rws) 290{ 291 struct amdgpu_screen_winsys *sws = amdgpu_screen_winsys(rws); 292 struct amdgpu_winsys *aws = sws->aws; 293 bool ret; 294 295 simple_mtx_lock(&aws->sws_list_lock); 296 297 ret = pipe_reference(&sws->reference, NULL); 298 if (ret) { 299 struct amdgpu_screen_winsys **sws_iter; 300 struct amdgpu_winsys *aws = sws->aws; 301 302 /* Remove this amdgpu_screen_winsys from amdgpu_winsys' list, so that 303 * amdgpu_winsys_create can't re-use it anymore 304 */ 305 for (sws_iter = &aws->sws_list; *sws_iter; sws_iter = &(*sws_iter)->next) { 306 if (*sws_iter == sws) { 307 *sws_iter = sws->next; 308 break; 309 } 310 } 311 } 312 313 simple_mtx_unlock(&aws->sws_list_lock); 314 315 if (ret && sws->kms_handles) { 316 struct drm_gem_close args; 317 318 hash_table_foreach(sws->kms_handles, entry) { 319 args.handle = (uintptr_t)entry->data; 320 drmIoctl(sws->fd, DRM_IOCTL_GEM_CLOSE, &args); 321 } 322 _mesa_hash_table_destroy(sws->kms_handles, NULL); 323 } 324 325 return ret; 326} 327 328static void amdgpu_pin_threads_to_L3_cache(struct radeon_winsys *rws, 329 unsigned cache) 330{ 331 struct amdgpu_winsys *ws = amdgpu_winsys(rws); 332 333 util_set_thread_affinity(ws->cs_queue.threads[0], 334 util_get_cpu_caps()->L3_affinity_mask[cache], 335 NULL, util_get_cpu_caps()->num_cpu_mask_bits); 336} 337 338static uint32_t kms_handle_hash(const void *key) 339{ 340 const struct amdgpu_winsys_bo *bo = key; 341 342 return bo->u.real.kms_handle; 343} 344 345static bool kms_handle_equals(const void *a, const void *b) 346{ 347 return a == b; 348} 349 350static bool amdgpu_cs_is_secure(struct radeon_cmdbuf *rcs) 351{ 352 struct amdgpu_cs *cs = amdgpu_cs(rcs); 353 return cs->csc->secure; 354} 355 356PUBLIC struct radeon_winsys * 357amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, 358 radeon_screen_create_t screen_create) 359{ 360 struct amdgpu_screen_winsys *ws; 361 struct amdgpu_winsys *aws; 362 amdgpu_device_handle dev; 363 uint32_t drm_major, drm_minor; 364 int r; 365 366 ws = CALLOC_STRUCT(amdgpu_screen_winsys); 367 if (!ws) 368 return NULL; 369 370 pipe_reference_init(&ws->reference, 1); 371 ws->fd = os_dupfd_cloexec(fd); 372 373 /* Look up the winsys from the dev table. */ 374 simple_mtx_lock(&dev_tab_mutex); 375 if (!dev_tab) 376 dev_tab = util_hash_table_create_ptr_keys(); 377 378 /* Initialize the amdgpu device. This should always return the same pointer 379 * for the same fd. */ 380 r = amdgpu_device_initialize(ws->fd, &drm_major, &drm_minor, &dev); 381 if (r) { 382 fprintf(stderr, "amdgpu: amdgpu_device_initialize failed.\n"); 383 goto fail; 384 } 385 386 /* Lookup a winsys if we have already created one for this device. */ 387 aws = util_hash_table_get(dev_tab, dev); 388 if (aws) { 389 struct amdgpu_screen_winsys *sws_iter; 390 391 /* Release the device handle, because we don't need it anymore. 392 * This function is returning an existing winsys instance, which 393 * has its own device handle. 394 */ 395 amdgpu_device_deinitialize(dev); 396 397 simple_mtx_lock(&aws->sws_list_lock); 398 for (sws_iter = aws->sws_list; sws_iter; sws_iter = sws_iter->next) { 399 r = os_same_file_description(sws_iter->fd, ws->fd); 400 401 if (r == 0) { 402 close(ws->fd); 403 FREE(ws); 404 ws = sws_iter; 405 pipe_reference(NULL, &ws->reference); 406 simple_mtx_unlock(&aws->sws_list_lock); 407 goto unlock; 408 } else if (r < 0) { 409 static bool logged; 410 411 if (!logged) { 412 os_log_message("amdgpu: os_same_file_description couldn't " 413 "determine if two DRM fds reference the same " 414 "file description.\n" 415 "If they do, bad things may happen!\n"); 416 logged = true; 417 } 418 } 419 } 420 simple_mtx_unlock(&aws->sws_list_lock); 421 422 ws->kms_handles = _mesa_hash_table_create(NULL, kms_handle_hash, 423 kms_handle_equals); 424 if (!ws->kms_handles) 425 goto fail; 426 427 pipe_reference(NULL, &aws->reference); 428 } else { 429 /* Create a new winsys. */ 430 aws = CALLOC_STRUCT(amdgpu_winsys); 431 if (!aws) 432 goto fail; 433 434 aws->dev = dev; 435 aws->fd = ws->fd; 436 aws->info.drm_major = drm_major; 437 aws->info.drm_minor = drm_minor; 438 aws->dummy_ws.aws = aws; /* only the pointer is used */ 439 440 if (!do_winsys_init(aws, config, fd)) 441 goto fail_alloc; 442 443 /* Create managers. */ 444 pb_cache_init(&aws->bo_cache, RADEON_MAX_CACHED_HEAPS, 445 500000, aws->check_vm ? 1.0f : 2.0f, 0, 446 (aws->info.vram_size + aws->info.gart_size) / 8, aws, 447 /* Cast to void* because one of the function parameters 448 * is a struct pointer instead of void*. */ 449 (void*)amdgpu_bo_destroy, (void*)amdgpu_bo_can_reclaim); 450 451 unsigned min_slab_order = 8; /* 256 bytes */ 452 unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */ 453 unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) / 454 NUM_SLAB_ALLOCATORS; 455 456 /* Divide the size order range among slab managers. */ 457 for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { 458 unsigned min_order = min_slab_order; 459 unsigned max_order = MIN2(min_order + num_slab_orders_per_allocator, 460 max_slab_order); 461 462 if (!pb_slabs_init(&aws->bo_slabs[i], 463 min_order, max_order, 464 RADEON_MAX_SLAB_HEAPS, true, 465 aws, 466 amdgpu_bo_can_reclaim_slab, 467 amdgpu_bo_slab_alloc_normal, 468 /* Cast to void* because one of the function parameters 469 * is a struct pointer instead of void*. */ 470 (void*)amdgpu_bo_slab_free)) { 471 amdgpu_winsys_destroy(&ws->base); 472 simple_mtx_unlock(&dev_tab_mutex); 473 return NULL; 474 } 475 476 if (aws->info.has_tmz_support && 477 !pb_slabs_init(&aws->bo_slabs_encrypted[i], 478 min_order, max_order, 479 RADEON_MAX_SLAB_HEAPS, true, 480 aws, 481 amdgpu_bo_can_reclaim_slab, 482 amdgpu_bo_slab_alloc_encrypted, 483 /* Cast to void* because one of the function parameters 484 * is a struct pointer instead of void*. */ 485 (void*)amdgpu_bo_slab_free)) { 486 amdgpu_winsys_destroy(&ws->base); 487 simple_mtx_unlock(&dev_tab_mutex); 488 return NULL; 489 } 490 491 min_slab_order = max_order + 1; 492 } 493 494 aws->info.min_alloc_size = 1 << aws->bo_slabs[0].min_order; 495 496 /* init reference */ 497 pipe_reference_init(&aws->reference, 1); 498#if DEBUG 499 list_inithead(&aws->global_bo_list); 500#endif 501 aws->bo_export_table = util_hash_table_create_ptr_keys(); 502 503 (void) simple_mtx_init(&aws->sws_list_lock, mtx_plain); 504#if DEBUG 505 (void) simple_mtx_init(&aws->global_bo_list_lock, mtx_plain); 506#endif 507 (void) simple_mtx_init(&aws->bo_fence_lock, mtx_plain); 508 (void) simple_mtx_init(&aws->bo_export_table_lock, mtx_plain); 509 510 if (!util_queue_init(&aws->cs_queue, "cs", 8, 1, 511 UTIL_QUEUE_INIT_RESIZE_IF_FULL, NULL)) { 512 amdgpu_winsys_destroy(&ws->base); 513 simple_mtx_unlock(&dev_tab_mutex); 514 return NULL; 515 } 516 517 _mesa_hash_table_insert(dev_tab, dev, aws); 518 519 if (aws->reserve_vmid) { 520 r = amdgpu_vm_reserve_vmid(dev, 0); 521 if (r) { 522 amdgpu_winsys_destroy(&ws->base); 523 simple_mtx_unlock(&dev_tab_mutex); 524 return NULL; 525 } 526 } 527 } 528 529 ws->aws = aws; 530 531 /* Set functions. */ 532 ws->base.unref = amdgpu_winsys_unref; 533 ws->base.destroy = amdgpu_winsys_destroy; 534 ws->base.query_info = amdgpu_winsys_query_info; 535 ws->base.cs_request_feature = amdgpu_cs_request_feature; 536 ws->base.query_value = amdgpu_query_value; 537 ws->base.read_registers = amdgpu_read_registers; 538 ws->base.pin_threads_to_L3_cache = amdgpu_pin_threads_to_L3_cache; 539 ws->base.cs_is_secure = amdgpu_cs_is_secure; 540 541 amdgpu_bo_init_functions(ws); 542 amdgpu_cs_init_functions(ws); 543 amdgpu_surface_init_functions(ws); 544 545 simple_mtx_lock(&aws->sws_list_lock); 546 ws->next = aws->sws_list; 547 aws->sws_list = ws; 548 simple_mtx_unlock(&aws->sws_list_lock); 549 550 /* Create the screen at the end. The winsys must be initialized 551 * completely. 552 * 553 * Alternatively, we could create the screen based on "ws->gen" 554 * and link all drivers into one binary blob. */ 555 ws->base.screen = screen_create(&ws->base, config); 556 if (!ws->base.screen) { 557 amdgpu_winsys_destroy(&ws->base); 558 simple_mtx_unlock(&dev_tab_mutex); 559 return NULL; 560 } 561 562unlock: 563 /* We must unlock the mutex once the winsys is fully initialized, so that 564 * other threads attempting to create the winsys from the same fd will 565 * get a fully initialized winsys and not just half-way initialized. */ 566 simple_mtx_unlock(&dev_tab_mutex); 567 568 return &ws->base; 569 570fail_alloc: 571 FREE(aws); 572fail: 573 if (ws->kms_handles) 574 _mesa_hash_table_destroy(ws->kms_handles, NULL); 575 close(ws->fd); 576 FREE(ws); 577 simple_mtx_unlock(&dev_tab_mutex); 578 return NULL; 579} 580