/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>

#include "radv_amdgpu_bo.h"

#include <amdgpu.h>
#include <amdgpu_drm.h>
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>

#include "util/u_atomic.h"

static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);

static int
radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws,
                     amdgpu_bo_handle bo,
                     uint64_t offset,
                     uint64_t size,
                     uint64_t addr,
                     uint32_t bo_flags,
                     uint32_t ops)
{
        uint64_t flags = AMDGPU_VM_PAGE_READABLE |
                         AMDGPU_VM_PAGE_EXECUTABLE;

        if ((bo_flags & RADEON_FLAG_VA_UNCACHED) && ws->info.chip_class >= GFX9)
                flags |= AMDGPU_VM_MTYPE_UC;

        if (!(bo_flags & RADEON_FLAG_READ_ONLY))
                flags |= AMDGPU_VM_PAGE_WRITEABLE;

        size = ALIGN(size, getpagesize());

        return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr,
                                   flags, ops);
}

static void
radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
                               const struct radv_amdgpu_map_range *range)
{
        assert(range->size);

        if (!range->bo)
                return; /* TODO: PRT mapping */

        p_atomic_inc(&range->bo->ref_count);
        int r = radv_amdgpu_bo_va_op(bo->ws, range->bo->bo, range->bo_offset,
                                     range->size, range->offset + bo->base.va,
                                     0, AMDGPU_VA_OP_MAP);
        if (r)
                abort();
}

static void
radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
                                 const struct radv_amdgpu_map_range *range)
{
        assert(range->size);

        if (!range->bo)
                return; /* TODO: PRT mapping */

        int r = radv_amdgpu_bo_va_op(bo->ws, range->bo->bo, range->bo_offset,
                                     range->size, range->offset + bo->base.va,
                                     0, AMDGPU_VA_OP_UNMAP);
        if (r)
                abort();
        radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
}

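/*
 * Virtual BOs keep a sorted, deduplicated array of the BOs currently backing
 * their ranges (bos/bo_count), rebuilt after every bind. The comparator below
 * only provides a total order on the pointer values for qsort()-based
 * deduplication.
 */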
static int bo_comparator(const void *ap, const void *bp) {
        struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
        struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
        return (a > b) ? 1 : (a < b) ? -1 : 0;
}

static void
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
        if (bo->bo_capacity < bo->range_count) {
                uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
                bo->bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
                bo->bo_capacity = new_count;
        }

        uint32_t temp_bo_count = 0;
        for (uint32_t i = 0; i < bo->range_count; ++i)
                if (bo->ranges[i].bo)
                        bo->bos[temp_bo_count++] = bo->ranges[i].bo;

        qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

        /* If no range is backed by a BO, the list is empty. */
        uint32_t final_bo_count = temp_bo_count ? 1 : 0;
        for (uint32_t i = 1; i < temp_bo_count; ++i)
                if (bo->bos[i] != bo->bos[i - 1])
                        bo->bos[final_bo_count++] = bo->bos[i];

        bo->bo_count = final_bo_count;
}

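/*
 * Bind a BO (or unbind, when _bo is NULL) to [offset, offset + size) of a
 * virtual BO. The parent keeps a sorted, non-overlapping list of map ranges
 * that covers its whole VA space; binding replaces the covered part of that
 * list with a single new range and only unmaps/remaps the ranges that
 * actually change, e.g.:
 *
 *    before:  [  A  ][  B  ][  C  ]
 *    bind:        [     X     ]
 *    after:   [ A'][     X     ][C']
 *
 * where A' and C' are the untouched head of A and tail of C, and B is
 * unmapped entirely.
 */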
static void
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
                                   uint64_t offset, uint64_t size,
                                   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
        struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
        struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
        int range_count_delta, new_idx;
        int first = 0, last;
        struct radv_amdgpu_map_range new_first, new_last;

        assert(parent->is_virtual);
        assert(!bo || !bo->is_virtual);

        if (!size)
                return;

        /* We have at most 2 new ranges (1 by the bind, and another one by
         * splitting a range that contains the newly bound range). */
        if (parent->range_capacity - parent->range_count < 2) {
                parent->range_capacity += 2;
                parent->ranges = realloc(parent->ranges,
                                         parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
        }

        /*
         * [first, last] is exactly the range of ranges that either overlap the
         * newly bound range, or are adjacent to it. This corresponds to the
         * bind ranges that may change.
         */
        while (first + 1 < parent->range_count &&
               parent->ranges[first].offset + parent->ranges[first].size < offset)
                ++first;

        last = first;
        while (last + 1 < parent->range_count &&
               parent->ranges[last].offset <= offset + size)
                ++last;

        /* Whether the first or last range is going to be totally removed or just
         * resized/left alone. Note that in the case of first == last, we will split
         * this into a part before and after the new range. The remove flag then
         * means that the corresponding split part is not created. */
        bool remove_first = parent->ranges[first].offset == offset;
        bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
        bool unmapped_first = false;

        assert(parent->ranges[first].offset <= offset);
        assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

        /* Try to merge the new range with the first range. */
        if (parent->ranges[first].bo == bo &&
            (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
                size += offset - parent->ranges[first].offset;
                offset = parent->ranges[first].offset;
                bo_offset = parent->ranges[first].bo_offset;
                remove_first = true;
        }

        /* Try to merge the new range with the last range. */
        if (parent->ranges[last].bo == bo &&
            (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
                size = parent->ranges[last].offset + parent->ranges[last].size - offset;
                remove_last = true;
        }

        range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
        new_idx = first + !remove_first;

        /* Any range between first and last is going to be entirely covered by
         * the new range, so just unmap them. */
        for (int i = first + 1; i < last; ++i)
                radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);

        /* If the first/last range is not left alone, we unmap it and optionally map
         * it again after modifications. Note that this implicitly takes care of the
         * splitting when first == last. */
        new_first = parent->ranges[first];
        new_last = parent->ranges[last];

        if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
                radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
                unmapped_first = true;

                if (!remove_first) {
                        new_first.size = offset - new_first.offset;
                        radv_amdgpu_winsys_virtual_map(parent, &new_first);
                }
        }

        if (parent->ranges[last].offset < offset + size || remove_last) {
                if (first != last || !unmapped_first)
                        radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);

                if (!remove_last) {
                        new_last.size -= offset + size - new_last.offset;
                        new_last.offset = offset + size;
                        radv_amdgpu_winsys_virtual_map(parent, &new_last);
                }
        }

        /* Move the ranges after last to account for the changed number of ranges. */
        memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
                sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

        if (!remove_first)
                parent->ranges[first] = new_first;

        if (!remove_last)
                parent->ranges[new_idx + 1] = new_last;

        /* Actually set up the new range. */
        parent->ranges[new_idx].offset = offset;
        parent->ranges[new_idx].size = size;
        parent->ranges[new_idx].bo = bo;
        parent->ranges[new_idx].bo_offset = bo_offset;

        radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);

        parent->range_count += range_count_delta;

        radv_amdgpu_winsys_rebuild_bo_list(parent);
}

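/*
 * Drop a reference on a BO. The last reference unmaps it (for virtual BOs,
 * unmaps every backing range and drops the references taken on the backing
 * BOs), frees the underlying kernel BO (if any) and the VA range, and updates
 * the winsys memory accounting.
 */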
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        struct radv_amdgpu_winsys *ws = bo->ws;

        if (p_atomic_dec_return(&bo->ref_count))
                return;
        if (bo->is_virtual) {
                for (uint32_t i = 0; i < bo->range_count; ++i) {
                        radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
                }
                free(bo->bos);
                free(bo->ranges);
        } else {
                if (bo->ws->debug_all_bos) {
                        pthread_mutex_lock(&bo->ws->global_bo_list_lock);
                        LIST_DEL(&bo->global_list_item);
                        bo->ws->num_buffers--;
                        pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
                }
                radv_amdgpu_bo_va_op(bo->ws, bo->bo, 0, bo->size, bo->base.va,
                                     0, AMDGPU_VA_OP_UNMAP);
                amdgpu_bo_free(bo->bo);
        }

        if (bo->initial_domain & RADEON_DOMAIN_VRAM)
                p_atomic_add(&ws->allocated_vram,
                             -align64(bo->size, ws->info.gart_page_size));
        if (bo->base.vram_cpu_access)
                p_atomic_add(&ws->allocated_vram_vis,
                             -align64(bo->size, ws->info.gart_page_size));
        if (bo->initial_domain & RADEON_DOMAIN_GTT)
                p_atomic_add(&ws->allocated_gtt,
                             -align64(bo->size, ws->info.gart_page_size));

        amdgpu_va_range_free(bo->va_handle);
        FREE(bo);
}

static void radv_amdgpu_add_buffer_to_global_list(struct radv_amdgpu_winsys_bo *bo)
{
        struct radv_amdgpu_winsys *ws = bo->ws;

        if (bo->ws->debug_all_bos) {
                pthread_mutex_lock(&ws->global_bo_list_lock);
                LIST_ADDTAIL(&bo->global_list_item, &ws->global_bo_list);
                ws->num_buffers++;
                pthread_mutex_unlock(&ws->global_bo_list_lock);
        }
}

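/*
 * Allocate a winsys BO. For RADEON_FLAG_VIRTUAL only a VA range plus the
 * range bookkeeping is created; memory is provided later through
 * buffer_virtual_bind. For regular BOs the radeon_bo_domain/flags are
 * translated into AMDGPU_GEM_* request flags, the kernel BO is allocated
 * and then mapped at the allocated VA.
 */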
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
                             uint64_t size,
                             unsigned alignment,
                             enum radeon_bo_domain initial_domain,
                             unsigned flags,
                             unsigned priority)
{
        struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
        struct radv_amdgpu_winsys_bo *bo;
        struct amdgpu_bo_alloc_request request = {0};
        amdgpu_bo_handle buf_handle;
        uint64_t va = 0;
        amdgpu_va_handle va_handle;
        int r;
        bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
        if (!bo) {
                return NULL;
        }

        unsigned virt_alignment = alignment;
        if (size >= ws->info.pte_fragment_size)
                virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);

        r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                                  size, virt_alignment, 0, &va, &va_handle,
                                  (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
                                  AMDGPU_VA_RANGE_HIGH);
        if (r)
                goto error_va_alloc;

        bo->base.va = va;
        bo->va_handle = va_handle;
        bo->size = size;
        bo->ws = ws;
        bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
        bo->ref_count = 1;

        if (flags & RADEON_FLAG_VIRTUAL) {
                bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
                bo->range_count = 1;
                bo->range_capacity = 1;

                bo->ranges[0].offset = 0;
                bo->ranges[0].size = size;
                bo->ranges[0].bo = NULL;
                bo->ranges[0].bo_offset = 0;

                radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
                return (struct radeon_winsys_bo *)bo;
        }

        request.alloc_size = size;
        request.phys_alignment = alignment;

        if (initial_domain & RADEON_DOMAIN_VRAM)
                request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
        if (initial_domain & RADEON_DOMAIN_GTT)
                request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;

        if (flags & RADEON_FLAG_CPU_ACCESS) {
                bo->base.vram_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
                request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
        }
        if (flags & RADEON_FLAG_NO_CPU_ACCESS)
                request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
        if (flags & RADEON_FLAG_GTT_WC)
                request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
        if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
                request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
        if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
            ws->info.has_local_buffers &&
            (ws->use_local_bos || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
                bo->base.is_local = true;
                request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
        }

        /* This won't do anything on pre-4.9 kernels. */
        if (ws->zero_all_vram_allocs && (initial_domain & RADEON_DOMAIN_VRAM))
                request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
        r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
        if (r) {
                fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
                fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
                fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
                fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
                goto error_bo_alloc;
        }

        r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags,
                                 AMDGPU_VA_OP_MAP);
        if (r)
                goto error_va_map;

        bo->bo = buf_handle;
        bo->initial_domain = initial_domain;
        bo->is_shared = false;
        bo->priority = priority;

        r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
        assert(!r);

        if (initial_domain & RADEON_DOMAIN_VRAM)
                p_atomic_add(&ws->allocated_vram,
                             align64(bo->size, ws->info.gart_page_size));
        if (bo->base.vram_cpu_access)
                p_atomic_add(&ws->allocated_vram_vis,
                             align64(bo->size, ws->info.gart_page_size));
        if (initial_domain & RADEON_DOMAIN_GTT)
                p_atomic_add(&ws->allocated_gtt,
                             align64(bo->size, ws->info.gart_page_size));

        radv_amdgpu_add_buffer_to_global_list(bo);
        return (struct radeon_winsys_bo *)bo;
error_va_map:
        amdgpu_bo_free(buf_handle);

error_bo_alloc:
        amdgpu_va_range_free(va_handle);

error_va_alloc:
        FREE(bo);
        return NULL;
}

static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        int ret;
        void *data;
        ret = amdgpu_bo_cpu_map(bo->bo, &data);
        if (ret)
                return NULL;
        return data;
}

static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        amdgpu_bo_cpu_unmap(bo->bo);
}

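/*
 * Pick the VM alignment for a mapping: at least the requested alignment,
 * raised to the PTE fragment size for large buffers and, on GFX9+, to the
 * largest power of two that does not exceed the size, which speeds up
 * address translation.
 */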
static uint64_t
radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws,
                                     uint64_t size, unsigned alignment)
{
        uint64_t vm_alignment = alignment;

        /* Increase the VM alignment for faster address translation. */
        if (size >= ws->info.pte_fragment_size)
                vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);

        /* Gfx9: Increase the VM alignment to the most significant bit set
         * in the size for faster address translation.
         */
        if (ws->info.chip_class >= GFX9) {
                unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
                uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;

                vm_alignment = MAX2(vm_alignment, msb_alignment);
        }
        return vm_alignment;
}

static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
                               void *pointer,
                               uint64_t size,
                               unsigned priority)
{
        struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
        amdgpu_bo_handle buf_handle;
        struct radv_amdgpu_winsys_bo *bo;
        uint64_t va;
        amdgpu_va_handle va_handle;
        uint64_t vm_alignment;

        bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
        if (!bo)
                return NULL;

        if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
                goto error;

        /* Using the optimal VM alignment also fixes GPU hangs for buffers that
         * are imported.
         */
        vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size,
                                                            ws->info.gart_page_size);

        if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                                  size, vm_alignment, 0, &va, &va_handle,
                                  AMDGPU_VA_RANGE_HIGH))
                goto error_va_alloc;

        if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
                goto error_va_map;

        /* Initialize it */
        bo->base.va = va;
        bo->va_handle = va_handle;
        bo->size = size;
        bo->ref_count = 1;
        bo->ws = ws;
        bo->bo = buf_handle;
        bo->initial_domain = RADEON_DOMAIN_GTT;
        bo->priority = priority;

        MAYBE_UNUSED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
        assert(!r);

        p_atomic_add(&ws->allocated_gtt,
                     align64(bo->size, ws->info.gart_page_size));

        radv_amdgpu_add_buffer_to_global_list(bo);
        return (struct radeon_winsys_bo *)bo;

error_va_map:
        amdgpu_va_range_free(va_handle);

error_va_alloc:
        amdgpu_bo_free(buf_handle);

error:
        FREE(bo);
        return NULL;
}

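/*
 * Import a BO from a dma-buf file descriptor: import the kernel handle,
 * query its size and preferred heap, allocate a VA range and map it. The
 * resulting BO is marked as shared.
 */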
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
                              int fd, unsigned priority,
                              unsigned *stride,
                              unsigned *offset)
{
        struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
        struct radv_amdgpu_winsys_bo *bo;
        uint64_t va;
        amdgpu_va_handle va_handle;
        enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
        struct amdgpu_bo_import_result result = {0};
        struct amdgpu_bo_info info = {0};
        enum radeon_bo_domain initial = 0;
        int r;
        bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
        if (!bo)
                return NULL;

        r = amdgpu_bo_import(ws->dev, type, fd, &result);
        if (r)
                goto error;

        r = amdgpu_bo_query_info(result.buf_handle, &info);
        if (r)
                goto error_query;

        r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                                  result.alloc_size, 1 << 20, 0, &va, &va_handle,
                                  AMDGPU_VA_RANGE_HIGH);
        if (r)
                goto error_query;

        r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size,
                                 va, 0, AMDGPU_VA_OP_MAP);
        if (r)
                goto error_va_map;

        if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
                initial |= RADEON_DOMAIN_VRAM;
        if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
                initial |= RADEON_DOMAIN_GTT;

        bo->bo = result.buf_handle;
        bo->base.va = va;
        bo->va_handle = va_handle;
        bo->initial_domain = initial;
        bo->size = result.alloc_size;
        bo->is_shared = true;
        bo->ws = ws;
        bo->priority = priority;
        bo->ref_count = 1;

        r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
        assert(!r);

        if (bo->initial_domain & RADEON_DOMAIN_VRAM)
                p_atomic_add(&ws->allocated_vram,
                             align64(bo->size, ws->info.gart_page_size));
        if (bo->initial_domain & RADEON_DOMAIN_GTT)
                p_atomic_add(&ws->allocated_gtt,
                             align64(bo->size, ws->info.gart_page_size));

        radv_amdgpu_add_buffer_to_global_list(bo);
        return (struct radeon_winsys_bo *)bo;
error_va_map:
        amdgpu_va_range_free(va_handle);

error_query:
        amdgpu_bo_free(result.buf_handle);

error:
        FREE(bo);
        return NULL;
}

static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws,
                          struct radeon_winsys_bo *_bo,
                          int *fd)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
        int r;
        unsigned handle;
        r = amdgpu_bo_export(bo->bo, type, &handle);
        if (r)
                return false;

        *fd = (int)handle;
        bo->is_shared = true;
        return true;
}

static unsigned eg_tile_split(unsigned tile_split)
{
        switch (tile_split) {
        case 0:     tile_split = 64;    break;
        case 1:     tile_split = 128;   break;
        case 2:     tile_split = 256;   break;
        case 3:     tile_split = 512;   break;
        default:
        case 4:     tile_split = 1024;  break;
        case 5:     tile_split = 2048;  break;
        case 6:     tile_split = 4096;  break;
        }
        return tile_split;
}

static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split)
{
        switch (eg_tile_split) {
        case 64:    return 0;
        case 128:   return 1;
        case 256:   return 2;
        case 512:   return 3;
        default:
        case 1024:  return 4;
        case 2048:  return 5;
        case 4096:  return 6;
        }
}

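/*
 * The surface layout is exchanged with other processes/UMDs through the
 * kernel BO metadata: set_metadata packs the radeon_bo_metadata fields into
 * the AMDGPU_TILING_* bitfield (plus the opaque umd_metadata blob), and
 * get_metadata below performs the reverse conversion.
 */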
static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        struct amdgpu_bo_metadata metadata = {0};
        uint32_t tiling_flags = 0;

        if (bo->ws->info.chip_class >= GFX9) {
                tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
        } else {
                if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
                        tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
                else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
                        tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
                else
                        tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

                tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
                tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
                tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
                if (md->u.legacy.tile_split)
                        tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
                tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
                tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);

                if (md->u.legacy.scanout)
                        tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
                else
                        tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
        }

        metadata.tiling_info = tiling_flags;
        metadata.size_metadata = md->size_metadata;
        memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

        amdgpu_bo_set_metadata(bo->bo, &metadata);
}

static void
radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
        struct amdgpu_bo_info info = {0};

        int r = amdgpu_bo_query_info(bo->bo, &info);
        if (r)
                return;

        uint64_t tiling_flags = info.metadata.tiling_info;

        if (bo->ws->info.chip_class >= GFX9) {
                md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
        } else {
                md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
                md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;

                if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
                        md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
                else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
                        md->u.legacy.microtile = RADEON_LAYOUT_TILED;

                md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
                md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
                md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
                md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
                md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
                md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
                md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
        }

        md->size_metadata = info.metadata.size_metadata;
        memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
        ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
        ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
        ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
        ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
        ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
        ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
        ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
        ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
        ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
        ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
}