/*	$NetBSD: radeon_object.c,v 1.8 2021/12/18 23:45:43 riastradh Exp $	*/

/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse (at) freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: radeon_object.c,v 1.8 2021/12/18 23:45:43 riastradh Exp $");

#include <linux/io.h>
#include <linux/list.h>
#include <linux/slab.h>

#include <drm/drm_cache.h>
#include <drm/drm_prime.h>
#include <drm/radeon_drm.h>

#include "radeon.h"
#include "radeon_trace.h"

#include <linux/nbsd-namespace.h>

int radeon_ttm_init(struct radeon_device *rdev);
void radeon_ttm_fini(struct radeon_device *rdev);
static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);

/*
 * To exclude mutual BO access we rely on bo_reserve exclusion, as all
 * functions call it.
 */

static void radeon_update_memory_usage(struct radeon_bo *bo,
				       unsigned mem_type, int sign)
{
	struct radeon_device *rdev = bo->rdev;
	u64 size = (u64)bo->tbo.num_pages << PAGE_SHIFT;

	switch (mem_type) {
	case TTM_PL_TT:
		if (sign > 0)
			atomic64_add(size, &rdev->gtt_usage);
		else
			atomic64_sub(size, &rdev->gtt_usage);
		break;
	case TTM_PL_VRAM:
		if (sign > 0)
			atomic64_add(size, &rdev->vram_usage);
		else
			atomic64_sub(size, &rdev->vram_usage);
		break;
	}
}

static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
	struct radeon_bo *bo;

	bo = container_of(tbo, struct radeon_bo, tbo);

	radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1);

	mutex_lock(&bo->rdev->gem.mutex);
	list_del_init(&bo->list);
	mutex_unlock(&bo->rdev->gem.mutex);
	radeon_bo_clear_surface_reg(bo);
	WARN_ON_ONCE(!list_empty(&bo->va));
	if (bo->tbo.base.import_attach)
		drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg);
	drm_gem_object_release(&bo->tbo.base);
	kfree(bo);
}

bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
{
	if (bo->destroy == &radeon_ttm_bo_destroy)
		return true;
	return false;
}

void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
{
	u32 c = 0, i;

	rbo->placement.placement = rbo->placements;
	rbo->placement.busy_placement = rbo->placements;
	if (domain & RADEON_GEM_DOMAIN_VRAM) {
		/* Try placing BOs which don't need CPU access outside of the
		 * CPU accessible part of VRAM
		 */
		if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
		    rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
			rbo->placements[c].fpfn =
				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
						     TTM_PL_FLAG_UNCACHED |
						     TTM_PL_FLAG_VRAM;
		}

		rbo->placements[c].fpfn = 0;
		rbo->placements[c++].flags = TTM_PL_FLAG_WC |
					     TTM_PL_FLAG_UNCACHED |
					     TTM_PL_FLAG_VRAM;
	}

	if (domain & RADEON_GEM_DOMAIN_GTT) {
		if (rbo->flags & RADEON_GEM_GTT_UC) {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_TT;

		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
			   (rbo->rdev->flags & RADEON_IS_AGP)) {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
				TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_TT;
		} else {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
						     TTM_PL_FLAG_TT;
		}
	}

	if (domain & RADEON_GEM_DOMAIN_CPU) {
		if (rbo->flags & RADEON_GEM_GTT_UC) {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_SYSTEM;

		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
		    rbo->rdev->flags & RADEON_IS_AGP) {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
				TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_SYSTEM;
		} else {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
						     TTM_PL_FLAG_SYSTEM;
		}
	}
	if (!c) {
		rbo->placements[c].fpfn = 0;
		rbo->placements[c++].flags = TTM_PL_MASK_CACHING |
					     TTM_PL_FLAG_SYSTEM;
	}

	rbo->placement.num_placement = c;
	rbo->placement.num_busy_placement = c;

	for (i = 0; i < c; ++i) {
		if ((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
		    (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    !rbo->placements[i].fpfn)
			rbo->placements[i].lpfn =
				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
		else
			rbo->placements[i].lpfn = 0;
	}
}

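/*
 * Example (illustrative): for a BO created with
 * RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, no special flags, and a
 * non-AGP board, the function above builds two placements that TTM tries
 * in order:
 *
 *	placements[0] = { .fpfn = 0, .flags = WC | UNCACHED | VRAM }
 *	placements[1] = { .fpfn = 0, .flags = CACHED | TT }
 *
 * so cached GTT is only used when the VRAM placement cannot be satisfied.
 */
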
int radeon_bo_create(struct radeon_device *rdev,
		     unsigned long size, int byte_align, bool kernel,
		     u32 domain, u32 flags, struct sg_table *sg,
		     struct dma_resv *resv,
		     struct radeon_bo **bo_ptr)
{
	struct radeon_bo *bo;
	enum ttm_bo_type type;
	unsigned long page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
	size_t acc_size;
	int r;

	size = ALIGN(size, PAGE_SIZE);

	if (kernel) {
		type = ttm_bo_type_kernel;
	} else if (sg) {
		type = ttm_bo_type_sg;
	} else {
		type = ttm_bo_type_device;
	}
	*bo_ptr = NULL;

	acc_size = ttm_bo_dma_acc_size(&rdev->mman.bdev, size,
				       sizeof(struct radeon_bo));

	bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL);
	if (bo == NULL)
		return -ENOMEM;
	drm_gem_private_object_init(rdev->ddev, &bo->tbo.base, size);
	bo->rdev = rdev;
	bo->surface_reg = -1;
	INIT_LIST_HEAD(&bo->list);
	INIT_LIST_HEAD(&bo->va);
	bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM |
				       RADEON_GEM_DOMAIN_GTT |
				       RADEON_GEM_DOMAIN_CPU);

	bo->flags = flags;
	/* PCI GART is always snooped */
	if (!(rdev->flags & RADEON_IS_PCIE))
		bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

	/* Write-combined CPU mappings of GTT cause GPU hangs with RV6xx
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=91268
	 */
	if (rdev->family >= CHIP_RV610 && rdev->family <= CHIP_RV635)
		bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

#ifdef CONFIG_X86_32
	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
	 */
	bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
	/* Don't try to enable write-combining when it can't work, or things
	 * may be slow
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
	 */
#ifndef CONFIG_COMPILE_TEST
#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
	 thanks to write-combining
#endif

	if (bo->flags & RADEON_GEM_GTT_WC)
		DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
			      "better performance thanks to write-combining\n");
	bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#else
	/* For architectures that don't support WC memory,
	 * mask out the WC flag from the BO
	 */
	if (!drm_arch_can_wc_memory())
		bo->flags &= ~RADEON_GEM_GTT_WC;
#endif

	radeon_ttm_placement_from_domain(bo, domain);
	/* Kernel allocations are uninterruptible */
	down_read(&rdev->pm.mclk_lock);
	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
			&bo->placement, page_align, !kernel, acc_size,
			sg, resv, &radeon_ttm_bo_destroy);
	up_read(&rdev->pm.mclk_lock);
	if (unlikely(r != 0)) {
		return r;
	}
	*bo_ptr = bo;

	trace_radeon_bo_create(bo);

	return 0;
}

int radeon_bo_kmap(struct radeon_bo *bo, void **ptr)
{
	bool is_iomem;
	int r;

	if (bo->kptr) {
		if (ptr) {
			*ptr = bo->kptr;
		}
		return 0;
	}
	r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
	if (r) {
		return r;
	}
	bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
	if (ptr) {
		*ptr = bo->kptr;
	}
	radeon_bo_check_tiling(bo, 0, 0);
	return 0;
}

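/*
 * Typical in-kernel setup of a kmapped BO (illustrative sketch; error
 * handling omitted, reserve/unreserve helpers as used elsewhere in the
 * driver):
 *
 *	struct radeon_bo *bo;
 *	void *cpu_ptr;
 *	u64 gpu_addr;
 *
 *	radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
 *			 0, NULL, NULL, &bo);
 *	radeon_bo_reserve(bo, false);
 *	radeon_bo_pin(bo, RADEON_GEM_DOMAIN_VRAM, &gpu_addr);
 *	radeon_bo_kmap(bo, &cpu_ptr);
 *	radeon_bo_unreserve(bo);
 */
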
void radeon_bo_kunmap(struct radeon_bo *bo)
{
	if (bo->kptr == NULL)
		return;
	bo->kptr = NULL;
	radeon_bo_check_tiling(bo, 0, 0);
	ttm_bo_kunmap(&bo->kmap);
}

struct radeon_bo *radeon_bo_ref(struct radeon_bo *bo)
{
	if (bo == NULL)
		return NULL;

	ttm_bo_get(&bo->tbo);
	return bo;
}

void radeon_bo_unref(struct radeon_bo **bo)
{
	struct ttm_buffer_object *tbo;

	if ((*bo) == NULL)
		return;
	tbo = &((*bo)->tbo);
	ttm_bo_put(tbo);
	*bo = NULL;
}

int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
			     u64 *gpu_addr)
{
	struct ttm_operation_ctx ctx = { false, false };
	int r, i;

	if (radeon_ttm_tt_has_userptr(bo->tbo.ttm))
		return -EPERM;

	if (bo->pin_count) {
		bo->pin_count++;
		if (gpu_addr)
			*gpu_addr = radeon_bo_gpu_offset(bo);

		if (max_offset != 0) {
			u64 domain_start;

			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain_start = bo->rdev->mc.vram_start;
			else
				domain_start = bo->rdev->mc.gtt_start;
			WARN_ON_ONCE(max_offset <
				     (radeon_bo_gpu_offset(bo) - domain_start));
		}

		return 0;
	}
	if (bo->prime_shared_count && domain == RADEON_GEM_DOMAIN_VRAM) {
		/* A BO shared as a dma-buf cannot be sensibly migrated to VRAM */
		return -EINVAL;
	}

	radeon_ttm_placement_from_domain(bo, domain);
	for (i = 0; i < bo->placement.num_placement; i++) {
		/* force to pin into visible video ram */
		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
		    (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
			bo->placements[i].lpfn =
				bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
		else
			bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;

		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (likely(r == 0)) {
		bo->pin_count = 1;
		if (gpu_addr != NULL)
			*gpu_addr = radeon_bo_gpu_offset(bo);
		if (domain == RADEON_GEM_DOMAIN_VRAM)
			bo->rdev->vram_pin_size += radeon_bo_size(bo);
		else
			bo->rdev->gart_pin_size += radeon_bo_size(bo);
	} else {
		dev_err(bo->rdev->dev, "%p pin failed\n", bo);
	}
	return r;
}

int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 *gpu_addr)
{
	return radeon_bo_pin_restricted(bo, domain, 0, gpu_addr);
}

int radeon_bo_unpin(struct radeon_bo *bo)
{
	struct ttm_operation_ctx ctx = { false, false };
	int r, i;

	if (!bo->pin_count) {
		dev_warn(bo->rdev->dev, "%p unpin not necessary\n", bo);
		return 0;
	}
	bo->pin_count--;
	if (bo->pin_count)
		return 0;
	for (i = 0; i < bo->placement.num_placement; i++) {
		bo->placements[i].lpfn = 0;
		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
	}
	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (likely(r == 0)) {
		if (bo->tbo.mem.mem_type == TTM_PL_VRAM)
			bo->rdev->vram_pin_size -= radeon_bo_size(bo);
		else
			bo->rdev->gart_pin_size -= radeon_bo_size(bo);
	} else {
		dev_err(bo->rdev->dev, "%p validate failed for unpin\n", bo);
	}
	return r;
}

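/*
 * Matching teardown for the setup sketch above (illustrative; error
 * handling omitted).  Note that pin_count nests: only the unpin that drops
 * it to zero clears TTM_PL_FLAG_NO_EVICT and revalidates the BO.
 *
 *	radeon_bo_reserve(bo, false);
 *	radeon_bo_kunmap(bo);
 *	radeon_bo_unpin(bo);
 *	radeon_bo_unreserve(bo);
 *	radeon_bo_unref(&bo);
 */
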
int radeon_bo_evict_vram(struct radeon_device *rdev)
{
	/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correctly */
#ifndef CONFIG_HIBERNATION
	if (rdev->flags & RADEON_IS_IGP) {
		if (rdev->mc.igp_sideport_enabled == false)
			/* Useless to evict on IGP chips */
			return 0;
	}
#endif
	return ttm_bo_evict_mm(&rdev->mman.bdev, TTM_PL_VRAM);
}

void radeon_bo_force_delete(struct radeon_device *rdev)
{
	struct radeon_bo *bo, *n;

	if (list_empty(&rdev->gem.objects)) {
		return;
	}
	dev_err(rdev->dev, "Userspace still has active objects !\n");
	list_for_each_entry_safe(bo, n, &rdev->gem.objects, list) {
		dev_err(rdev->dev, "%p %p %lu %lu force free\n",
			&bo->tbo.base, bo, (unsigned long)bo->tbo.base.size,
			*((unsigned long *)&bo->tbo.base.refcount));
		mutex_lock(&bo->rdev->gem.mutex);
		list_del_init(&bo->list);
		mutex_unlock(&bo->rdev->gem.mutex);
		/* this should unref the ttm bo */
		drm_gem_object_put_unlocked(&bo->tbo.base);
	}
}

int radeon_bo_init(struct radeon_device *rdev)
{
	/* reserve PAT memory space to WC for VRAM */
	arch_io_reserve_memtype_wc(rdev->mc.aper_base,
				   rdev->mc.aper_size);

	/* Add an MTRR for the VRAM */
	if (!rdev->fastfb_working) {
		rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base,
						      rdev->mc.aper_size);
	}
#ifdef __NetBSD__
	if (rdev->mc.aper_base)
		pmap_pv_track(rdev->mc.aper_base, rdev->mc.aper_size);
#endif
	DRM_INFO("Detected VRAM RAM=%"PRIx64"M, BAR=%lluM\n",
		 rdev->mc.mc_vram_size >> 20,
		 (unsigned long long)rdev->mc.aper_size >> 20);
	DRM_INFO("RAM width %dbits %cDR\n",
		 rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
	return radeon_ttm_init(rdev);
}

void radeon_bo_fini(struct radeon_device *rdev)
{
	radeon_ttm_fini(rdev);
#ifdef __NetBSD__
	if (rdev->mc.aper_base)
		pmap_pv_untrack(rdev->mc.aper_base, rdev->mc.aper_size);
#endif
	arch_phys_wc_del(rdev->mc.vram_mtrr);
	arch_io_free_memtype_wc(rdev->mc.aper_base, rdev->mc.aper_size);
}

/* Returns how many bytes TTM can move per IB.
 */
static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev)
{
	u64 real_vram_size = rdev->mc.real_vram_size;
	u64 vram_usage = atomic64_read(&rdev->vram_usage);

	/* This function is based on the current VRAM usage.
	 *
	 * - If all of VRAM is free, allow relocating the number of bytes that
	 *   is equal to 1/4 of the size of VRAM for this IB.
	 *
	 * - If more than one half of VRAM is occupied, only allow relocating
	 *   1 MB of data for this IB.
	 *
	 * - From 0 to one half of used VRAM, the threshold decreases
	 *   linearly.
	 *    __________________
	 * 1/4 of -|\               |
	 * VRAM    | \              |
	 *         |  \             |
	 *         |   \            |
	 *         |    \           |
	 *         |     \          |
	 *         |      \         |
	 *         |       \________|1 MB
	 *         |----------------|
	 *    VRAM 0 %             100 %
	 *         used            used
	 *
	 * Note: It's a threshold, not a limit. The threshold must be crossed
	 * for buffer relocations to stop, so any buffer of an arbitrary size
	 * can be moved as long as the threshold isn't crossed before
	 * the relocation takes place. We don't want to disable buffer
	 * relocations completely.
	 *
	 * The idea is that buffers should be placed in VRAM at creation time
	 * and TTM should only do a minimum number of relocations during
	 * command submission. In practice, you need to submit at least
	 * a dozen IBs to move all buffers to VRAM if they are in GTT.
	 *
	 * Also, things can get pretty crazy under memory pressure and actual
	 * VRAM usage can change a lot, so playing safe even at 50% does
	 * consistently increase performance.
	 */

	u64 half_vram = real_vram_size >> 1;
	u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
	u64 bytes_moved_threshold = half_free_vram >> 1;
	return max(bytes_moved_threshold, 1024*1024ull);
}

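/*
 * Worked example (illustrative numbers): with real_vram_size = 2 GiB and
 * vram_usage = 512 MiB, half_vram = 1 GiB and half_free_vram = 512 MiB, so
 * up to 256 MiB may be relocated per IB.  Once usage reaches 1 GiB (half of
 * VRAM) or more, half_free_vram is 0 and the threshold bottoms out at the
 * 1 MiB floor.
 */
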
int radeon_bo_list_validate(struct radeon_device *rdev,
			    struct ww_acquire_ctx *ticket,
			    struct list_head *head, int ring)
{
	struct ttm_operation_ctx ctx = { true, false };
	struct radeon_bo_list *lobj;
	struct list_head duplicates;
	int r;
	u64 bytes_moved = 0, initial_bytes_moved;
	u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);

	INIT_LIST_HEAD(&duplicates);
	r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates);
	if (unlikely(r != 0)) {
		return r;
	}

	list_for_each_entry(lobj, head, tv.head) {
		struct radeon_bo *bo = lobj->robj;
		if (!bo->pin_count) {
			u32 domain = lobj->preferred_domains;
			u32 allowed = lobj->allowed_domains;
			u32 current_domain =
				radeon_mem_type_to_domain(bo->tbo.mem.mem_type);

			/* Check if this buffer will be moved and don't move it
			 * if we have moved too many buffers for this IB already.
			 *
			 * Note that this allows moving at least one buffer of
			 * any size, because it doesn't take the current "bo"
			 * into account. We don't want to disallow buffer moves
			 * completely.
			 */
			if ((allowed & current_domain) != 0 &&
			    (domain & current_domain) == 0 && /* will be moved */
			    bytes_moved > bytes_moved_threshold) {
				/* don't move it */
				domain = current_domain;
			}

		retry:
			radeon_ttm_placement_from_domain(bo, domain);
			if (ring == R600_RING_TYPE_UVD_INDEX)
				radeon_uvd_force_into_uvd_segment(bo, allowed);

			initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			bytes_moved += atomic64_read(&rdev->num_bytes_moved) -
				       initial_bytes_moved;

			if (unlikely(r)) {
				if (r != -ERESTARTSYS &&
				    domain != lobj->allowed_domains) {
					domain = lobj->allowed_domains;
					goto retry;
				}
				ttm_eu_backoff_reservation(ticket, head);
				return r;
			}
		}
		lobj->gpu_offset = radeon_bo_gpu_offset(bo);
		lobj->tiling_flags = bo->tiling_flags;
	}

	list_for_each_entry(lobj, &duplicates, tv.head) {
		lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj);
		lobj->tiling_flags = lobj->robj->tiling_flags;
	}

	return 0;
}

int radeon_bo_get_surface_reg(struct radeon_bo *bo)
{
	struct radeon_device *rdev = bo->rdev;
	struct radeon_surface_reg *reg;
	struct radeon_bo *old_object;
	int steal;
	int i;

	dma_resv_assert_held(bo->tbo.base.resv);

	if (!bo->tiling_flags)
		return 0;

	if (bo->surface_reg >= 0) {
		reg = &rdev->surface_regs[bo->surface_reg];
		i = bo->surface_reg;
		goto out;
	}

	steal = -1;
	for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) {

		reg = &rdev->surface_regs[i];
		if (!reg->bo)
			break;

		old_object = reg->bo;
		if (old_object->pin_count == 0)
			steal = i;
	}

	/* if we are all out */
	if (i == RADEON_GEM_MAX_SURFACES) {
		if (steal == -1)
			return -ENOMEM;
		/* find someone with a surface reg and nuke their BO */
		reg = &rdev->surface_regs[steal];
		old_object = reg->bo;
		/* blow away the mapping */
		DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object);
		ttm_bo_unmap_virtual(&old_object->tbo);
		old_object->surface_reg = -1;
		i = steal;
	}

	bo->surface_reg = i;
	reg->bo = bo;

out:
	radeon_set_surface_reg(rdev, i, bo->tiling_flags, bo->pitch,
			       bo->tbo.mem.start << PAGE_SHIFT,
			       bo->tbo.num_pages << PAGE_SHIFT);
	return 0;
}

static void radeon_bo_clear_surface_reg(struct radeon_bo *bo)
{
	struct radeon_device *rdev = bo->rdev;
	struct radeon_surface_reg *reg;

	if (bo->surface_reg == -1)
		return;

	reg = &rdev->surface_regs[bo->surface_reg];
	radeon_clear_surface_reg(rdev, bo->surface_reg);

	reg->bo = NULL;
	bo->surface_reg = -1;
}

int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
			       uint32_t tiling_flags, uint32_t pitch)
{
	struct radeon_device *rdev = bo->rdev;
	int r;

	if (rdev->family >= CHIP_CEDAR) {
		unsigned bankw, bankh, mtaspect, tilesplit, stilesplit;

		bankw = (tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
		bankh = (tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
		mtaspect = (tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
		tilesplit = (tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
		stilesplit = (tiling_flags >> RADEON_TILING_EG_STENCIL_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK;
		switch (bankw) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		switch (bankh) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		switch (mtaspect) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		if (tilesplit > 6) {
			return -EINVAL;
		}
		if (stilesplit > 6) {
			return -EINVAL;
		}
	}
	r = radeon_bo_reserve(bo, false);
	if (unlikely(r != 0))
		return r;
	bo->tiling_flags = tiling_flags;
	bo->pitch = pitch;
	radeon_bo_unreserve(bo);
	return 0;
}

void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
				uint32_t *tiling_flags,
				uint32_t *pitch)
{
	dma_resv_assert_held(bo->tbo.base.resv);

	if (tiling_flags)
		*tiling_flags = bo->tiling_flags;
	if (pitch)
		*pitch = bo->pitch;
}

int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
			   bool force_drop)
{
	if (!force_drop)
		dma_resv_assert_held(bo->tbo.base.resv);

	if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
		return 0;

	if (force_drop) {
		radeon_bo_clear_surface_reg(bo);
		return 0;
	}

	if (bo->tbo.mem.mem_type != TTM_PL_VRAM) {
		if (!has_moved)
			return 0;

		if (bo->surface_reg >= 0)
			radeon_bo_clear_surface_reg(bo);
		return 0;
	}

	if ((bo->surface_reg >= 0) && !has_moved)
		return 0;

	return radeon_bo_get_surface_reg(bo);
}

void radeon_bo_move_notify(struct ttm_buffer_object *bo,
			   bool evict,
			   struct ttm_mem_reg *new_mem)
{
	struct radeon_bo *rbo;

	if (!radeon_ttm_bo_is_radeon_bo(bo))
		return;

	rbo = container_of(bo, struct radeon_bo, tbo);
	radeon_bo_check_tiling(rbo, 0, 1);
	radeon_vm_bo_invalidate(rbo->rdev, rbo);

	/* update statistics */
	if (!new_mem)
		return;

	radeon_update_memory_usage(rbo, bo->mem.mem_type, -1);
	radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
}

int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
	struct ttm_operation_ctx ctx = { false, false };
	struct radeon_device *rdev;
	struct radeon_bo *rbo;
	unsigned long offset, size, lpfn;
	int i, r;

	if (!radeon_ttm_bo_is_radeon_bo(bo))
		return 0;
	rbo = container_of(bo, struct radeon_bo, tbo);
	radeon_bo_check_tiling(rbo, 0, 0);
	rdev = rbo->rdev;
	if (bo->mem.mem_type != TTM_PL_VRAM)
		return 0;

	size = bo->mem.num_pages << PAGE_SHIFT;
	offset = bo->mem.start << PAGE_SHIFT;
	if ((offset + size) <= rdev->mc.visible_vram_size)
		return 0;

	/* Can't move a pinned BO to visible VRAM */
	if (rbo->pin_count > 0)
		return -EINVAL;

	/* hurrah the memory is not visible ! */
	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
	lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
	for (i = 0; i < rbo->placement.num_placement; i++) {
		/* Force into visible VRAM */
		if ((rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    (!rbo->placements[i].lpfn || rbo->placements[i].lpfn > lpfn))
			rbo->placements[i].lpfn = lpfn;
	}
	r = ttm_bo_validate(bo, &rbo->placement, &ctx);
	if (unlikely(r == -ENOMEM)) {
		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
		return ttm_bo_validate(bo, &rbo->placement, &ctx);
	} else if (unlikely(r != 0)) {
		return r;
	}

	offset = bo->mem.start << PAGE_SHIFT;
	/* this should never happen */
	if ((offset + size) > rdev->mc.visible_vram_size)
		return -EINVAL;

	return 0;
}

int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
{
	int r;

	r = ttm_bo_reserve(&bo->tbo, true, no_wait, NULL);
	if (unlikely(r != 0))
		return r;
	if (mem_type)
		*mem_type = bo->tbo.mem.mem_type;

	r = ttm_bo_wait(&bo->tbo, true, no_wait);
	ttm_bo_unreserve(&bo->tbo);
	return r;
}

/**
 * radeon_bo_fence - add fence to buffer object
 *
 * @bo: buffer object in question
 * @fence: fence to add
 * @shared: true if fence should be added shared
 *
 */
void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
		     bool shared)
{
	struct dma_resv *resv = bo->tbo.base.resv;

	if (shared)
		dma_resv_add_shared_fence(resv, &fence->base);
	else
		dma_resv_add_excl_fence(resv, &fence->base);
}