      1 /*	$NetBSD: radeon_object.c,v 1.8 2021/12/18 23:45:43 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright 2009 Jerome Glisse.
      5  * All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the
      9  * "Software"), to deal in the Software without restriction, including
     10  * without limitation the rights to use, copy, modify, merge, publish,
     11  * distribute, sub license, and/or sell copies of the Software, and to
     12  * permit persons to whom the Software is furnished to do so, subject to
     13  * the following conditions:
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * The above copyright notice and this permission notice (including the
     24  * next paragraph) shall be included in all copies or substantial portions
     25  * of the Software.
     26  *
     27  */
     28 /*
     29  * Authors:
     30  *    Jerome Glisse <glisse (at) freedesktop.org>
     31  *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
     32  *    Dave Airlie
     33  */
     34 
     35 #include <sys/cdefs.h>
     36 __KERNEL_RCSID(0, "$NetBSD: radeon_object.c,v 1.8 2021/12/18 23:45:43 riastradh Exp $");
     37 
     38 #include <linux/io.h>
     39 #include <linux/list.h>
     40 #include <linux/slab.h>
     41 
     42 #include <drm/drm_cache.h>
     43 #include <drm/drm_prime.h>
     44 #include <drm/radeon_drm.h>
     45 
     46 #include "radeon.h"
     47 #include "radeon_trace.h"
     48 
     49 #include <linux/nbsd-namespace.h>
     50 
     51 int radeon_ttm_init(struct radeon_device *rdev);
     52 void radeon_ttm_fini(struct radeon_device *rdev);
     53 static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);
     54 
     55 /*
     56  * To exclude concurrent BO access we rely on bo_reserve exclusion,
     57  * as all functions call it.
     58  */
     59 
     60 static void radeon_update_memory_usage(struct radeon_bo *bo,
     61 				       unsigned mem_type, int sign)
     62 {
     63 	struct radeon_device *rdev = bo->rdev;
     64 	u64 size = (u64)bo->tbo.num_pages << PAGE_SHIFT;
     65 
     66 	switch (mem_type) {
     67 	case TTM_PL_TT:
     68 		if (sign > 0)
     69 			atomic64_add(size, &rdev->gtt_usage);
     70 		else
     71 			atomic64_sub(size, &rdev->gtt_usage);
     72 		break;
     73 	case TTM_PL_VRAM:
     74 		if (sign > 0)
     75 			atomic64_add(size, &rdev->vram_usage);
     76 		else
     77 			atomic64_sub(size, &rdev->vram_usage);
     78 		break;
     79 	}
     80 }
     81 
     82 static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
     83 {
     84 	struct radeon_bo *bo;
     85 
     86 	bo = container_of(tbo, struct radeon_bo, tbo);
     87 
     88 	radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1);
     89 
     90 	mutex_lock(&bo->rdev->gem.mutex);
     91 	list_del_init(&bo->list);
     92 	mutex_unlock(&bo->rdev->gem.mutex);
     93 	radeon_bo_clear_surface_reg(bo);
     94 	WARN_ON_ONCE(!list_empty(&bo->va));
     95 	if (bo->tbo.base.import_attach)
     96 		drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg);
     97 	drm_gem_object_release(&bo->tbo.base);
     98 	kfree(bo);
     99 }
    100 
    101 bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
    102 {
    103 	if (bo->destroy == &radeon_ttm_bo_destroy)
    104 		return true;
    105 	return false;
    106 }
    107 
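        /**
         * radeon_ttm_placement_from_domain - build TTM placements for a domain mask
         *
         * @rbo: buffer object whose placement list is filled in
         * @domain: mask of RADEON_GEM_DOMAIN_{VRAM,GTT,CPU} bits
         *
         * Translates the requested domains into rbo->placements, honouring the
         * BO flags (NO_CPU_ACCESS, GTT_UC/GTT_WC), and falls back to system
         * memory when no domain bit is set.
         */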
    108 void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
    109 {
    110 	u32 c = 0, i;
    111 
    112 	rbo->placement.placement = rbo->placements;
    113 	rbo->placement.busy_placement = rbo->placements;
    114 	if (domain & RADEON_GEM_DOMAIN_VRAM) {
    115 		/* Try placing BOs which don't need CPU access outside of the
    116 		 * CPU accessible part of VRAM
    117 		 */
    118 		if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
    119 		    rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
    120 			rbo->placements[c].fpfn =
    121 				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
    122 			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
    123 						     TTM_PL_FLAG_UNCACHED |
    124 						     TTM_PL_FLAG_VRAM;
    125 		}
    126 
    127 		rbo->placements[c].fpfn = 0;
    128 		rbo->placements[c++].flags = TTM_PL_FLAG_WC |
    129 					     TTM_PL_FLAG_UNCACHED |
    130 					     TTM_PL_FLAG_VRAM;
    131 	}
    132 
    133 	if (domain & RADEON_GEM_DOMAIN_GTT) {
    134 		if (rbo->flags & RADEON_GEM_GTT_UC) {
    135 			rbo->placements[c].fpfn = 0;
    136 			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
    137 				TTM_PL_FLAG_TT;
    138 
    139 		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
    140 			   (rbo->rdev->flags & RADEON_IS_AGP)) {
    141 			rbo->placements[c].fpfn = 0;
    142 			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
    143 				TTM_PL_FLAG_UNCACHED |
    144 				TTM_PL_FLAG_TT;
    145 		} else {
    146 			rbo->placements[c].fpfn = 0;
    147 			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
    148 						     TTM_PL_FLAG_TT;
    149 		}
    150 	}
    151 
    152 	if (domain & RADEON_GEM_DOMAIN_CPU) {
    153 		if (rbo->flags & RADEON_GEM_GTT_UC) {
    154 			rbo->placements[c].fpfn = 0;
    155 			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
    156 				TTM_PL_FLAG_SYSTEM;
    157 
    158 		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
    159 		    rbo->rdev->flags & RADEON_IS_AGP) {
    160 			rbo->placements[c].fpfn = 0;
    161 			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
    162 				TTM_PL_FLAG_UNCACHED |
    163 				TTM_PL_FLAG_SYSTEM;
    164 		} else {
    165 			rbo->placements[c].fpfn = 0;
    166 			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
    167 						     TTM_PL_FLAG_SYSTEM;
    168 		}
    169 	}
    170 	if (!c) {
    171 		rbo->placements[c].fpfn = 0;
    172 		rbo->placements[c++].flags = TTM_PL_MASK_CACHING |
    173 					     TTM_PL_FLAG_SYSTEM;
    174 	}
    175 
    176 	rbo->placement.num_placement = c;
    177 	rbo->placement.num_busy_placement = c;
    178 
    179 	for (i = 0; i < c; ++i) {
    180 		if ((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
    181 		    (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
    182 		    !rbo->placements[i].fpfn)
    183 			rbo->placements[i].lpfn =
    184 				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
    185 		else
    186 			rbo->placements[i].lpfn = 0;
    187 	}
    188 }
    189 
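        /**
         * radeon_bo_create - allocate and initialize a radeon buffer object
         *
         * @rdev: radeon device the BO belongs to
         * @size: requested size in bytes (rounded up to page granularity)
         * @byte_align: requested alignment in bytes
         * @kernel: true for kernel-internal objects not mapped to userspace
         * @domain: initial placement domain(s)
         * @flags: RADEON_GEM_* creation flags
         * @sg: scatter/gather table for imported dma-buf objects, or NULL
         * @resv: optional reservation object to share, or NULL
         * @bo_ptr: filled with the new BO on success
         *
         * Returns 0 on success, negative error code on failure.
         */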
    190 int radeon_bo_create(struct radeon_device *rdev,
    191 		     unsigned long size, int byte_align, bool kernel,
    192 		     u32 domain, u32 flags, struct sg_table *sg,
    193 		     struct dma_resv *resv,
    194 		     struct radeon_bo **bo_ptr)
    195 {
    196 	struct radeon_bo *bo;
    197 	enum ttm_bo_type type;
    198 	unsigned long page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
    199 	size_t acc_size;
    200 	int r;
    201 
    202 	size = ALIGN(size, PAGE_SIZE);
    203 
    204 	if (kernel) {
    205 		type = ttm_bo_type_kernel;
    206 	} else if (sg) {
    207 		type = ttm_bo_type_sg;
    208 	} else {
    209 		type = ttm_bo_type_device;
    210 	}
    211 	*bo_ptr = NULL;
    212 
    213 	acc_size = ttm_bo_dma_acc_size(&rdev->mman.bdev, size,
    214 				       sizeof(struct radeon_bo));
    215 
    216 	bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL);
    217 	if (bo == NULL)
    218 		return -ENOMEM;
    219 	drm_gem_private_object_init(rdev->ddev, &bo->tbo.base, size);
    220 	bo->rdev = rdev;
    221 	bo->surface_reg = -1;
    222 	INIT_LIST_HEAD(&bo->list);
    223 	INIT_LIST_HEAD(&bo->va);
    224 	bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM |
    225 				       RADEON_GEM_DOMAIN_GTT |
    226 				       RADEON_GEM_DOMAIN_CPU);
    227 
    228 	bo->flags = flags;
    229 	/* PCI GART is always snooped */
    230 	if (!(rdev->flags & RADEON_IS_PCIE))
    231 		bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
    232 
    233 	/* Write-combined CPU mappings of GTT cause GPU hangs with RV6xx
    234 	 * See https://bugs.freedesktop.org/show_bug.cgi?id=91268
    235 	 */
    236 	if (rdev->family >= CHIP_RV610 && rdev->family <= CHIP_RV635)
    237 		bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
    238 
    239 #ifdef CONFIG_X86_32
    240 	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
    241 	 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
    242 	 */
    243 	bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
    244 #elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
    245 	/* Don't try to enable write-combining when it can't work, or things
    246 	 * may be slow
    247 	 * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
    248 	 */
    249 #ifndef CONFIG_COMPILE_TEST
    250 #warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
    251 	 thanks to write-combining
    252 #endif
    253 
    254 	if (bo->flags & RADEON_GEM_GTT_WC)
    255 		DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
    256 			      "better performance thanks to write-combining\n");
    257 	bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
    258 #else
    259 	/* For architectures that don't support WC memory,
    260 	 * mask out the WC flag from the BO
    261 	 */
    262 	if (!drm_arch_can_wc_memory())
    263 		bo->flags &= ~RADEON_GEM_GTT_WC;
    264 #endif
    265 
    266 	radeon_ttm_placement_from_domain(bo, domain);
    267 	/* Kernel allocations are uninterruptible */
    268 	down_read(&rdev->pm.mclk_lock);
    269 	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
    270 			&bo->placement, page_align, !kernel, acc_size,
    271 			sg, resv, &radeon_ttm_bo_destroy);
    272 	up_read(&rdev->pm.mclk_lock);
    273 	if (unlikely(r != 0)) {
    274 		return r;
    275 	}
    276 	*bo_ptr = bo;
    277 
    278 	trace_radeon_bo_create(bo);
    279 
    280 	return 0;
    281 }
    282 
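        /**
         * radeon_bo_kmap - map a buffer object into kernel address space
         *
         * @bo: buffer object to map
         * @ptr: optional return of the kernel virtual address
         *
         * Reuses an existing kernel mapping if the BO is already mapped.
         * Returns 0 on success, negative error code on failure.
         */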
    283 int radeon_bo_kmap(struct radeon_bo *bo, void **ptr)
    284 {
    285 	bool is_iomem;
    286 	int r;
    287 
    288 	if (bo->kptr) {
    289 		if (ptr) {
    290 			*ptr = bo->kptr;
    291 		}
    292 		return 0;
    293 	}
    294 	r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
    295 	if (r) {
    296 		return r;
    297 	}
    298 	bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
    299 	if (ptr) {
    300 		*ptr = bo->kptr;
    301 	}
    302 	radeon_bo_check_tiling(bo, 0, 0);
    303 	return 0;
    304 }
    305 
    306 void radeon_bo_kunmap(struct radeon_bo *bo)
    307 {
    308 	if (bo->kptr == NULL)
    309 		return;
    310 	bo->kptr = NULL;
    311 	radeon_bo_check_tiling(bo, 0, 0);
    312 	ttm_bo_kunmap(&bo->kmap);
    313 }
    314 
    315 struct radeon_bo *radeon_bo_ref(struct radeon_bo *bo)
    316 {
    317 	if (bo == NULL)
    318 		return NULL;
    319 
    320 	ttm_bo_get(&bo->tbo);
    321 	return bo;
    322 }
    323 
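        /**
         * radeon_bo_unref - drop a reference to a buffer object
         *
         * @bo: pointer to the BO pointer; cleared to NULL on return
         *
         * The underlying TTM object is freed through radeon_ttm_bo_destroy()
         * once the last reference is gone.
         */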
    324 void radeon_bo_unref(struct radeon_bo **bo)
    325 {
    326 	struct ttm_buffer_object *tbo;
    327 
    328 	if ((*bo) == NULL)
    329 		return;
    330 	tbo = &((*bo)->tbo);
    331 	ttm_bo_put(tbo);
    332 	*bo = NULL;
    333 }
    334 
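        /**
         * radeon_bo_pin_restricted - pin a BO into a domain below a maximum offset
         *
         * @bo: buffer object to pin
         * @domain: domain to pin the BO into
         * @max_offset: highest acceptable GPU offset, or 0 for no restriction
         * @gpu_addr: optional return of the pinned GPU address
         *
         * A BO that is already pinned only has its pin count bumped; the first
         * pin validates it with TTM_PL_FLAG_NO_EVICT set.  Userptr BOs cannot
         * be pinned, and prime-shared BOs cannot be pinned into VRAM.
         * Returns 0 on success, negative error code on failure.
         */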
    335 int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
    336 			     u64 *gpu_addr)
    337 {
    338 	struct ttm_operation_ctx ctx = { false, false };
    339 	int r, i;
    340 
    341 	if (radeon_ttm_tt_has_userptr(bo->tbo.ttm))
    342 		return -EPERM;
    343 
    344 	if (bo->pin_count) {
    345 		bo->pin_count++;
    346 		if (gpu_addr)
    347 			*gpu_addr = radeon_bo_gpu_offset(bo);
    348 
    349 		if (max_offset != 0) {
    350 			u64 domain_start;
    351 
    352 			if (domain == RADEON_GEM_DOMAIN_VRAM)
    353 				domain_start = bo->rdev->mc.vram_start;
    354 			else
    355 				domain_start = bo->rdev->mc.gtt_start;
    356 			WARN_ON_ONCE(max_offset <
    357 				     (radeon_bo_gpu_offset(bo) - domain_start));
    358 		}
    359 
    360 		return 0;
    361 	}
    362 	if (bo->prime_shared_count && domain == RADEON_GEM_DOMAIN_VRAM) {
    363 		/* A BO shared as a dma-buf cannot be sensibly migrated to VRAM */
    364 		return -EINVAL;
    365 	}
    366 
    367 	radeon_ttm_placement_from_domain(bo, domain);
    368 	for (i = 0; i < bo->placement.num_placement; i++) {
    369 		/* force pinning into visible video RAM */
    370 		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
    371 		    !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
    372 		    (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
    373 			bo->placements[i].lpfn =
    374 				bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
    375 		else
    376 			bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;
    377 
    378 		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
    379 	}
    380 
    381 	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
    382 	if (likely(r == 0)) {
    383 		bo->pin_count = 1;
    384 		if (gpu_addr != NULL)
    385 			*gpu_addr = radeon_bo_gpu_offset(bo);
    386 		if (domain == RADEON_GEM_DOMAIN_VRAM)
    387 			bo->rdev->vram_pin_size += radeon_bo_size(bo);
    388 		else
    389 			bo->rdev->gart_pin_size += radeon_bo_size(bo);
    390 	} else {
    391 		dev_err(bo->rdev->dev, "%p pin failed\n", bo);
    392 	}
    393 	return r;
    394 }
    395 
    396 int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 *gpu_addr)
    397 {
    398 	return radeon_bo_pin_restricted(bo, domain, 0, gpu_addr);
    399 }
    400 
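        /**
         * radeon_bo_unpin - decrement the pin count of a buffer object
         *
         * @bo: buffer object to unpin
         *
         * When the pin count reaches zero the NO_EVICT flag is cleared and the
         * BO is revalidated so TTM is free to move it again.
         */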
    401 int radeon_bo_unpin(struct radeon_bo *bo)
    402 {
    403 	struct ttm_operation_ctx ctx = { false, false };
    404 	int r, i;
    405 
    406 	if (!bo->pin_count) {
    407 		dev_warn(bo->rdev->dev, "%p unpin not necessary\n", bo);
    408 		return 0;
    409 	}
    410 	bo->pin_count--;
    411 	if (bo->pin_count)
    412 		return 0;
    413 	for (i = 0; i < bo->placement.num_placement; i++) {
    414 		bo->placements[i].lpfn = 0;
    415 		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
    416 	}
    417 	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
    418 	if (likely(r == 0)) {
    419 		if (bo->tbo.mem.mem_type == TTM_PL_VRAM)
    420 			bo->rdev->vram_pin_size -= radeon_bo_size(bo);
    421 		else
    422 			bo->rdev->gart_pin_size -= radeon_bo_size(bo);
    423 	} else {
    424 		dev_err(bo->rdev->dev, "%p validate failed for unpin\n", bo);
    425 	}
    426 	return r;
    427 }
    428 
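        /**
         * radeon_bo_evict_vram - evict all buffer objects from VRAM
         *
         * @rdev: radeon device
         *
         * Typically called when suspending the device.  On IGP chips without
         * sideport memory the eviction is skipped unless hibernation support
         * is configured.
         */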
    429 int radeon_bo_evict_vram(struct radeon_device *rdev)
    430 {
    431 	/* late 2.6.33 fix for IGP hibernate - we need pm ops to do this correctly */
    432 #ifndef CONFIG_HIBERNATION
    433 	if (rdev->flags & RADEON_IS_IGP) {
    434 		if (rdev->mc.igp_sideport_enabled == false)
    435 			/* Useless to evict on IGP chips */
    436 			return 0;
    437 	}
    438 #endif
    439 	return ttm_bo_evict_mm(&rdev->mman.bdev, TTM_PL_VRAM);
    440 }
    441 
    442 void radeon_bo_force_delete(struct radeon_device *rdev)
    443 {
    444 	struct radeon_bo *bo, *n;
    445 
    446 	if (list_empty(&rdev->gem.objects)) {
    447 		return;
    448 	}
    449 	dev_err(rdev->dev, "Userspace still has active objects!\n");
    450 	list_for_each_entry_safe(bo, n, &rdev->gem.objects, list) {
    451 		dev_err(rdev->dev, "%p %p %lu %lu force free\n",
    452 			&bo->tbo.base, bo, (unsigned long)bo->tbo.base.size,
    453 			*((unsigned long *)&bo->tbo.base.refcount));
    454 		mutex_lock(&bo->rdev->gem.mutex);
    455 		list_del_init(&bo->list);
    456 		mutex_unlock(&bo->rdev->gem.mutex);
    457 		/* this should unref the ttm bo */
    458 		drm_gem_object_put_unlocked(&bo->tbo.base);
    459 	}
    460 }
    461 
    462 int radeon_bo_init(struct radeon_device *rdev)
    463 {
    464 	/* reserve PAT memory space to WC for VRAM */
    465 	arch_io_reserve_memtype_wc(rdev->mc.aper_base,
    466 				   rdev->mc.aper_size);
    467 
    468 	/* Add an MTRR for the VRAM */
    469 	if (!rdev->fastfb_working) {
    470 		rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base,
    471 						      rdev->mc.aper_size);
    472 	}
    473 #ifdef __NetBSD__
    474 	if (rdev->mc.aper_base)
    475 		pmap_pv_track(rdev->mc.aper_base, rdev->mc.aper_size);
    476 #endif
    477 	DRM_INFO("Detected VRAM RAM=%"PRIx64"M, BAR=%lluM\n",
    478 		rdev->mc.mc_vram_size >> 20,
    479 		(unsigned long long)rdev->mc.aper_size >> 20);
    480 	DRM_INFO("RAM width %dbits %cDR\n",
    481 			rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
    482 	return radeon_ttm_init(rdev);
    483 }
    484 
    485 void radeon_bo_fini(struct radeon_device *rdev)
    486 {
    487 	radeon_ttm_fini(rdev);
    488 #ifdef __NetBSD__
    489 	if (rdev->mc.aper_base)
    490 		pmap_pv_untrack(rdev->mc.aper_base, rdev->mc.aper_size);
    491 #endif
    492 	arch_phys_wc_del(rdev->mc.vram_mtrr);
    493 	arch_io_free_memtype_wc(rdev->mc.aper_base, rdev->mc.aper_size);
    494 }
    495 
    496 /* Returns how many bytes TTM can move per IB.
    497  */
    498 static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev)
    499 {
    500 	u64 real_vram_size = rdev->mc.real_vram_size;
    501 	u64 vram_usage = atomic64_read(&rdev->vram_usage);
    502 
    503 	/* This function is based on the current VRAM usage.
    504 	 *
    505 	 * - If all of VRAM is free, allow relocating the number of bytes that
    506 	 *   is equal to 1/4 of the size of VRAM for this IB.
    507 
    508 	 *
    509 	 *   1 MB of data for this IB.
    510 	 *
    511 	 * - From 0 to one half of used VRAM, the threshold decreases
    512 	 *   linearly.
    513 	 *         __________________
    514 	 * 1/4 of -|\               |
    515 	 * VRAM    | \              |
    516 	 *         |  \             |
    517 	 *         |   \            |
    518 	 *         |    \           |
    519 	 *         |     \          |
    520 	 *         |      \         |
    521 	 *         |       \________|1 MB
    522 	 *         |----------------|
    523 	 *    VRAM 0 %             100 %
    524 	 *         used            used
    525 	 *
    526 	 * Note: It's a threshold, not a limit. The threshold must be crossed
    527 	 * for buffer relocations to stop, so any buffer of an arbitrary size
    528 	 * can be moved as long as the threshold isn't crossed before
    529 	 * the relocation takes place. We don't want to disable buffer
    530 	 * relocations completely.
    531 	 *
    532 	 * The idea is that buffers should be placed in VRAM at creation time
    533 	 * and TTM should only do a minimum number of relocations during
    534 	 * command submission. In practice, you need to submit at least
    535 	 * a dozen IBs to move all buffers to VRAM if they are in GTT.
    536 	 *
    537 	 * Also, things can get pretty crazy under memory pressure and actual
    538 	 * VRAM usage can change a lot, so playing safe even at 50% does
    539 	 * consistently increase performance.
    540 	 */
    541 
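        	/* Worked example: with 2 GiB of real VRAM and 512 MiB already in
        	 * use, half_vram is 1 GiB, half_free_vram is 512 MiB, and the
        	 * bytes_moved_threshold comes out to 256 MiB for this IB.
        	 */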
    542 	u64 half_vram = real_vram_size >> 1;
    543 	u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
    544 	u64 bytes_moved_threshold = half_free_vram >> 1;
    545 	return max(bytes_moved_threshold, 1024*1024ull);
    546 }
    547 
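        /**
         * radeon_bo_list_validate - reserve and place all BOs for a command submission
         *
         * @rdev: radeon device
         * @ticket: ww_acquire context used for the reservations
         * @head: list of radeon_bo_list entries to validate
         * @ring: ring the command submission is targeted at
         *
         * Moves unpinned BOs towards their preferred domains as long as the
         * per-IB move threshold is not exceeded, then records the GPU offset
         * and tiling flags of every entry.  All reservations are backed off
         * on error.
         */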
    548 int radeon_bo_list_validate(struct radeon_device *rdev,
    549 			    struct ww_acquire_ctx *ticket,
    550 			    struct list_head *head, int ring)
    551 {
    552 	struct ttm_operation_ctx ctx = { true, false };
    553 	struct radeon_bo_list *lobj;
    554 	struct list_head duplicates;
    555 	int r;
    556 	u64 bytes_moved = 0, initial_bytes_moved;
    557 	u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);
    558 
    559 	INIT_LIST_HEAD(&duplicates);
    560 	r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates);
    561 	if (unlikely(r != 0)) {
    562 		return r;
    563 	}
    564 
    565 	list_for_each_entry(lobj, head, tv.head) {
    566 		struct radeon_bo *bo = lobj->robj;
    567 		if (!bo->pin_count) {
    568 			u32 domain = lobj->preferred_domains;
    569 			u32 allowed = lobj->allowed_domains;
    570 			u32 current_domain =
    571 				radeon_mem_type_to_domain(bo->tbo.mem.mem_type);
    572 
    573 			/* Check if this buffer will be moved and don't move it
    574 			 * if we have moved too many buffers for this IB already.
    575 			 *
    576 			 * Note that this allows moving at least one buffer of
    577 			 * any size, because it doesn't take the current "bo"
    578 			 * into account. We don't want to disallow buffer moves
    579 			 * completely.
    580 			 */
    581 			if ((allowed & current_domain) != 0 &&
    582 			    (domain & current_domain) == 0 && /* will be moved */
    583 			    bytes_moved > bytes_moved_threshold) {
    584 				/* don't move it */
    585 				domain = current_domain;
    586 			}
    587 
    588 		retry:
    589 			radeon_ttm_placement_from_domain(bo, domain);
    590 			if (ring == R600_RING_TYPE_UVD_INDEX)
    591 				radeon_uvd_force_into_uvd_segment(bo, allowed);
    592 
    593 			initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
    594 			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
    595 			bytes_moved += atomic64_read(&rdev->num_bytes_moved) -
    596 				       initial_bytes_moved;
    597 
    598 			if (unlikely(r)) {
    599 				if (r != -ERESTARTSYS &&
    600 				    domain != lobj->allowed_domains) {
    601 					domain = lobj->allowed_domains;
    602 					goto retry;
    603 				}
    604 				ttm_eu_backoff_reservation(ticket, head);
    605 				return r;
    606 			}
    607 		}
    608 		lobj->gpu_offset = radeon_bo_gpu_offset(bo);
    609 		lobj->tiling_flags = bo->tiling_flags;
    610 	}
    611 
    612 	list_for_each_entry(lobj, &duplicates, tv.head) {
    613 		lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj);
    614 		lobj->tiling_flags = lobj->robj->tiling_flags;
    615 	}
    616 
    617 	return 0;
    618 }
    619 
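        /**
         * radeon_bo_get_surface_reg - bind a tiled BO to a hardware surface register
         *
         * @bo: buffer object (reserved) with tiling flags set
         *
         * Reuses the BO's current register, takes a free one, or steals the
         * register of an unpinned BO; returns -ENOMEM when none is available.
         */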
    620 int radeon_bo_get_surface_reg(struct radeon_bo *bo)
    621 {
    622 	struct radeon_device *rdev = bo->rdev;
    623 	struct radeon_surface_reg *reg;
    624 	struct radeon_bo *old_object;
    625 	int steal;
    626 	int i;
    627 
    628 	dma_resv_assert_held(bo->tbo.base.resv);
    629 
    630 	if (!bo->tiling_flags)
    631 		return 0;
    632 
    633 	if (bo->surface_reg >= 0) {
    634 		reg = &rdev->surface_regs[bo->surface_reg];
    635 		i = bo->surface_reg;
    636 		goto out;
    637 	}
    638 
    639 	steal = -1;
    640 	for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) {
    641 
    642 		reg = &rdev->surface_regs[i];
    643 		if (!reg->bo)
    644 			break;
    645 
    646 		old_object = reg->bo;
    647 		if (old_object->pin_count == 0)
    648 			steal = i;
    649 	}
    650 
    651 	/* if we are all out */
    652 	if (i == RADEON_GEM_MAX_SURFACES) {
    653 		if (steal == -1)
    654 			return -ENOMEM;
    655 		/* find someone with a surface reg and nuke their BO */
    656 		reg = &rdev->surface_regs[steal];
    657 		old_object = reg->bo;
    658 		/* blow away the mapping */
    659 		DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object);
    660 		ttm_bo_unmap_virtual(&old_object->tbo);
    661 		old_object->surface_reg = -1;
    662 		i = steal;
    663 	}
    664 
    665 	bo->surface_reg = i;
    666 	reg->bo = bo;
    667 
    668 out:
    669 	radeon_set_surface_reg(rdev, i, bo->tiling_flags, bo->pitch,
    670 			       bo->tbo.mem.start << PAGE_SHIFT,
    671 			       bo->tbo.num_pages << PAGE_SHIFT);
    672 	return 0;
    673 }
    674 
    675 static void radeon_bo_clear_surface_reg(struct radeon_bo *bo)
    676 {
    677 	struct radeon_device *rdev = bo->rdev;
    678 	struct radeon_surface_reg *reg;
    679 
    680 	if (bo->surface_reg == -1)
    681 		return;
    682 
    683 	reg = &rdev->surface_regs[bo->surface_reg];
    684 	radeon_clear_surface_reg(rdev, bo->surface_reg);
    685 
    686 	reg->bo = NULL;
    687 	bo->surface_reg = -1;
    688 }
    689 
    690 int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
    691 				uint32_t tiling_flags, uint32_t pitch)
    692 {
    693 	struct radeon_device *rdev = bo->rdev;
    694 	int r;
    695 
    696 	if (rdev->family >= CHIP_CEDAR) {
    697 		unsigned bankw, bankh, mtaspect, tilesplit, stilesplit;
    698 
    699 		bankw = (tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
    700 		bankh = (tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
    701 		mtaspect = (tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
    702 		tilesplit = (tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
    703 		stilesplit = (tiling_flags >> RADEON_TILING_EG_STENCIL_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK;
    704 		switch (bankw) {
    705 		case 0:
    706 		case 1:
    707 		case 2:
    708 		case 4:
    709 		case 8:
    710 			break;
    711 		default:
    712 			return -EINVAL;
    713 		}
    714 		switch (bankh) {
    715 		case 0:
    716 		case 1:
    717 		case 2:
    718 		case 4:
    719 		case 8:
    720 			break;
    721 		default:
    722 			return -EINVAL;
    723 		}
    724 		switch (mtaspect) {
    725 		case 0:
    726 		case 1:
    727 		case 2:
    728 		case 4:
    729 		case 8:
    730 			break;
    731 		default:
    732 			return -EINVAL;
    733 		}
    734 		if (tilesplit > 6) {
    735 			return -EINVAL;
    736 		}
    737 		if (stilesplit > 6) {
    738 			return -EINVAL;
    739 		}
    740 	}
    741 	r = radeon_bo_reserve(bo, false);
    742 	if (unlikely(r != 0))
    743 		return r;
    744 	bo->tiling_flags = tiling_flags;
    745 	bo->pitch = pitch;
    746 	radeon_bo_unreserve(bo);
    747 	return 0;
    748 }
    749 
    750 void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
    751 				uint32_t *tiling_flags,
    752 				uint32_t *pitch)
    753 {
    754 	dma_resv_assert_held(bo->tbo.base.resv);
    755 
    756 	if (tiling_flags)
    757 		*tiling_flags = bo->tiling_flags;
    758 	if (pitch)
    759 		*pitch = bo->pitch;
    760 }
    761 
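        /**
         * radeon_bo_check_tiling - sync surface register state with BO placement
         *
         * @bo: buffer object to check
         * @has_moved: true if the BO has just been moved
         * @force_drop: true to unconditionally release the surface register
         *
         * Only BOs with RADEON_TILING_SURFACE set and resident in VRAM are
         * given a surface register; the register is dropped when the BO moves
         * out of VRAM or when @force_drop is set.
         */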
    762 int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
    763 				bool force_drop)
    764 {
    765 	if (!force_drop)
    766 		dma_resv_assert_held(bo->tbo.base.resv);
    767 
    768 	if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
    769 		return 0;
    770 
    771 	if (force_drop) {
    772 		radeon_bo_clear_surface_reg(bo);
    773 		return 0;
    774 	}
    775 
    776 	if (bo->tbo.mem.mem_type != TTM_PL_VRAM) {
    777 		if (!has_moved)
    778 			return 0;
    779 
    780 		if (bo->surface_reg >= 0)
    781 			radeon_bo_clear_surface_reg(bo);
    782 		return 0;
    783 	}
    784 
    785 	if ((bo->surface_reg >= 0) && !has_moved)
    786 		return 0;
    787 
    788 	return radeon_bo_get_surface_reg(bo);
    789 }
    790 
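        /**
         * radeon_bo_move_notify - TTM callback invoked when a BO is moved
         *
         * @bo: TTM buffer object being moved
         * @evict: true if the move is an eviction
         * @new_mem: new memory region, or NULL when the backing store goes away
         *
         * Drops stale surface registers, invalidates VM mappings and updates
         * the per-domain memory usage statistics.
         */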
    791 void radeon_bo_move_notify(struct ttm_buffer_object *bo,
    792 			   bool evict,
    793 			   struct ttm_mem_reg *new_mem)
    794 {
    795 	struct radeon_bo *rbo;
    796 
    797 	if (!radeon_ttm_bo_is_radeon_bo(bo))
    798 		return;
    799 
    800 	rbo = container_of(bo, struct radeon_bo, tbo);
    801 	radeon_bo_check_tiling(rbo, 0, 1);
    802 	radeon_vm_bo_invalidate(rbo->rdev, rbo);
    803 
    804 	/* update statistics */
    805 	if (!new_mem)
    806 		return;
    807 
    808 	radeon_update_memory_usage(rbo, bo->mem.mem_type, -1);
    809 	radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
    810 }
    811 
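        /**
         * radeon_bo_fault_reserve_notify - handle a CPU fault on CPU-invisible VRAM
         *
         * @bo: TTM buffer object that faulted
         *
         * If the BO lies outside the CPU-visible part of VRAM it is moved into
         * visible VRAM, falling back to GTT when that fails with -ENOMEM.  A
         * pinned BO outside the visible range cannot be moved and the fault
         * fails with -EINVAL.
         */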
    812 int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
    813 {
    814 	struct ttm_operation_ctx ctx = { false, false };
    815 	struct radeon_device *rdev;
    816 	struct radeon_bo *rbo;
    817 	unsigned long offset, size, lpfn;
    818 	int i, r;
    819 
    820 	if (!radeon_ttm_bo_is_radeon_bo(bo))
    821 		return 0;
    822 	rbo = container_of(bo, struct radeon_bo, tbo);
    823 	radeon_bo_check_tiling(rbo, 0, 0);
    824 	rdev = rbo->rdev;
    825 	if (bo->mem.mem_type != TTM_PL_VRAM)
    826 		return 0;
    827 
    828 	size = bo->mem.num_pages << PAGE_SHIFT;
    829 	offset = bo->mem.start << PAGE_SHIFT;
    830 	if ((offset + size) <= rdev->mc.visible_vram_size)
    831 		return 0;
    832 
    833 	/* Can't move a pinned BO to visible VRAM */
    834 	if (rbo->pin_count > 0)
    835 		return -EINVAL;
    836 
    837 	/* hurrah, the memory is not visible! */
    838 	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
    839 	lpfn =	rdev->mc.visible_vram_size >> PAGE_SHIFT;
    840 	for (i = 0; i < rbo->placement.num_placement; i++) {
    841 		/* Force into visible VRAM */
    842 		if ((rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
    843 		    (!rbo->placements[i].lpfn || rbo->placements[i].lpfn > lpfn))
    844 			rbo->placements[i].lpfn = lpfn;
    845 	}
    846 	r = ttm_bo_validate(bo, &rbo->placement, &ctx);
    847 	if (unlikely(r == -ENOMEM)) {
    848 		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
    849 		return ttm_bo_validate(bo, &rbo->placement, &ctx);
    850 	} else if (unlikely(r != 0)) {
    851 		return r;
    852 	}
    853 
    854 	offset = bo->mem.start << PAGE_SHIFT;
    855 	/* this should never happen */
    856 	if ((offset + size) > rdev->mc.visible_vram_size)
    857 		return -EINVAL;
    858 
    859 	return 0;
    860 }
    861 
    862 int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
    863 {
    864 	int r;
    865 
    866 	r = ttm_bo_reserve(&bo->tbo, true, no_wait, NULL);
    867 	if (unlikely(r != 0))
    868 		return r;
    869 	if (mem_type)
    870 		*mem_type = bo->tbo.mem.mem_type;
    871 
    872 	r = ttm_bo_wait(&bo->tbo, true, no_wait);
    873 	ttm_bo_unreserve(&bo->tbo);
    874 	return r;
    875 }
    876 
    877 /**
    878  * radeon_bo_fence - add fence to buffer object
    879  *
    880  * @bo: buffer object in question
    881  * @fence: fence to add
    882  * @shared: true if fence should be added shared
    883  *
    884  */
    885 void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
    886 		     bool shared)
    887 {
    888 	struct dma_resv *resv = bo->tbo.base.resv;
    889 
    890 	if (shared)
    891 		dma_resv_add_shared_fence(resv, &fence->base);
    892 	else
    893 		dma_resv_add_excl_fence(resv, &fence->base);
    894 }
    895