/*	$NetBSD: radeon_vm.c,v 1.8 2021/12/18 23:45:43 riastradh Exp $	*/

/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: radeon_vm.c,v 1.8 2021/12/18 23:45:43 riastradh Exp $");

#include <drm/radeon_drm.h>
#include "radeon.h"
#include "radeon_trace.h"

#include <linux/nbsd-namespace.h>

/*
 * GPUVM
 * GPUVM is similar to the legacy gart on older asics, however
 * rather than there being a single global gart table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time.  The VM page tables can contain a mix of
 * vram pages and system memory pages, and system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID.  When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer.  VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 * SI supports 16.
 */
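
/*
 * Note (illustrative): throughout this file a GPU virtual address, expressed
 * in GPU pages, is split into a page-directory index and a page-table index.
 * Roughly:
 *
 *	pd_index  = va_pfn >> radeon_vm_block_size;
 *	pte_index = va_pfn & (RADEON_VM_PTE_COUNT - 1);
 *
 * as done in radeon_vm_bo_set_addr() and radeon_vm_update_ptes() below.
 */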

/**
 * radeon_vm_num_pdes - return the number of page directory entries
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the number of page directory entries (cayman+).
 */
static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
{
	return rdev->vm_manager.max_pfn >> radeon_vm_block_size;
}

/**
 * radeon_vm_directory_size - returns the size of the page directory in bytes
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the size of the page directory in bytes (cayman+).
 */
static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
{
	return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
}

/**
 * radeon_vm_manager_init - init the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Init the vm manager (cayman+).
 * Returns 0 for success, error for failure.
 */
int radeon_vm_manager_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->vm_manager.enabled) {
		r = radeon_asic_vm_init(rdev);
		if (r)
			return r;

		rdev->vm_manager.enabled = true;
	}
	return 0;
}

/**
 * radeon_vm_manager_fini - tear down the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the VM manager (cayman+).
 */
void radeon_vm_manager_fini(struct radeon_device *rdev)
{
	int i;

	if (!rdev->vm_manager.enabled)
		return;

	for (i = 0; i < RADEON_NUM_VM; ++i)
		radeon_fence_unref(&rdev->vm_manager.active[i]);
	radeon_asic_vm_fini(rdev);
	rdev->vm_manager.enabled = false;
}

/**
 * radeon_vm_get_bos - add the vm BOs to a validation list
 *
 * @rdev: radeon_device pointer
 * @vm: vm providing the BOs
 * @head: head of validation list
 *
 * Add the page directory to the list of BOs to
 * validate for command submission (cayman+).
 */
struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev,
					 struct radeon_vm *vm,
					 struct list_head *head)
{
	struct radeon_bo_list *list;
	unsigned i, idx;

	list = kvmalloc_array(vm->max_pde_used + 2,
			      sizeof(struct radeon_bo_list), GFP_KERNEL);
	if (!list)
		return NULL;

	/* add the vm page table to the list */
	list[0].robj = vm->page_directory;
	list[0].preferred_domains = RADEON_GEM_DOMAIN_VRAM;
	list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
	list[0].tv.bo = &vm->page_directory->tbo;
	list[0].tv.num_shared = 1;
	list[0].tiling_flags = 0;
	list_add(&list[0].tv.head, head);

	for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
		if (!vm->page_tables[i].bo)
			continue;

		list[idx].robj = vm->page_tables[i].bo;
		list[idx].preferred_domains = RADEON_GEM_DOMAIN_VRAM;
		list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
		list[idx].tv.bo = &list[idx].robj->tbo;
		list[idx].tv.num_shared = 1;
		list[idx].tiling_flags = 0;
		list_add(&list[idx++].tv.head, head);
	}

	return list;
}

/**
 * radeon_vm_grab_id - allocate the next free VMID
 *
 * @rdev: radeon_device pointer
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 *
 * Allocate an id for the vm (cayman+).
 * Returns the fence we need to sync to (if any).
 *
 * Global and local mutex must be locked!
 */
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
				       struct radeon_vm *vm, int ring)
{
	struct radeon_fence *best[RADEON_NUM_RINGS] = {};
	struct radeon_vm_id *vm_id = &vm->ids[ring];

	unsigned choices[2] = {};
	unsigned i;

	/* check if the id is still valid */
	if (vm_id->id && vm_id->last_id_use &&
	    vm_id->last_id_use == rdev->vm_manager.active[vm_id->id])
		return NULL;

	/* we definitely need to flush */
	vm_id->pd_gpu_addr = ~0ll;

	/* skip over VMID 0, since it is the system VM */
	for (i = 1; i < rdev->vm_manager.nvm; ++i) {
		struct radeon_fence *fence = rdev->vm_manager.active[i];

		if (fence == NULL) {
			/* found a free one */
			vm_id->id = i;
			trace_radeon_vm_grab_id(i, ring);
			return NULL;
		}

		if (radeon_fence_is_earlier(fence, best[fence->ring])) {
			best[fence->ring] = fence;
			choices[fence->ring == ring ? 0 : 1] = i;
		}
	}

	for (i = 0; i < 2; ++i) {
		if (choices[i]) {
			vm_id->id = choices[i];
			trace_radeon_vm_grab_id(choices[i], ring);
			return rdev->vm_manager.active[choices[i]];
		}
	}

	/* should never happen */
	BUG();
	return NULL;
}
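
/*
 * Note (illustrative summary): radeon_vm_grab_id() reuses the VM's current
 * id when its last-use fence is still the fence recorded as active for that
 * id.  Otherwise it prefers a completely free id (no waiting required), and
 * as a last resort picks the id whose active fence is oldest, preferring ids
 * last used on the same ring (choices[0]) over other rings (choices[1]); the
 * returned fence must then be synced to before the id can be reused.
 */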

/**
 * radeon_vm_flush - hardware flush the vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to flush
 * @ring: ring to use for flush
 * @updates: last vm update that is waited for
 *
 * Flush the vm (cayman+).
 *
 * Global and local mutex must be locked!
 */
void radeon_vm_flush(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     int ring, struct radeon_fence *updates)
{
	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
	struct radeon_vm_id *vm_id = &vm->ids[ring];

	if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
	    radeon_fence_is_earlier(vm_id->flushed_updates, updates)) {

		trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id);
		radeon_fence_unref(&vm_id->flushed_updates);
		vm_id->flushed_updates = radeon_fence_ref(updates);
		vm_id->pd_gpu_addr = pd_addr;
		radeon_ring_vm_flush(rdev, &rdev->ring[ring],
				     vm_id->id, vm_id->pd_gpu_addr);
	}
}

/**
 * radeon_vm_fence - remember fence for vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to fence
 * @fence: fence to remember
 *
 * Fence the vm (cayman+).
 * Set the fence used to protect page table and id.
 *
 * Global and local mutex must be locked!
 */
void radeon_vm_fence(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     struct radeon_fence *fence)
{
	unsigned vm_id = vm->ids[fence->ring].id;

	radeon_fence_unref(&rdev->vm_manager.active[vm_id]);
	rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);

	radeon_fence_unref(&vm->ids[fence->ring].last_id_use);
	vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence);
}

/**
 * radeon_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @vm: requested vm
 * @bo: requested buffer object
 *
 * Find @bo inside the requested vm (cayman+).
 * Search inside the @bo's vm list for the requested vm.
 * Returns the found bo_va or NULL if none is found.
 *
 * Object has to be reserved!
 */
struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
				       struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		if (bo_va->vm == vm)
			return bo_va;
	}
	return NULL;
}

/**
 * radeon_vm_bo_add - add a bo to a specific vm
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @bo: radeon buffer object
 *
 * Add @bo into the requested vm (cayman+).
 * Add @bo to the list of bos associated with the vm.
 * Returns newly added bo_va or NULL for failure
 *
 * Object has to be reserved!
 */
struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
				      struct radeon_vm *vm,
				      struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
	if (bo_va == NULL)
		return NULL;

	bo_va->vm = vm;
	bo_va->bo = bo;
	bo_va->it.start = 0;
	bo_va->it.last = 0;
	bo_va->flags = 0;
	bo_va->ref_count = 1;
	INIT_LIST_HEAD(&bo_va->bo_list);
	INIT_LIST_HEAD(&bo_va->vm_status);

	mutex_lock(&vm->mutex);
	list_add_tail(&bo_va->bo_list, &bo->va);
	mutex_unlock(&vm->mutex);

	return bo_va;
}

/**
 * radeon_vm_set_pages - helper to call the right asic function
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the right asic functions
 * to setup the page table using the DMA.
 */
static void radeon_vm_set_pages(struct radeon_device *rdev,
				struct radeon_ib *ib,
				uint64_t pe,
				uint64_t addr, unsigned count,
				uint32_t incr, uint32_t flags)
{
	trace_radeon_vm_set_page(pe, addr, count, incr, flags);

	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
		uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
		radeon_asic_vm_copy_pages(rdev, ib, pe, src, count);

	} else if ((flags & R600_PTE_SYSTEM) || (count < 3)) {
		radeon_asic_vm_write_pages(rdev, ib, pe, addr,
					   count, incr, flags);

	} else {
		radeon_asic_vm_set_pages(rdev, ib, pe, addr,
					 count, incr, flags);
	}
}
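
/*
 * Note (illustrative): the three branches above pick one of three update
 * strategies: PTEs carrying the full GART mask are copied straight out of
 * the GART table, system pages (or very short runs) are written entry by
 * entry, and longer contiguous runs use the bulk set-pages path.
 */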

/**
 * radeon_vm_clear_bo - initially clear the page dir/table
 *
 * @rdev: radeon_device pointer
 * @bo: bo to clear
 */
static int radeon_vm_clear_bo(struct radeon_device *rdev,
			      struct radeon_bo *bo)
{
	struct ttm_operation_ctx ctx = { true, false };
	struct radeon_ib ib;
	unsigned entries;
	uint64_t addr;
	int r;

	r = radeon_bo_reserve(bo, false);
	if (r)
		return r;

	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (r)
		goto error_unreserve;

	addr = radeon_bo_gpu_offset(bo);
	entries = radeon_bo_size(bo) / 8;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256);
	if (r)
		goto error_unreserve;

	ib.length_dw = 0;

	radeon_vm_set_pages(rdev, &ib, addr, 0, entries, 0, 0);
	radeon_asic_vm_pad_ib(rdev, &ib);
	WARN_ON(ib.length_dw > 64);

	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r)
		goto error_free;

	ib.fence->is_vm_update = true;
	radeon_bo_fence(bo, ib.fence, false);

error_free:
	radeon_ib_free(rdev, &ib);

error_unreserve:
	radeon_bo_unreserve(bo);
	return r;
}

/**
 * radeon_vm_bo_set_addr - set bo's virtual address inside a vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: bo_va to store the address
 * @soffset: requested offset of the buffer in the VM address space
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Set offset of @bo_va (cayman+).
 * Validate and set the offset requested within the vm address space.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved and gets unreserved by this function!
 */
int radeon_vm_bo_set_addr(struct radeon_device *rdev,
			  struct radeon_bo_va *bo_va,
			  uint64_t soffset,
			  uint32_t flags)
{
	uint64_t size = radeon_bo_size(bo_va->bo);
	struct radeon_vm *vm = bo_va->vm;
	unsigned last_pfn, pt_idx;
	uint64_t eoffset;
	int r;

	if (soffset) {
		/* make sure the object fits at this offset */
		eoffset = soffset + size - 1;
		if (soffset >= eoffset) {
			r = -EINVAL;
			goto error_unreserve;
		}

		last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
		if (last_pfn >= rdev->vm_manager.max_pfn) {
			dev_err(rdev->dev, "va above limit (0x%08X >= 0x%08X)\n",
				last_pfn, rdev->vm_manager.max_pfn);
			r = -EINVAL;
			goto error_unreserve;
		}

	} else {
		eoffset = last_pfn = 0;
	}

	mutex_lock(&vm->mutex);
	soffset /= RADEON_GPU_PAGE_SIZE;
	eoffset /= RADEON_GPU_PAGE_SIZE;
	if (soffset || eoffset) {
		struct interval_tree_node *it;
		it = interval_tree_iter_first(&vm->va, soffset, eoffset);
		if (it && it != &bo_va->it) {
			struct radeon_bo_va *tmp;
			tmp = container_of(it, struct radeon_bo_va, it);
			/* bo and tmp overlap, invalid offset */
			dev_err(rdev->dev, "bo %p va 0x%010"PRIx64" conflict with "
				"(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
				soffset, tmp->bo, tmp->it.start, tmp->it.last);
			mutex_unlock(&vm->mutex);
			r = -EINVAL;
			goto error_unreserve;
		}
	}

	if (bo_va->it.start || bo_va->it.last) {
		/* add a clone of the bo_va to clear the old address */
		struct radeon_bo_va *tmp;
		tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
		if (!tmp) {
			mutex_unlock(&vm->mutex);
			r = -ENOMEM;
			goto error_unreserve;
		}
		tmp->it.start = bo_va->it.start;
		tmp->it.last = bo_va->it.last;
		tmp->vm = vm;
		tmp->bo = radeon_bo_ref(bo_va->bo);

		interval_tree_remove(&bo_va->it, &vm->va);
		spin_lock(&vm->status_lock);
		bo_va->it.start = 0;
		bo_va->it.last = 0;
		list_del_init(&bo_va->vm_status);
		list_add(&tmp->vm_status, &vm->freed);
		spin_unlock(&vm->status_lock);
	}

	if (soffset || eoffset) {
		spin_lock(&vm->status_lock);
		bo_va->it.start = soffset;
		bo_va->it.last = eoffset;
		list_add(&bo_va->vm_status, &vm->cleared);
		spin_unlock(&vm->status_lock);
		interval_tree_insert(&bo_va->it, &vm->va);
	}

	bo_va->flags = flags;

	soffset >>= radeon_vm_block_size;
	eoffset >>= radeon_vm_block_size;

	BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));

	if (eoffset > vm->max_pde_used)
		vm->max_pde_used = eoffset;

	radeon_bo_unreserve(bo_va->bo);

	/* walk over the address space and allocate the page tables */
	for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) {
		struct radeon_bo *pt;

		if (vm->page_tables[pt_idx].bo)
			continue;

		/* drop mutex to allocate and clear page table */
		mutex_unlock(&vm->mutex);

		r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
				     RADEON_GPU_PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, 0,
				     NULL, NULL, &pt);
		if (r)
			return r;

		r = radeon_vm_clear_bo(rdev, pt);
		if (r) {
			radeon_bo_unref(&pt);
			return r;
		}

		/* acquire mutex again */
		mutex_lock(&vm->mutex);
		if (vm->page_tables[pt_idx].bo) {
			/* someone else allocated the pt in the meantime */
			mutex_unlock(&vm->mutex);
			radeon_bo_unref(&pt);
			mutex_lock(&vm->mutex);
			continue;
		}

		vm->page_tables[pt_idx].addr = 0;
		vm->page_tables[pt_idx].bo = pt;
	}

	mutex_unlock(&vm->mutex);
	return 0;

error_unreserve:
	radeon_bo_unreserve(bo_va->bo);
	return r;
}

/**
 * radeon_vm_map_gart - get the physical address of a gart page
 *
 * @rdev: radeon_device pointer
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to (cayman+).
 * Returns the physical address of the page.
 */
uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
{
	uint64_t result;

	/* page table offset */
	result = rdev->gart.pages_entry[addr >> RADEON_GPU_PAGE_SHIFT];
	result &= ~RADEON_GPU_PAGE_MASK;

	return result;
}

/**
 * radeon_vm_page_flags - translate page flags to what the hw uses
 *
 * @flags: flags coming from userspace
 *
 * Translate the flags the userspace ABI uses to hw flags.
 */
static uint32_t radeon_vm_page_flags(uint32_t flags)
{
	uint32_t hw_flags = 0;

	hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
	hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
	hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ?
		R600_PTE_WRITEABLE : 0;
	if (flags & RADEON_VM_PAGE_SYSTEM) {
		hw_flags |= R600_PTE_SYSTEM;
		hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
	}
	return hw_flags;
}

/**
 * radeon_vm_update_page_directory - make sure that the page directory is valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Allocates new page tables if necessary
 * and updates the page directory (cayman+).
 * Returns 0 for success, error for failure.
 *
 * Global and local mutex must be locked!
 */
int radeon_vm_update_page_directory(struct radeon_device *rdev,
				    struct radeon_vm *vm)
{
	struct radeon_bo *pd = vm->page_directory;
	uint64_t pd_addr = radeon_bo_gpu_offset(pd);
	uint32_t incr = RADEON_VM_PTE_COUNT * 8;
	uint64_t last_pde = ~0, last_pt = ~0;
	unsigned count = 0, pt_idx, ndw;
	struct radeon_ib ib;
	int r;

	/* padding, etc. */
	ndw = 64;

	/* assume the worst case */
	ndw += vm->max_pde_used * 6;

	/* update too big for an IB */
	if (ndw > 0xfffff)
		return -ENOMEM;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
	if (r)
		return r;
	ib.length_dw = 0;

	/* walk over the address space and update the page directory */
	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
		struct radeon_bo *bo = vm->page_tables[pt_idx].bo;
		uint64_t pde, pt;

		if (bo == NULL)
			continue;

		pt = radeon_bo_gpu_offset(bo);
		if (vm->page_tables[pt_idx].addr == pt)
			continue;
		vm->page_tables[pt_idx].addr = pt;

		pde = pd_addr + pt_idx * 8;
		if (((last_pde + 8 * count) != pde) ||
		    ((last_pt + incr * count) != pt)) {

			if (count) {
				radeon_vm_set_pages(rdev, &ib, last_pde,
						    last_pt, count, incr,
						    R600_PTE_VALID);
			}

			count = 1;
			last_pde = pde;
			last_pt = pt;
		} else {
			++count;
		}
	}

	if (count)
		radeon_vm_set_pages(rdev, &ib, last_pde, last_pt, count,
				    incr, R600_PTE_VALID);

	if (ib.length_dw != 0) {
		radeon_asic_vm_pad_ib(rdev, &ib);

		radeon_sync_resv(rdev, &ib.sync, pd->tbo.base.resv, true);
		WARN_ON(ib.length_dw > ndw);
		r = radeon_ib_schedule(rdev, &ib, NULL, false);
		if (r) {
			radeon_ib_free(rdev, &ib);
			return r;
		}
		ib.fence->is_vm_update = true;
		radeon_bo_fence(pd, ib.fence, false);
	}
	radeon_ib_free(rdev, &ib);

	return 0;
}
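
/*
 * Note (illustrative): the walk above batches page-directory updates.
 * Consecutive PDEs that point at physically consecutive page tables are
 * accumulated in (last_pde, last_pt, count) and emitted with a single
 * radeon_vm_set_pages() call; any discontinuity starts a new run.
 */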

/**
 * radeon_vm_frag_ptes - add fragment information to PTEs
 *
 * @rdev: radeon_device pointer
 * @ib: IB for the update
 * @pe_start: first PTE to handle
 * @pe_end: last PTE to handle
 * @addr: addr those PTEs should point to
 * @flags: hw mapping flags
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_frag_ptes(struct radeon_device *rdev,
				struct radeon_ib *ib,
				uint64_t pe_start, uint64_t pe_end,
				uint64_t addr, uint32_t flags)
{
	/**
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
	 */

	/* NI is optimized for 256KB fragments, SI and newer for 64KB */
	uint64_t frag_flags = ((rdev->family == CHIP_CAYMAN) ||
			       (rdev->family == CHIP_ARUBA)) ?
			R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB;
	uint64_t frag_align = ((rdev->family == CHIP_CAYMAN) ||
			       (rdev->family == CHIP_ARUBA)) ?
			0x200 : 0x80;

	uint64_t frag_start = ALIGN(pe_start, frag_align);
	uint64_t frag_end = pe_end & ~(frag_align - 1);

	unsigned count;

	/* system pages are not physically contiguous */
	if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) ||
	    (frag_start >= frag_end)) {

		count = (pe_end - pe_start) / 8;
		radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
				    RADEON_GPU_PAGE_SIZE, flags);
		return;
	}

	/* handle the 4K area at the beginning */
	if (pe_start != frag_start) {
		count = (frag_start - pe_start) / 8;
		radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
				    RADEON_GPU_PAGE_SIZE, flags);
		addr += RADEON_GPU_PAGE_SIZE * count;
	}

	/* handle the area in the middle */
	count = (frag_end - frag_start) / 8;
	radeon_vm_set_pages(rdev, ib, frag_start, addr, count,
			    RADEON_GPU_PAGE_SIZE, flags | frag_flags);

	/* handle the 4K area at the end */
	if (frag_end != pe_end) {
		addr += RADEON_GPU_PAGE_SIZE * count;
		count = (pe_end - frag_end) / 8;
		radeon_vm_set_pages(rdev, ib, frag_end, addr, count,
				    RADEON_GPU_PAGE_SIZE, flags);
	}
}
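
/*
 * Note (illustrative arithmetic): pe_start/pe_end are PTE addresses, so the
 * alignment values above are in bytes of PTEs.  With 8 bytes per PTE and
 * 4KB pages, frag_align = 0x80 spans 16 PTEs, i.e. a 64KB fragment, and
 * frag_align = 0x200 spans 64 PTEs, i.e. a 256KB fragment, matching
 * R600_PTE_FRAG_64KB and R600_PTE_FRAG_256KB respectively.
 */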

/**
 * radeon_vm_update_ptes - make sure that page tables are valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @ib: indirect buffer to fill with commands
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end (cayman+).
 *
 * Global and local mutex must be locked!
 */
static int radeon_vm_update_ptes(struct radeon_device *rdev,
				 struct radeon_vm *vm,
				 struct radeon_ib *ib,
				 uint64_t start, uint64_t end,
				 uint64_t dst, uint32_t flags)
{
	uint64_t mask = RADEON_VM_PTE_COUNT - 1;
	uint64_t last_pte = ~0, last_dst = ~0;
	unsigned count = 0;
	uint64_t addr;

	/* walk over the address space and update the page tables */
	for (addr = start; addr < end; ) {
		uint64_t pt_idx = addr >> radeon_vm_block_size;
		struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
		unsigned nptes;
		uint64_t pte;
		int r;

		radeon_sync_resv(rdev, &ib->sync, pt->tbo.base.resv, true);
		r = dma_resv_reserve_shared(pt->tbo.base.resv, 1);
		if (r)
			return r;

		if ((addr & ~mask) == (end & ~mask))
			nptes = end - addr;
		else
			nptes = RADEON_VM_PTE_COUNT - (addr & mask);

		pte = radeon_bo_gpu_offset(pt);
		pte += (addr & mask) * 8;

		if ((last_pte + 8 * count) != pte) {

			if (count) {
				radeon_vm_frag_ptes(rdev, ib, last_pte,
						    last_pte + 8 * count,
						    last_dst, flags);
			}

			count = nptes;
			last_pte = pte;
			last_dst = dst;
		} else {
			count += nptes;
		}

		addr += nptes;
		dst += nptes * RADEON_GPU_PAGE_SIZE;
	}

	if (count) {
		radeon_vm_frag_ptes(rdev, ib, last_pte,
				    last_pte + 8 * count,
				    last_dst, flags);
	}

	return 0;
}

/**
 * radeon_vm_fence_pts - fence page tables after an update
 *
 * @vm: requested vm
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @fence: fence to use
 *
 * Fence the page tables in the range @start - @end (cayman+).
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_fence_pts(struct radeon_vm *vm,
				uint64_t start, uint64_t end,
				struct radeon_fence *fence)
{
	unsigned i;

	start >>= radeon_vm_block_size;
	end = (end - 1) >> radeon_vm_block_size;

	for (i = start; i <= end; ++i)
		radeon_bo_fence(vm->page_tables[i].bo, fence, true);
}

/**
 * radeon_vm_bo_update - map a bo into the vm page table
 *
 * @rdev: radeon_device pointer
 * @bo_va: requested bo_va
 * @mem: ttm mem
 *
 * Fill in the page table entries for @bo_va (cayman+).
 * Returns 0 for success, -EINVAL for failure.
 *
 * Object has to be reserved and mutex must be locked!
 */
int radeon_vm_bo_update(struct radeon_device *rdev,
			struct radeon_bo_va *bo_va,
			struct ttm_mem_reg *mem)
{
	struct radeon_vm *vm = bo_va->vm;
	struct radeon_ib ib;
	unsigned nptes, ncmds, ndw;
	uint64_t addr;
	uint32_t flags;
	int r;

	if (!bo_va->it.start) {
		dev_err(rdev->dev, "bo %p doesn't have a mapping in vm %p\n",
			bo_va->bo, vm);
		return -EINVAL;
	}

	spin_lock(&vm->status_lock);
	if (mem) {
		if (list_empty(&bo_va->vm_status)) {
			spin_unlock(&vm->status_lock);
			return 0;
		}
		list_del_init(&bo_va->vm_status);
	} else {
		list_del(&bo_va->vm_status);
		list_add(&bo_va->vm_status, &vm->cleared);
	}
	spin_unlock(&vm->status_lock);

	bo_va->flags &= ~RADEON_VM_PAGE_VALID;
	bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
	bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED;
	if (bo_va->bo && radeon_ttm_tt_is_readonly(bo_va->bo->tbo.ttm))
		bo_va->flags &= ~RADEON_VM_PAGE_WRITEABLE;

	if (mem) {
		addr = (u64)mem->start << PAGE_SHIFT;
		if (mem->mem_type != TTM_PL_SYSTEM)
			bo_va->flags |= RADEON_VM_PAGE_VALID;

		if (mem->mem_type == TTM_PL_TT) {
			bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
			if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC)))
				bo_va->flags |= RADEON_VM_PAGE_SNOOPED;

		} else {
			addr += rdev->vm_manager.vram_base_offset;
		}
	} else {
		addr = 0;
	}

	trace_radeon_vm_bo_update(bo_va);

	nptes = bo_va->it.last - bo_va->it.start + 1;

	/* reserve space for one command every (1 << BLOCK_SIZE) entries
	   or 2k dwords (whatever is smaller) */
	ncmds = (nptes >> min(radeon_vm_block_size, 11)) + 1;

	/* padding, etc. */
	ndw = 64;

	flags = radeon_vm_page_flags(bo_va->flags);
	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
		/* only copy commands needed */
		ndw += ncmds * 7;

	} else if (flags & R600_PTE_SYSTEM) {
		/* header for write data commands */
		ndw += ncmds * 4;

		/* body of write data command */
		ndw += nptes * 2;

	} else {
		/* set page commands needed */
		ndw += ncmds * 10;

		/* two extra commands for begin/end of fragment */
		ndw += 2 * 10;
	}

	/* update too big for an IB */
	if (ndw > 0xfffff)
		return -ENOMEM;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
	if (r)
		return r;
	ib.length_dw = 0;

	if (!(bo_va->flags & RADEON_VM_PAGE_VALID)) {
		unsigned i;

		for (i = 0; i < RADEON_NUM_RINGS; ++i)
			radeon_sync_fence(&ib.sync, vm->ids[i].last_id_use);
	}

	r = radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
				  bo_va->it.last + 1, addr,
				  radeon_vm_page_flags(bo_va->flags));
	if (r) {
		radeon_ib_free(rdev, &ib);
		return r;
	}

	radeon_asic_vm_pad_ib(rdev, &ib);
	WARN_ON(ib.length_dw > ndw);

	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_ib_free(rdev, &ib);
		return r;
	}
	ib.fence->is_vm_update = true;
	radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence);
	radeon_fence_unref(&bo_va->last_pt_update);
	bo_va->last_pt_update = radeon_fence_ref(ib.fence);
	radeon_ib_free(rdev, &ib);

	return 0;
}

/**
 * radeon_vm_clear_freed - clear freed BOs in the PT
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Make sure all freed BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int radeon_vm_clear_freed(struct radeon_device *rdev,
			  struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va;
	int r = 0;

	spin_lock(&vm->status_lock);
	while (!list_empty(&vm->freed)) {
		bo_va = list_first_entry(&vm->freed,
					 struct radeon_bo_va, vm_status);
		spin_unlock(&vm->status_lock);

		r = radeon_vm_bo_update(rdev, bo_va, NULL);
		radeon_bo_unref(&bo_va->bo);
		radeon_fence_unref(&bo_va->last_pt_update);
		spin_lock(&vm->status_lock);
		list_del(&bo_va->vm_status);
		kfree(bo_va);
		if (r)
			break;
	}
	spin_unlock(&vm->status_lock);
	return r;
}

/**
 * radeon_vm_clear_invalids - clear invalidated BOs in the PT
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Make sure all invalidated BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int radeon_vm_clear_invalids(struct radeon_device *rdev,
			     struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va;
	int r;

	spin_lock(&vm->status_lock);
	while (!list_empty(&vm->invalidated)) {
		bo_va = list_first_entry(&vm->invalidated,
					 struct radeon_bo_va, vm_status);
		spin_unlock(&vm->status_lock);

		r = radeon_vm_bo_update(rdev, bo_va, NULL);
		if (r)
			return r;

		spin_lock(&vm->status_lock);
	}
	spin_unlock(&vm->status_lock);

	return 0;
}
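
/*
 * Note (illustrative summary of the vm_status lists used above): bo_vas on
 * vm->invalidated had their backing BO moved and need their PTEs rewritten
 * on the next submission; entries on vm->freed are clones of unmapped ranges
 * whose PTEs still need to be cleared before the clone is destroyed; and
 * vm->cleared holds mappings whose PTEs are currently cleared (freshly set
 * up or unbound) until they are filled in again.
 */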

/**
 * radeon_vm_bo_rmv - remove a bo from a specific vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: requested bo_va
 *
 * Remove @bo_va->bo from the requested vm (cayman+).
 *
 * Object has to be reserved!
 */
void radeon_vm_bo_rmv(struct radeon_device *rdev,
		      struct radeon_bo_va *bo_va)
{
	struct radeon_vm *vm = bo_va->vm;

	list_del(&bo_va->bo_list);

	mutex_lock(&vm->mutex);
	if (bo_va->it.start || bo_va->it.last)
		interval_tree_remove(&bo_va->it, &vm->va);

	spin_lock(&vm->status_lock);
	list_del(&bo_va->vm_status);
	if (bo_va->it.start || bo_va->it.last) {
		bo_va->bo = radeon_bo_ref(bo_va->bo);
		list_add(&bo_va->vm_status, &vm->freed);
	} else {
		radeon_fence_unref(&bo_va->last_pt_update);
		kfree(bo_va);
	}
	spin_unlock(&vm->status_lock);

	mutex_unlock(&vm->mutex);
}

/**
 * radeon_vm_bo_invalidate - mark the bo as invalid
 *
 * @rdev: radeon_device pointer
 * @bo: radeon buffer object
 *
 * Mark @bo as invalid (cayman+).
 */
void radeon_vm_bo_invalidate(struct radeon_device *rdev,
			     struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		spin_lock(&bo_va->vm->status_lock);
		if (list_empty(&bo_va->vm_status) &&
		    (bo_va->it.start || bo_va->it.last))
			list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
		spin_unlock(&bo_va->vm->status_lock);
	}
}

/**
 * radeon_vm_init - initialize a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Init @vm fields (cayman+).
 */
int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
{
	const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
				   RADEON_VM_PTE_COUNT * 8);
	unsigned pd_size, pd_entries, pts_size;
	int i, r;

	vm->ib_bo_va = NULL;
	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		vm->ids[i].id = 0;
		vm->ids[i].flushed_updates = NULL;
		vm->ids[i].last_id_use = NULL;
	}
	mutex_init(&vm->mutex);
#ifdef __NetBSD__
	interval_tree_init(&vm->va);
#else
	vm->va = RB_ROOT_CACHED;
#endif
	spin_lock_init(&vm->status_lock);
	INIT_LIST_HEAD(&vm->invalidated);
	INIT_LIST_HEAD(&vm->freed);
	INIT_LIST_HEAD(&vm->cleared);

	pd_size = radeon_vm_directory_size(rdev);
	pd_entries = radeon_vm_num_pdes(rdev);

	/* allocate page table array */
	pts_size = pd_entries * sizeof(struct radeon_vm_pt);
	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
	if (vm->page_tables == NULL) {
		DRM_ERROR("Cannot allocate memory for page table array\n");
		return -ENOMEM;
	}

	r = radeon_bo_create(rdev, pd_size, align, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
			     NULL, &vm->page_directory);
	if (r)
		return r;

	r = radeon_vm_clear_bo(rdev, vm->page_directory);
	if (r) {
		radeon_bo_unref(&vm->page_directory);
		vm->page_directory = NULL;
		return r;
	}

	return 0;
}

/**
 * radeon_vm_fini - tear down a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Tear down @vm (cayman+).
 * Unbind the VM and remove all bos from the vm bo list
 */
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int i, r;

	if (!RB_EMPTY_ROOT(&vm->va.rb_root))
		dev_err(rdev->dev, "still active bo inside vm\n");

	rbtree_postorder_for_each_entry_safe(bo_va, tmp,
					     &vm->va.rb_root, it.rb) {
		interval_tree_remove(&bo_va->it, &vm->va);
		r = radeon_bo_reserve(bo_va->bo, false);
		if (!r) {
			list_del_init(&bo_va->bo_list);
			radeon_bo_unreserve(bo_va->bo);
			radeon_fence_unref(&bo_va->last_pt_update);
			kfree(bo_va);
		}
	}
	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
		radeon_bo_unref(&bo_va->bo);
		radeon_fence_unref(&bo_va->last_pt_update);
		kfree(bo_va);
	}

	for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
		radeon_bo_unref(&vm->page_tables[i].bo);
	kfree(vm->page_tables);

	radeon_bo_unref(&vm->page_directory);

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		radeon_fence_unref(&vm->ids[i].flushed_updates);
		radeon_fence_unref(&vm->ids[i].last_id_use);
	}

	spin_lock_destroy(&vm->status_lock);
	mutex_destroy(&vm->mutex);
}