/*	$NetBSD: radeon_cs.c,v 1.6 2021/12/18 23:45:43 riastradh Exp $	*/

/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: radeon_cs.c,v 1.6 2021/12/18 23:45:43 riastradh Exp $");

#include <linux/list_sort.h>
#include <linux/pci.h>
#include <linux/uaccess.h>

#include <drm/drm_device.h>
#include <drm/drm_file.h>
#include <drm/radeon_drm.h>

#include "radeon.h"
#include "radeon_reg.h"
#include "radeon_trace.h"

#define RADEON_CS_MAX_PRIORITY		32u
#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};

static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
	unsigned i;

	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
		INIT_LIST_HEAD(&b->bucket[i]);
}

static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
				  struct list_head *item, unsigned priority)
{
	/* Since buffers which appear sooner in the relocation list are
	 * likely to be used more often than buffers which appear later
	 * in the list, the sort mustn't change the ordering of buffers
	 * with the same priority, i.e. it must be stable.
	 */
	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

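/*
 * Note: list_splice() inserts each bucket at the head of the output list,
 * so the bucket spliced last (highest priority) ends up first.  Combined
 * with the list_add_tail() above, this yields a stable sort in descending
 * priority order.
 */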
static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
				       struct list_head *out_list)
{
	unsigned i;

	/* Connect the sorted buckets in the output list. */
	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
		list_splice(&b->bucket[i], out_list);
	}
}

static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
	struct radeon_cs_chunk *chunk;
	struct radeon_cs_buckets buckets;
	unsigned i;
	bool need_mmap_lock = false;
	int r;

	if (p->chunk_relocs == NULL) {
		return 0;
	}
	chunk = p->chunk_relocs;
	p->dma_reloc_idx = 0;
	/* FIXME: we assume that each reloc uses 4 dwords */
	p->nrelocs = chunk->length_dw / 4;
	p->relocs = kvmalloc_array(p->nrelocs, sizeof(struct radeon_bo_list),
				   GFP_KERNEL | __GFP_ZERO);
	if (p->relocs == NULL) {
		return -ENOMEM;
	}

	radeon_cs_buckets_init(&buckets);

	for (i = 0; i < p->nrelocs; i++) {
		struct drm_radeon_cs_reloc *r;
		struct drm_gem_object *gobj;
		unsigned priority;

		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
		gobj = drm_gem_object_lookup(p->filp, r->handle);
		if (gobj == NULL) {
			DRM_ERROR("gem object lookup failed 0x%x\n",
				  r->handle);
			return -ENOENT;
		}
		p->relocs[i].robj = gem_to_radeon_bo(gobj);

		/* The userspace buffer priorities are from 0 to 15. A higher
		 * number means the buffer is more important.
		 * Also, the buffers used for write have a higher priority than
		 * the buffers used for read only, which doubles the range
		 * to 0 to 31. 32 is reserved for the kernel driver.
		 */
		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
			   + !!r->write_domain;

		/* The first reloc of an UVD job is the msg and that must be in
		 * VRAM, the second reloc is the DPB and for WMV that must be in
		 * VRAM as well. Also put everything into VRAM on AGP cards and older
		 * IGP chips to avoid image corruption.
		 */
		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
		    (i <= 0 || pci_find_capability(p->rdev->ddev->pdev,
						   PCI_CAP_ID_AGP) ||
		     p->rdev->family == CHIP_RS780 ||
		     p->rdev->family == CHIP_RS880)) {

			/* TODO: is this still needed for NI+ ? */
			p->relocs[i].preferred_domains =
				RADEON_GEM_DOMAIN_VRAM;

			p->relocs[i].allowed_domains =
				RADEON_GEM_DOMAIN_VRAM;

			/* prioritize this over any other relocation */
			priority = RADEON_CS_MAX_PRIORITY;
		} else {
			uint32_t domain = r->write_domain ?
				r->write_domain : r->read_domains;

			if (domain & RADEON_GEM_DOMAIN_CPU) {
				DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
					  "for command submission\n");
				return -EINVAL;
			}

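			/*
			 * Note: when the chosen domain is VRAM only, GTT is
			 * still left in allowed_domains below so validation
			 * can fall back to GTT instead of failing outright
			 * when VRAM is full.
			 */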
			p->relocs[i].preferred_domains = domain;
			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain |= RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].allowed_domains = domain;
		}

		if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
			uint32_t domain = p->relocs[i].preferred_domains;
			if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
				DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
					  "allowed for userptr BOs\n");
				return -EINVAL;
			}
			need_mmap_lock = true;
			domain = RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].preferred_domains = domain;
			p->relocs[i].allowed_domains = domain;
		}

		/* Objects shared as dma-bufs cannot be moved to VRAM */
		if (p->relocs[i].robj->prime_shared_count) {
			p->relocs[i].allowed_domains &= ~RADEON_GEM_DOMAIN_VRAM;
			if (!p->relocs[i].allowed_domains) {
				DRM_ERROR("BO associated with dma-buf cannot "
					  "be moved to VRAM\n");
				return -EINVAL;
			}
		}

		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
		p->relocs[i].tv.num_shared = !r->write_domain;

		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
				      priority);
	}

	radeon_cs_buckets_get_list(&buckets, &p->validated);

	if (p->cs_flags & RADEON_CS_USE_VM)
		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
					      &p->validated);
#ifdef __NetBSD__
	if (need_mmap_lock)
		vm_map_lock_read(&curproc->p_vmspace->vm_map);
#else
	if (need_mmap_lock)
		down_read(&current->mm->mmap_sem);
#endif

	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);

#ifdef __NetBSD__
	if (need_mmap_lock)
		vm_map_unlock_read(&curproc->p_vmspace->vm_map);
#else
	if (need_mmap_lock)
		up_read(&current->mm->mmap_sem);
#endif

	return r;
}

static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
	p->priority = priority;

	switch (ring) {
	default:
		DRM_ERROR("unknown ring id: %d\n", ring);
		return -EINVAL;
	case RADEON_CS_RING_GFX:
		p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_COMPUTE:
		if (p->rdev->family >= CHIP_TAHITI) {
			if (p->priority > 0)
				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
		} else
			p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_DMA:
		if (p->rdev->family >= CHIP_CAYMAN) {
			if (p->priority > 0)
				p->ring = R600_RING_TYPE_DMA_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
		} else if (p->rdev->family >= CHIP_RV770) {
			p->ring = R600_RING_TYPE_DMA_INDEX;
		} else {
			return -EINVAL;
		}
		break;
	case RADEON_CS_RING_UVD:
		p->ring = R600_RING_TYPE_UVD_INDEX;
		break;
	case RADEON_CS_RING_VCE:
		/* TODO: only use the low priority ring for now */
		p->ring = TN_RING_TYPE_VCE1_INDEX;
		break;
	}
	return 0;
}

static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
	struct radeon_bo_list *reloc;
	int r;

	list_for_each_entry(reloc, &p->validated, tv.head) {
		struct dma_resv *resv;

		resv = reloc->robj->tbo.base.resv;
		r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
				     reloc->tv.num_shared);
		if (r)
			return r;
	}
	return 0;
}

/* XXX: note that this is called from the legacy UMS CS ioctl as well */
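/*
 * Note: this copies the user's chunk array, classifies each chunk (relocs,
 * IB, const IB, flags), pulls cs_flags/ring/priority out of the flags chunk,
 * and checks that the requested ring supports the VM/non-VM mode asked for.
 * IB chunk data is only staged into kdata here on AGP; otherwise it is
 * copied later in radeon_cs_ib_fill().
 */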
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
	struct drm_radeon_cs *cs = data;
	uint64_t *chunk_array_ptr;
	unsigned size, i;
	u32 ring = RADEON_CS_RING_GFX;
	s32 priority = 0;

	INIT_LIST_HEAD(&p->validated);

	if (!cs->num_chunks) {
		return 0;
	}

	/* get chunks */
	p->idx = 0;
	p->ib.sa_bo = NULL;
	p->const_ib.sa_bo = NULL;
	p->chunk_ib = NULL;
	p->chunk_relocs = NULL;
	p->chunk_flags = NULL;
	p->chunk_const_ib = NULL;
	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (p->chunks_array == NULL) {
		return -ENOMEM;
	}
	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
	if (copy_from_user(p->chunks_array, chunk_array_ptr,
			   sizeof(uint64_t)*cs->num_chunks)) {
		return -EFAULT;
	}
	p->cs_flags = 0;
	p->nchunks = cs->num_chunks;
	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
	if (p->chunks == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < p->nchunks; i++) {
		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
		struct drm_radeon_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_radeon_cs_chunk))) {
			return -EFAULT;
		}
		p->chunks[i].length_dw = user_chunk.length_dw;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
			p->chunk_relocs = &p->chunks[i];
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			p->chunk_ib = &p->chunks[i];
			/* zero length IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
			p->chunk_const_ib = &p->chunks[i];
			/* zero length CONST IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->chunk_flags = &p->chunks[i];
			/* zero length flags aren't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}

		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
		p->chunks[i].user_ptr = cdata;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
			continue;

		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
				continue;
		}

		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
		size *= sizeof(uint32_t);
		if (p->chunks[i].kdata == NULL) {
			return -ENOMEM;
		}
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			return -EFAULT;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->cs_flags = p->chunks[i].kdata[0];
			if (p->chunks[i].length_dw > 1)
				ring = p->chunks[i].kdata[1];
			if (p->chunks[i].length_dw > 2)
				priority = (s32)p->chunks[i].kdata[2];
		}
	}

	/* these are KMS only */
	if (p->rdev) {
		if ((p->cs_flags & RADEON_CS_USE_VM) &&
		    !p->rdev->vm_manager.enabled) {
			DRM_ERROR("VM not active on asic!\n");
			return -EINVAL;
		}

		if (radeon_cs_get_ring(p, ring, priority))
			return -EINVAL;

		/* we only support VM on some SI+ rings */
		if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
			if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
				DRM_ERROR("Ring %d requires VM!\n", p->ring);
				return -EINVAL;
			}
		} else {
			if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
				DRM_ERROR("VM not supported on ring %d!\n",
					  p->ring);
				return -EINVAL;
			}
		}
	}

	return 0;
}

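/*
 * Note: list_sort() comparator; a negative return sorts @a before @b, so
 * smaller BOs end up first in the validated list.  See the comment in
 * radeon_cs_parser_fini() for why this ordering is used.
 */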
static int cmp_size_smaller_first(void *priv, struct list_head *a,
				  struct list_head *b)
{
	struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
	struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);

	/* Sort A before B if A is smaller. */
	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 * @backoff:	indicator to back off the reservation
 *
 * If error is set, then unvalidate the buffers; otherwise just free the
 * memory used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
	unsigned i;

	if (!error) {
		/* Sort the buffer list from the smallest to largest buffer,
		 * which affects the order of buffers in the LRU list.
		 * This assures that the smallest buffers are added first
		 * to the LRU list, so they are likely to be later evicted
		 * first, instead of large buffers whose eviction is more
		 * expensive.
		 *
		 * This slightly lowers the number of bytes moved by TTM
		 * per frame under memory pressure.
		 */
		list_sort(NULL, &parser->validated, cmp_size_smaller_first);

		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    &parser->ib.fence->base);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}

	if (parser->relocs != NULL) {
		for (i = 0; i < parser->nrelocs; i++) {
			struct radeon_bo *bo = parser->relocs[i].robj;
			if (bo == NULL)
				continue;

			drm_gem_object_put_unlocked(&bo->tbo.base);
		}
	}
	kfree(parser->track);
	kvfree(parser->relocs);
	kvfree(parser->vm_bos);
	for (i = 0; i < parser->nchunks; i++)
		kvfree(parser->chunks[i].kdata);
	kfree(parser->chunks);
	kfree(parser->chunks_array);
	radeon_ib_free(parser->rdev, &parser->ib);
	radeon_ib_free(parser->rdev, &parser->const_ib);
}

static int radeon_cs_ib_chunk(struct radeon_device *rdev,
			      struct radeon_cs_parser *parser)
{
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM)
		return 0;

	r = radeon_cs_parse(rdev, parser->ring, parser);
	if (r || parser->parser_error) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);
	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
		radeon_vce_note_usage(rdev);

	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	if (r) {
		DRM_ERROR("Failed to schedule IB !\n");
	}
	return r;
}

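/*
 * Note: this brings the VM up to date before the IB runs: it updates the
 * page directory, clears freed mappings, refreshes the PTEs for the
 * ring_tmp_bo and for every relocated BO, makes the IB wait on the
 * resulting page-table update fences, and finally clears invalidated
 * mappings.
 */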
static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
				   struct radeon_vm *vm)
{
	struct radeon_device *rdev = p->rdev;
	struct radeon_bo_va *bo_va;
	int i, r;

	r = radeon_vm_update_page_directory(rdev, vm);
	if (r)
		return r;

	r = radeon_vm_clear_freed(rdev, vm);
	if (r)
		return r;

	if (vm->ib_bo_va == NULL) {
		DRM_ERROR("Tmp BO not in VM!\n");
		return -EINVAL;
	}

	r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
				&rdev->ring_tmp_bo.bo->tbo.mem);
	if (r)
		return r;

	for (i = 0; i < p->nrelocs; i++) {
		struct radeon_bo *bo;

		bo = p->relocs[i].robj;
		bo_va = radeon_vm_bo_find(vm, bo);
		if (bo_va == NULL) {
			dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
			return -EINVAL;
		}

		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
		if (r)
			return r;

		radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
	}

	return radeon_vm_clear_invalids(rdev, vm);
}

static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
				 struct radeon_cs_parser *parser)
{
	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
	struct radeon_vm *vm = &fpriv->vm;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;
	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
		return 0;

	if (parser->const_ib.length_dw) {
		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
		if (r) {
			return r;
		}
	}

	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
	if (r) {
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);

	mutex_lock(&vm->mutex);
	r = radeon_bo_vm_update_pte(parser, vm);
	if (r) {
		goto out;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		goto out;
	}

	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib != NULL)) {
		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
	} else {
		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	}

out:
	mutex_unlock(&vm->mutex);
	return r;
}

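/*
 * Note: -EDEADLK propagated out of the submission path indicates that a
 * GPU lockup was detected while waiting, so the GPU is reset here and
 * -EAGAIN is returned to let userspace resubmit the command stream.
 */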
static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
	if (r == -EDEADLK) {
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_vm *vm = NULL;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM) {
		struct radeon_fpriv *fpriv = parser->filp->driver_priv;
		vm = &fpriv->vm;

		if ((rdev->family >= CHIP_TAHITI) &&
		    (parser->chunk_const_ib != NULL)) {
			ib_chunk = parser->chunk_const_ib;
			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
				return -EINVAL;
			}
			r = radeon_ib_get(rdev, parser->ring, &parser->const_ib,
					  vm, ib_chunk->length_dw * 4);
			if (r) {
				DRM_ERROR("Failed to get const ib !\n");
				return r;
			}
			parser->const_ib.is_const_ib = true;
			parser->const_ib.length_dw = ib_chunk->length_dw;
			if (copy_from_user(parser->const_ib.ptr,
					   ib_chunk->user_ptr,
					   ib_chunk->length_dw * 4))
				return -EFAULT;
		}

		ib_chunk = parser->chunk_ib;
		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
			return -EINVAL;
		}
	}
	ib_chunk = parser->chunk_ib;

	r = radeon_ib_get(rdev, parser->ring, &parser->ib,
			  vm, ib_chunk->length_dw * 4);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
	parser->ib.length_dw = ib_chunk->length_dw;
	if (ib_chunk->kdata)
		memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
	else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
		return -EFAULT;
	return 0;
}

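/*
 * Note: overall submission flow -- take the exclusive lock, initialize the
 * parser from the user chunks, allocate and fill the IB(s), validate the
 * relocated buffers, then hand the stream to either the non-VM path
 * (radeon_cs_ib_chunk) or the VM path (radeon_cs_ib_vm_chunk); only one of
 * the two does any work, depending on RADEON_CS_USE_VM.
 */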
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_cs_parser parser;
	int r;

	down_read(&rdev->exclusive_lock);
	if (!rdev->accel_working) {
		up_read(&rdev->exclusive_lock);
		return -EBUSY;
	}
	if (rdev->in_reset) {
		up_read(&rdev->exclusive_lock);
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
		return r;
	}
	/* initialize parser */
	memset(&parser, 0, sizeof(struct radeon_cs_parser));
	parser.filp = filp;
	parser.rdev = rdev;
	parser.dev = rdev->dev;
	parser.family = rdev->family;
	r = radeon_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	r = radeon_cs_ib_fill(rdev, &parser);
	if (!r) {
		r = radeon_cs_parser_relocs(&parser);
		if (r && r != -ERESTARTSYS)
			DRM_ERROR("Failed to parse relocation %d!\n", r);
	}

	if (r) {
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	trace_radeon_cs(&parser);

	r = radeon_cs_ib_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
	r = radeon_cs_ib_vm_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
out:
	radeon_cs_parser_fini(&parser, r, true);
	up_read(&rdev->exclusive_lock);
	r = radeon_cs_handle_lockup(rdev, r);
	return r;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p:		parser structure holding parsing context.
 * @pkt:	where to store packet information
 * @idx:	index of the packet start in the ib chunk
 *
 * Assumes that chunk_ib is properly set. Returns -EINVAL if the packet is
 * bigger than the remaining ib size, or if the packet type is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt,
			   unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
	struct radeon_device *rdev = p->rdev;
	uint32_t header;
	int ret = 0, i;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
	switch (pkt->type) {
	case RADEON_PACKET_TYPE0:
		if (rdev->family < CHIP_R600) {
			pkt->reg = R100_CP_PACKET0_GET_REG(header);
			pkt->one_reg_wr =
				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
		} else
			pkt->reg = R600_CP_PACKET0_GET_REG(header);
		break;
	case RADEON_PACKET_TYPE3:
		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
		break;
	case RADEON_PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		ret = -EINVAL;
		goto dump_ib;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		ret = -EINVAL;
		goto dump_ib;
	}
	return 0;

dump_ib:
	for (i = 0; i < ib_chunk->length_dw; i++) {
		if (i == idx)
			printk("\t0x%08x <---\n", radeon_get_ib_value(p, i));
		else
			printk("\t0x%08x\n", radeon_get_ib_value(p, i));
	}
	return ret;
}

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p:		structure holding the parser context.
 *
 * Check if the next packet is a NOP relocation packet3.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return false;
	if (p3reloc.type != RADEON_PACKET_TYPE3)
		return false;
	if (p3reloc.opcode != RADEON_PACKET3_NOP)
		return false;
	return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p:		structure holding the parser context.
 * @pkt:	structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib.ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++)
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

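/*
 * Note: userspace references a buffer from the command stream with a
 * PACKET3 NOP whose payload dword is the offset of the corresponding
 * entry in the relocation chunk; the helper below resolves that offset
 * (each reloc entry being 4 dwords) to the matching radeon_bo_list entry.
 */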
/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p:		parser structure holding parsing context.
 * @cs_reloc:	where to store the resolved reloc entry
 * @nomm:	no memory management path: compute the GPU offset directly
 *		from the relocation chunk data
 *
 * Check if the next packet is a relocation packet3 and resolve it to the
 * matching entry of the relocation chunk.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
				struct radeon_bo_list **cs_reloc,
				int nomm)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs == NULL) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = p->chunk_relocs;
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return r;
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
	    p3reloc.opcode != RADEON_PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	if (nomm) {
		*cs_reloc = p->relocs;
		(*cs_reloc)->gpu_offset =
			(u64)relocs_chunk->kdata[idx + 3] << 32;
		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
	} else
		*cs_reloc = &p->relocs[(idx / 4)];
	return 0;
}