/*	$NetBSD: i915_active.c,v 1.14 2022/03/16 23:32:52 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright 2019 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_active.c,v 1.14 2022/03/16 23:32:52 riastradh Exp $");

#include <linux/debugobjects.h>

#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_ring.h"

#include "i915_drv.h"
#include "i915_active.h"
#include "i915_globals.h"

#include <linux/nbsd-namespace.h>

/*
 * Active refs memory management
 *
 * To be more economical with memory, we reap all the i915_active trees as
 * they idle (when we know the active requests are inactive) and allocate the
 * nodes from a local slab cache to hopefully reduce the fragmentation.
 */
static struct i915_global_active {
	struct i915_global base;
	struct kmem_cache *slab_cache;
} global;

struct active_node {
	struct i915_active_fence base;
	struct i915_active *ref;
	struct rb_node node;
	u64 timeline;
	struct intel_engine_cs *engine;
};

static inline struct active_node *
node_from_active(struct i915_active_fence *active)
{
	return container_of(active, struct active_node, base);
}

#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)

static inline bool is_barrier(const struct i915_active_fence *active)
{
	return IS_ERR(rcu_access_pointer(active->fence));
}

static inline struct llist_node *barrier_to_ll(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return &node->base.llist;
}

static inline struct intel_engine_cs *
__barrier_to_engine(struct active_node *node)
{
	return READ_ONCE(node->engine);
}

static inline struct intel_engine_cs *
barrier_to_engine(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return __barrier_to_engine(node);
}

static inline struct active_node *barrier_from_ll(struct llist_node *x)
{
	return container_of(x, struct active_node, base.llist);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)

static void *active_debug_hint(void *addr)
{
	struct i915_active *ref = addr;

	return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
}

static struct debug_obj_descr active_debug_desc = {
	.name = "i915_active",
	.debug_hint = active_debug_hint,
};

static void debug_active_init(struct i915_active *ref)
{
	debug_object_init(ref, &active_debug_desc);
}

static void debug_active_activate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->tree_lock);
	if (!atomic_read(&ref->count)) /* before the first inc */
		debug_object_activate(ref, &active_debug_desc);
}

static void debug_active_deactivate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->tree_lock);
	if (!atomic_read(&ref->count)) /* after the last dec */
		debug_object_deactivate(ref, &active_debug_desc);
}

static void debug_active_fini(struct i915_active *ref)
{
	debug_object_free(ref, &active_debug_desc);
}

static void debug_active_assert(struct i915_active *ref)
{
	debug_object_assert_init(ref, &active_debug_desc);
}

#else

static inline void debug_active_init(struct i915_active *ref) { }
static inline void debug_active_activate(struct i915_active *ref) { }
static inline void debug_active_deactivate(struct i915_active *ref) { }
static inline void debug_active_fini(struct i915_active *ref) { }
static inline void debug_active_assert(struct i915_active *ref) { }

#endif
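
/*
 * NetBSD uses the native rb_tree API from <sys/rbtree.h> rather than the
 * Linux rbtree.  The comparators below order active_node entries by
 * timeline id, falling back to the node address so rb_tree_insert_node()
 * can tell distinct nodes apart; key lookups match on the timeline alone.
 */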

#ifdef __NetBSD__

static int
compare_nodes(void *cookie, const void *va, const void *vb)
{
	const struct active_node *a = va;
	const struct active_node *b = vb;

	if (a->timeline < b->timeline)
		return -1;
	if (a->timeline > b->timeline)
		return +1;
	if ((uintptr_t)a < (uintptr_t)b)
		return -1;
	if ((uintptr_t)a > (uintptr_t)b)
		return +1;
	return 0;
}

static int
compare_node_key(void *cookie, const void *vn, const void *vk)
{
	const struct active_node *a = vn;
	const uint64_t *k = vk;

	if (a->timeline < *k)
		return -1;
	if (a->timeline > *k)
		return +1;
	return 0;
}

static const rb_tree_ops_t active_rb_ops = {
	.rbto_compare_nodes = compare_nodes,
	.rbto_compare_key = compare_node_key,
	.rbto_node_offset = offsetof(struct active_node, node),
};

#endif

static void
__active_retire(struct i915_active *ref)
{
	struct active_node *it, *n;
	struct rb_root root;
	unsigned long flags;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/* return the unused nodes to our slabcache -- flushing the allocator */
	if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
		return;

	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
	debug_active_deactivate(ref);

#ifdef __NetBSD__
	rb_move(&root, &ref->tree);
	rb_tree_init(&ref->tree.rbr_tree, &active_rb_ops);
#else
	root = ref->tree;
	ref->tree = RB_ROOT;
#endif
	ref->cache = NULL;

	DRM_SPIN_WAKEUP_ALL(&ref->tree_wq, &ref->tree_lock);

	spin_unlock_irqrestore(&ref->tree_lock, flags);

	/* After the final retire, the entire struct may be freed */
	if (ref->retire)
		ref->retire(ref);

	/* ... except if you wait on it, you must manage your own references! */

	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
		GEM_BUG_ON(i915_active_fence_isset(&it->base));
		kmem_cache_free(global.slab_cache, it);
	}
}
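
/*
 * Drop one reference from active_retire()/active_work().  The final
 * reference tears down the tree via __active_retire(); if the retire
 * callback may sleep (I915_ACTIVE_RETIRE_SLEEPS), the final release is
 * deferred to the system_unbound_wq worker instead of running inline.
 */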

static void
active_work(struct work_struct *wrk)
{
	struct i915_active *ref = container_of(wrk, typeof(*ref), work);

	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	__active_retire(ref);
}

static void
active_retire(struct i915_active *ref)
{
	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) {
		queue_work(system_unbound_wq, &ref->work);
		return;
	}

	__active_retire(ref);
}

static inline struct dma_fence **
__active_fence_slot(struct i915_active_fence *active)
{
	return (struct dma_fence ** __force)&active->fence;
}

static inline bool
active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct i915_active_fence *active =
		container_of(cb, typeof(*active), cb);

	return cmpxchg(__active_fence_slot(active), fence, NULL) == fence;
}

static void
node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	if (active_fence_cb(fence, cb))
		active_retire(container_of(cb, struct active_node, base.cb)->ref);
}

static void
excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	if (active_fence_cb(fence, cb))
		active_retire(container_of(cb, struct i915_active, excl.cb));
}

static struct i915_active_fence *
active_instance(struct i915_active *ref, struct intel_timeline *tl)
{
	struct active_node *node, *prealloc;
	struct rb_node **p, *parent;
	u64 idx = tl->fence_context;

	/*
	 * We track the most recently used timeline to skip a rbtree search
	 * for the common case, under typical loads we never need the rbtree
	 * at all. We can reuse the last slot if it is empty, that is
	 * after the previous activity has been retired, or if it matches the
	 * current timeline.
	 */
	node = READ_ONCE(ref->cache);
	if (node && node->timeline == idx)
		return &node->base;

	/* Preallocate a replacement, just in case */
	prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
	if (!prealloc)
		return NULL;
	memset(prealloc, 0, sizeof(*prealloc));

	spin_lock_irq(&ref->tree_lock);
	GEM_BUG_ON(i915_active_is_idle(ref));

#ifdef __NetBSD__
	__USE(parent);
	__USE(p);
	node = rb_tree_find_node(&ref->tree.rbr_tree, &idx);
	if (node) {
		KASSERT(node->timeline == idx);
		goto out;
	}
#else
	parent = NULL;
	p = &ref->tree.rb_node;
	while (*p) {
		parent = *p;

		node = rb_entry(parent, struct active_node, node);
		if (node->timeline == idx) {
			kmem_cache_free(global.slab_cache, prealloc);
			goto out;
		}

		if (node->timeline < idx)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
#endif

	node = prealloc;
	prealloc = NULL;
	__i915_active_fence_init(&node->base, NULL, node_retire);
	node->ref = ref;
	node->timeline = idx;

#ifdef __NetBSD__
	struct active_node *collision __diagused;
	collision = rb_tree_insert_node(&ref->tree.rbr_tree, node);
	KASSERT(collision == node);
#else
	rb_link_node(&node->node, parent, p);
	rb_insert_color(&node->node, &ref->tree);
#endif

out:
	ref->cache = node;
	spin_unlock_irq(&ref->tree_lock);

#ifdef __NetBSD__
	if (prealloc)
		kmem_cache_free(global.slab_cache, prealloc);
#endif

	BUILD_BUG_ON(offsetof(typeof(*node), base));
	return &node->base;
}
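
/*
 * Initialise an i915_active tracker.  The low bits of the retire
 * callback pointer carry I915_ACTIVE_MAY_SLEEP, which is unpacked here
 * into I915_ACTIVE_RETIRE_SLEEPS; the rbtree, spin lock, wait queue and
 * exclusive fence slot all start out idle with a zero reference count.
 */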

void __i915_active_init(struct i915_active *ref,
			int (*active)(struct i915_active *ref),
			void (*retire)(struct i915_active *ref),
			struct lock_class_key *mkey,
			struct lock_class_key *wkey)
{
	unsigned long bits;

	debug_active_init(ref);

	ref->flags = 0;
	ref->active = active;
	ref->retire = ptr_unpack_bits(retire, &bits, 2);
	if (bits & I915_ACTIVE_MAY_SLEEP)
		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;

	spin_lock_init(&ref->tree_lock);
	DRM_INIT_WAITQUEUE(&ref->tree_wq, "i915act");
#ifdef __NetBSD__
	rb_tree_init(&ref->tree.rbr_tree, &active_rb_ops);
#else
	ref->tree = RB_ROOT;
#endif
	ref->cache = NULL;

	init_llist_head(&ref->preallocated_barriers);
	atomic_set(&ref->count, 0);
	__mutex_init(&ref->mutex, "i915_active", mkey);
	__i915_active_fence_init(&ref->excl, NULL, excl_retire);
	INIT_WORK(&ref->work, active_work);
#if IS_ENABLED(CONFIG_LOCKDEP)
	lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0);
#endif
}

static bool ____active_del_barrier(struct i915_active *ref,
				   struct active_node *node,
				   struct intel_engine_cs *engine)

{
	struct llist_node *head = NULL, *tail = NULL;
	struct llist_node *pos, *next;

	GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);

	/*
	 * Rebuild the llist excluding our node. We may perform this
	 * outside of the kernel_context timeline mutex and so someone
	 * else may be manipulating the engine->barrier_tasks, in
	 * which case either we or they will be upset :)
	 *
	 * A second __active_del_barrier() will report failure to claim
	 * the active_node and the caller will just shrug and know not to
	 * claim ownership of its node.
	 *
	 * A concurrent i915_request_add_active_barriers() will miss adding
	 * any of the tasks, but we will try again on the next -- and since
	 * we are actively using the barrier, we know that there will be
	 * at least another opportunity when we idle.
	 */
	llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
		if (node == barrier_from_ll(pos)) {
			node = NULL;
			continue;
		}

		pos->next = head;
		head = pos;
		if (!tail)
			tail = pos;
	}
	if (head)
		llist_add_batch(head, tail, &engine->barrier_tasks);

	return !node;
}

static bool
__active_del_barrier(struct i915_active *ref, struct active_node *node)
{
	return ____active_del_barrier(ref, node, barrier_to_engine(node));
}
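
/*
 * Record @fence as the most recent activity on @tl within @ref.  If the
 * slot for this timeline currently holds an idle-barrier proto-node, the
 * barrier is cancelled and replaced by the real fence.  The caller must
 * hold tl->mutex.
 */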

int i915_active_ref(struct i915_active *ref,
		    struct intel_timeline *tl,
		    struct dma_fence *fence)
{
	struct i915_active_fence *active;
	int err;

	lockdep_assert_held(&tl->mutex);

	/* Prevent reaping in case we malloc/wait while building the tree */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	active = active_instance(ref, tl);
	if (!active) {
		err = -ENOMEM;
		goto out;
	}

	if (is_barrier(active)) { /* proto-node used by our idle barrier */
		/*
		 * This request is on the kernel_context timeline, and so
		 * we can use it to substitute for the pending idle-barrier
		 * request that we want to emit on the kernel_context.
		 */
		__active_del_barrier(ref, node_from_active(active));
		RCU_INIT_POINTER(active->fence, NULL);
		atomic_dec(&ref->count);
	}
	if (!__i915_active_fence_set(active, fence))
		atomic_inc(&ref->count);

out:
	i915_active_release(ref);
	return err;
}

void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
{
	/* We expect the caller to manage the exclusive timeline ordering */
	GEM_BUG_ON(i915_active_is_idle(ref));

	if (!__i915_active_fence_set(&ref->excl, f))
		atomic_inc(&ref->count);
}

bool i915_active_acquire_if_busy(struct i915_active *ref)
{
	debug_active_assert(ref);
	return atomic_add_unless(&ref->count, 1, 0);
}

int i915_active_acquire(struct i915_active *ref)
{
	int err;

	if (i915_active_acquire_if_busy(ref))
		return 0;

	err = mutex_lock_interruptible(&ref->mutex);
	if (err)
		return err;

	if (likely(!i915_active_acquire_if_busy(ref))) {
		if (ref->active)
			err = ref->active(ref);
		if (!err) {
			spin_lock_irq(&ref->tree_lock); /* __active_retire() */
			debug_active_activate(ref);
			atomic_inc(&ref->count);
			spin_unlock_irq(&ref->tree_lock);
		}
	}

	mutex_unlock(&ref->mutex);

	return err;
}

void i915_active_release(struct i915_active *ref)
{
	debug_active_assert(ref);
	active_retire(ref);
}

static void enable_signaling(struct i915_active_fence *active)
{
	struct dma_fence *fence;

	fence = i915_active_fence_get(active);
	if (!fence)
		return;

	dma_fence_enable_sw_signaling(fence);
	dma_fence_put(fence);
}

int i915_active_wait(struct i915_active *ref)
{
	struct active_node *it, *n;
	int err = 0;

	might_sleep();

	if (!i915_active_acquire_if_busy(ref))
		return 0;

	/* Flush lazy signals */
	enable_signaling(&ref->excl);
	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		if (is_barrier(&it->base)) /* unconnected idle barrier */
			continue;

		enable_signaling(&it->base);
	}
	/* Any fence added after the wait begins will not be auto-signaled */

	i915_active_release(ref);
	if (err)
		return err;

	spin_lock(&ref->tree_lock);
	DRM_SPIN_WAIT_UNTIL(err, &ref->tree_wq, &ref->tree_lock,
	    i915_active_is_idle(ref));
	spin_unlock(&ref->tree_lock);
	if (err)
		return err;

	flush_work(&ref->work);
	return 0;
}
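
/*
 * Make @rq wait for the exclusive fence (if any) tracked by @ref.  Only
 * the exclusive slot is awaited here; the per-timeline fences are not
 * (see the comment at the end of the function).
 */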

int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
{
	int err = 0;

	if (rcu_access_pointer(ref->excl.fence)) {
		struct dma_fence *fence;

		rcu_read_lock();
		fence = dma_fence_get_rcu_safe(&ref->excl.fence);
		rcu_read_unlock();
		if (fence) {
			err = i915_request_await_dma_fence(rq, fence);
			dma_fence_put(fence);
		}
	}

	/* In the future we may choose to await on all fences */

	return err;
}

void i915_active_fini(struct i915_active *ref)
{
	debug_active_fini(ref);
	GEM_BUG_ON(atomic_read(&ref->count));
	GEM_BUG_ON(work_pending(&ref->work));
	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
	mutex_destroy(&ref->mutex);
	spin_lock_destroy(&ref->tree_lock);
}

static inline bool is_idle_barrier(struct active_node *node, u64 idx)
{
	return node->timeline == idx && !i915_active_fence_isset(&node->base);
}

static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
{
	struct rb_node *prev, *p;

	if (RB_EMPTY_ROOT(&ref->tree))
		return NULL;

	spin_lock_irq(&ref->tree_lock);
	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Try to reuse any existing barrier nodes already allocated for this
	 * i915_active, due to overlapping active phases there is likely a
	 * node kept alive (as we reuse before parking). We prefer to reuse
	 * completely idle barriers (less hassle in manipulating the llists),
	 * but otherwise any will do.
	 */
	if (ref->cache && is_idle_barrier(ref->cache, idx)) {
		p = &ref->cache->node;
		goto match;
	}

#ifdef __NetBSD__
	{
		struct active_node *node =
			rb_tree_find_node_leq(&ref->tree.rbr_tree, &idx);
		if (node) {
			if (node->timeline == idx && is_idle_barrier(node, idx)) {
				p = &node->node;
				goto match;
			}
			prev = &node->node;
		} else {
			prev = NULL;
		}
	}
#else
	prev = NULL;
	p = ref->tree.rb_node;
	while (p) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);

		if (is_idle_barrier(node, idx))
			goto match;

		prev = p;
		if (node->timeline < idx)
			p = p->rb_right;
		else
			p = p->rb_left;
	}
#endif

	/*
	 * No quick match, but we did find the leftmost rb_node for the
	 * kernel_context. Walk the rb_tree in-order to see if there were
	 * any idle-barriers on this timeline that we missed, or just use
	 * the first pending barrier.
	 */
	for (p = prev; p; p = rb_next2(&ref->tree, p)) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);
		struct intel_engine_cs *engine;

		if (node->timeline > idx)
			break;

		if (node->timeline < idx)
			continue;

		if (is_idle_barrier(node, idx))
			goto match;

		/*
		 * The list of pending barriers is protected by the
		 * kernel_context timeline, which notably we do not hold
		 * here. i915_request_add_active_barriers() may consume
		 * the barrier before we claim it, so we have to check
		 * for success.
		 */
		engine = __barrier_to_engine(node);
		smp_rmb(); /* serialise with add_active_barriers */
		if (is_barrier(&node->base) &&
		    ____active_del_barrier(ref, node, engine))
			goto match;
	}

	spin_unlock_irq(&ref->tree_lock);

	return NULL;

match:
	rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
	if (p == &ref->cache->node)
		ref->cache = NULL;
	spin_unlock_irq(&ref->tree_lock);

	return rb_entry(p, struct active_node, node);
}
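
/*
 * Preallocate one idle-barrier node per physical engine backing @engine,
 * reusing idle nodes from the tree where possible, and stash them on
 * ref->preallocated_barriers for a later i915_active_acquire_barrier().
 * Each node takes an engine-pm reference that is dropped again in
 * i915_active_acquire_barrier() (or on the error unwind).
 */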

int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine)
{
	intel_engine_mask_t tmp, mask = engine->mask;
	struct llist_node *first = NULL, *last = NULL;
	struct intel_gt *gt = engine->gt;
	int err;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/* Wait until the previous preallocation is completed */
	while (!llist_empty(&ref->preallocated_barriers))
		cond_resched();

	/*
	 * Preallocate a node for each physical engine supporting the target
	 * engine (remember virtual engines have more than one sibling).
	 * We can then use the preallocated nodes in
	 * i915_active_acquire_barrier()
	 */
	for_each_engine_masked(engine, gt, mask, tmp) {
		u64 idx = engine->kernel_context->timeline->fence_context;
		struct llist_node *prev = first;
		struct active_node *node;

		node = reuse_idle_barrier(ref, idx);
		if (!node) {
			node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
			if (!node) {
				err = -ENOMEM;
				goto unwind;
			}

			memset(node, 0, sizeof(*node));
			RCU_INIT_POINTER(node->base.fence, NULL);
			node->base.cb.func = node_retire;
			node->timeline = idx;
			node->ref = ref;
		}

		if (!i915_active_fence_isset(&node->base)) {
			/*
			 * Mark this as being *our* unconnected proto-node.
			 *
			 * Since this node is not in any list, and we have
			 * decoupled it from the rbtree, we can reuse the
			 * request to indicate this is an idle-barrier node
			 * and then we can use the rb_node and list pointers
			 * for our tracking of the pending barrier.
			 */
			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
			node->engine = engine;
			atomic_inc(&ref->count);
		}
		GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));

		GEM_BUG_ON(barrier_to_engine(node) != engine);
		first = barrier_to_ll(node);
		first->next = prev;
		if (!last)
			last = first;
		intel_engine_pm_get(engine);
	}

	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
	llist_add_batch(first, last, &ref->preallocated_barriers);

	return 0;

unwind:
	while (first) {
		struct active_node *node = barrier_from_ll(first);

		first = first->next;

		atomic_dec(&ref->count);
		intel_engine_pm_put(barrier_to_engine(node));

		kmem_cache_free(global.slab_cache, node);
	}
	return err;
}

void i915_active_acquire_barrier(struct i915_active *ref)
{
	struct llist_node *pos, *next;
	unsigned long flags;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Transfer the list of preallocated barriers into the
	 * i915_active rbtree, but only as proto-nodes. They will be
	 * populated by i915_request_add_active_barriers() to point to the
	 * request that will eventually release them.
	 */
	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
		struct active_node *node = barrier_from_ll(pos);
		struct intel_engine_cs *engine = barrier_to_engine(node);
		struct rb_node **p, *parent;

		spin_lock_irqsave_nested(&ref->tree_lock, flags,
					 SINGLE_DEPTH_NESTING);
#ifdef __NetBSD__
		__USE(p);
		__USE(parent);
		struct active_node *collision __diagused;
		collision = rb_tree_insert_node(&ref->tree.rbr_tree, node);
		KASSERT(collision == node);
#else
		parent = NULL;
		p = &ref->tree.rb_node;
		while (*p) {
			struct active_node *it;

			parent = *p;

			it = rb_entry(parent, struct active_node, node);
			if (it->timeline < node->timeline)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&node->node, parent, p);
		rb_insert_color(&node->node, &ref->tree);
#endif
		spin_unlock_irqrestore(&ref->tree_lock, flags);

		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
		intel_engine_pm_put(engine);
	}
}

static struct dma_fence **ll_to_fence_slot(struct llist_node *node)
{
	return __active_fence_slot(&barrier_from_ll(node)->base);
}

void i915_request_add_active_barriers(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct llist_node *node, *next;
	unsigned long flags;

	GEM_BUG_ON(!intel_context_is_barrier(rq->context));
	GEM_BUG_ON(intel_engine_is_virtual(engine));
	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);

	node = llist_del_all(&engine->barrier_tasks);
	if (!node)
		return;
	/*
	 * Attach the list of proto-fences to the in-flight request such
	 * that the parent i915_active will be released when this request
	 * is retired.
	 */
	spin_lock_irqsave(&rq->lock, flags);
	llist_for_each_safe(node, next, node) {
		/* serialise with reuse_idle_barrier */
		smp_store_mb(*ll_to_fence_slot(node), &rq->fence);
#ifdef __NetBSD__
		/* XXX ugh bletch */
		struct i915_active_fence *active =
			container_of(node, struct i915_active_fence, llist);
		/* XXX something bad went wrong in making this code */
		KASSERT(active->cb.func == node_retire ||
		    active->cb.func == excl_retire ||
		    active->cb.func == i915_active_noop);
		KASSERTMSG(active->fence == &rq->fence,
		    "active=%p fence=%p; rq=%p fence=%p",
		    active, active->fence, rq, &rq->fence);
		KASSERTMSG(!active->cb.fcb_onqueue, "active=%p", active);
		active->cb.fcb_onqueue = true;
		TAILQ_INSERT_TAIL(&rq->fence.f_callbacks, &active->cb,
		    fcb_entry);
#else
		list_add_tail((struct list_head *)node, &rq->fence.cb_list);
#endif
	}
	spin_unlock_irqrestore(&rq->lock, flags);
}

/*
 * __i915_active_fence_set: Update the last active fence along its timeline
 * @active: the active tracker
 * @fence: the new fence (under construction)
 *
 * Records the new @fence as the last active fence along its timeline in
 * this active tracker, moving the tracking callbacks from the previous
 * fence onto this one. Returns the previous fence (if not already completed),
 * which the caller must ensure is executed before the new fence. To ensure
 * that the order of fences within the timeline of the i915_active_fence is
 * understood, it should be locked by the caller.
 */
struct dma_fence *
__i915_active_fence_set(struct i915_active_fence *active,
			struct dma_fence *fence)
{
	struct dma_fence *prev;
	unsigned long flags;

	if (fence == rcu_access_pointer(active->fence))
		return fence;

	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));

	/*
	 * Consider that we have two threads arriving (A and B), with
	 * C already resident as the active->fence.
	 *
	 * A does the xchg first, and so it sees C or NULL depending
	 * on the timing of the interrupt handler. If it is NULL, the
	 * previous fence must have been signaled and we know that
	 * we are first on the timeline. If it is still present,
	 * we acquire the lock on that fence and serialise with the interrupt
	 * handler, in the process removing it from any future interrupt
	 * callback. A will then wait on C before executing (if present).
	 *
	 * As B is second, it sees A as the previous fence and so waits for
	 * it to complete its transition and takes over the occupancy for
	 * itself -- remembering that it needs to wait on A before executing.
	 *
	 * Note the strong ordering of the timeline also provides consistent
	 * nesting rules for the fence->lock; the inner lock is always the
	 * older lock.
	 */
	spin_lock_irqsave(fence->lock, flags);
	prev = xchg(__active_fence_slot(active), fence);
	if (prev) {
		GEM_BUG_ON(prev == fence);
		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
#ifdef __NetBSD__
		/* XXX ugh bletch */
		KASSERT(active->cb.func == node_retire ||
		    active->cb.func == excl_retire ||
		    active->cb.func == i915_active_noop);
		if (active->cb.fcb_onqueue) {
			TAILQ_REMOVE(&prev->f_callbacks, &active->cb,
			    fcb_entry);
			active->cb.fcb_onqueue = false;
		}
#else
		__list_del_entry(&active->cb.node);
#endif
		spin_unlock(prev->lock); /* serialise with prev->cb_list */
	}
	GEM_BUG_ON(rcu_access_pointer(active->fence) != fence);
#ifdef __NetBSD__
	/* XXX ugh bletch */
	KASSERT(!active->cb.fcb_onqueue);
	active->cb.fcb_onqueue = true;
	TAILQ_INSERT_TAIL(&fence->f_callbacks, &active->cb, fcb_entry);
#else
	list_add_tail(&active->cb.node, &fence->cb_list);
#endif
	spin_unlock_irqrestore(fence->lock, flags);

	return prev;
}

int i915_active_fence_set(struct i915_active_fence *active,
			  struct i915_request *rq)
{
	struct dma_fence *fence;
	int err = 0;

	/* Must maintain timeline ordering wrt previous active requests */
	rcu_read_lock();
	fence = __i915_active_fence_set(active, &rq->fence);
	if (fence) /* but the previous fence may not belong to that timeline! */
		fence = dma_fence_get_rcu(fence);
	rcu_read_unlock();
	if (fence) {
		err = i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	return err;
}
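
/*
 * Fence callback that only clears the active slot (via active_fence_cb)
 * and does not retire any i915_active tracker.
 */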

void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	active_fence_cb(fence, cb);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif

static void i915_global_active_shrink(void)
{
	kmem_cache_shrink(global.slab_cache);
}

static void i915_global_active_exit(void)
{
	kmem_cache_destroy(global.slab_cache);
}

static struct i915_global_active global = { {
	.shrink = i915_global_active_shrink,
	.exit = i915_global_active_exit,
} };

int __init i915_global_active_init(void)
{
	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
	if (!global.slab_cache)
		return -ENOMEM;

	i915_global_register(&global.base);
	return 0;
}