/*	$NetBSD: i915_active.c,v 1.4 2021/12/19 11:52:07 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_active.c,v 1.4 2021/12/19 11:52:07 riastradh Exp $");

#include <linux/debugobjects.h>

#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_ring.h"

#include "i915_drv.h"
#include "i915_active.h"
#include "i915_globals.h"

#include <linux/nbsd-namespace.h>

/*
 * Active refs memory management
 *
 * To be more economical with memory, we reap all the i915_active trees as
 * they idle (when we know the active requests are inactive) and allocate the
 * nodes from a local slab cache to hopefully reduce the fragmentation.
 */
static struct i915_global_active {
	struct i915_global base;
	struct kmem_cache *slab_cache;
} global;

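/*
 * An active_node tracks one fence slot per timeline (keyed by the timeline's
 * fence context id) within an i915_active.  Nodes live in ref->tree and are
 * reaped by __active_retire() once the whole tracker goes idle.  For barrier
 * proto-nodes, engine records which engine's kernel_context will eventually
 * retire the node.
 */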
struct active_node {
	struct i915_active_fence base;
	struct i915_active *ref;
	struct rb_node node;
	u64 timeline;
	struct intel_engine_cs *engine;
};

static inline struct active_node *
node_from_active(struct i915_active_fence *active)
{
	return container_of(active, struct active_node, base);
}

#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)

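/*
 * Barrier proto-nodes are distinguished by storing ERR_PTR(-EAGAIN) in the
 * fence slot instead of a real fence; while in that state the rb_node and
 * llist pointers are borrowed to track the pending barrier on the engine's
 * barrier_tasks list.
 */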
static inline bool is_barrier(const struct i915_active_fence *active)
{
	return IS_ERR(rcu_access_pointer(active->fence));
}

static inline struct llist_node *barrier_to_ll(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return &node->base.llist;
}

static inline struct intel_engine_cs *
__barrier_to_engine(struct active_node *node)
{
	return READ_ONCE(node->engine);
}

static inline struct intel_engine_cs *
barrier_to_engine(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return __barrier_to_engine(node);
}

static inline struct active_node *barrier_from_ll(struct llist_node *x)
{
	return container_of(x, struct active_node, base.llist);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)

static void *active_debug_hint(void *addr)
{
	struct i915_active *ref = addr;

	return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
}

static struct debug_obj_descr active_debug_desc = {
	.name = "i915_active",
	.debug_hint = active_debug_hint,
};

static void debug_active_init(struct i915_active *ref)
{
	debug_object_init(ref, &active_debug_desc);
}

static void debug_active_activate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->tree_lock);
	if (!atomic_read(&ref->count)) /* before the first inc */
		debug_object_activate(ref, &active_debug_desc);
}

static void debug_active_deactivate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->tree_lock);
	if (!atomic_read(&ref->count)) /* after the last dec */
		debug_object_deactivate(ref, &active_debug_desc);
}

static void debug_active_fini(struct i915_active *ref)
{
	debug_object_free(ref, &active_debug_desc);
}

static void debug_active_assert(struct i915_active *ref)
{
	debug_object_assert_init(ref, &active_debug_desc);
}

#else

static inline void debug_active_init(struct i915_active *ref) { }
static inline void debug_active_activate(struct i915_active *ref) { }
static inline void debug_active_deactivate(struct i915_active *ref) { }
static inline void debug_active_fini(struct i915_active *ref) { }
static inline void debug_active_assert(struct i915_active *ref) { }

#endif

#ifdef __NetBSD__

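/*
 * NetBSD's rb_tree(9) takes comparator ops instead of the open-coded walks
 * used by the Linux rbtree, so the tree of active_nodes is keyed here by the
 * timeline (fence context) id.
 */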
static int
compare_nodes(void *cookie, const void *va, const void *vb)
{
	const struct active_node *a = va;
	const struct active_node *b = vb;

	if (a->timeline < b->timeline)
		return -1;
	if (a->timeline > b->timeline)
		return +1;
	return 0;
}

static int
compare_node_key(void *cookie, const void *vn, const void *vk)
{
	const struct active_node *a = vn;
	const uint64_t *k = vk;

	if (a->timeline < *k)
		return -1;
	if (a->timeline > *k)
		return +1;
	return 0;
}

static const rb_tree_ops_t active_rb_ops = {
	.rbto_compare_nodes = compare_nodes,
	.rbto_compare_key = compare_node_key,
	.rbto_node_offset = offsetof(struct active_node, node),
};

#endif

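/*
 * Called when the last reference is dropped: reset the tree, wake anyone
 * sleeping in i915_active_wait(), invoke the optional retire callback, and
 * return the now-unused nodes to the slab cache.
 */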
static void
__active_retire(struct i915_active *ref)
{
	struct active_node *it, *n;
	struct rb_root root;
	unsigned long flags;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/* return the unused nodes to our slabcache -- flushing the allocator */
	if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
		return;

	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
	debug_active_deactivate(ref);

	root = ref->tree;
#ifdef __NetBSD__
	rb_tree_init(&ref->tree.rbr_tree, &active_rb_ops);
#else
	ref->tree = RB_ROOT;
#endif
	ref->cache = NULL;

	DRM_SPIN_WAKEUP_ALL(&ref->tree_wq, &ref->tree_lock);

	spin_unlock_irqrestore(&ref->tree_lock, flags);

	/* After the final retire, the entire struct may be freed */
	if (ref->retire)
		ref->retire(ref);

	/* ... except if you wait on it, you must manage your own references! */

	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
		GEM_BUG_ON(i915_active_fence_isset(&it->base));
		kmem_cache_free(global.slab_cache, it);
	}
}

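/*
 * active_retire() drops one reference; when the last reference is being
 * dropped the final retire is either run inline or, if the retire callback
 * may sleep, deferred to system_unbound_wq via ref->work (active_work()).
 */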
static void
active_work(struct work_struct *wrk)
{
	struct i915_active *ref = container_of(wrk, typeof(*ref), work);

	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	__active_retire(ref);
}

static void
active_retire(struct i915_active *ref)
{
	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) {
		queue_work(system_unbound_wq, &ref->work);
		return;
	}

	__active_retire(ref);
}

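/*
 * The fence pointer doubles as a claim token: active_fence_cb() clears the
 * slot with cmpxchg() when the fence signals, and only reports success if
 * the slot still held that fence (i.e. it was not already replaced by a
 * newer fence via __i915_active_fence_set()).
 */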
static inline struct dma_fence **
__active_fence_slot(struct i915_active_fence *active)
{
	return (struct dma_fence ** __force)&active->fence;
}

static inline bool
active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct i915_active_fence *active =
		container_of(cb, typeof(*active), cb);

	return cmpxchg(__active_fence_slot(active), fence, NULL) == fence;
}

static void
node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	if (active_fence_cb(fence, cb))
		active_retire(container_of(cb, struct active_node, base.cb)->ref);
}

static void
excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	if (active_fence_cb(fence, cb))
		active_retire(container_of(cb, struct i915_active, excl.cb));
}

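/*
 * Look up (or allocate and insert) the fence slot for the given timeline.
 * The most recently used node is cached in ref->cache so that repeated
 * activity on the same timeline avoids the tree walk entirely.
 */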
static struct i915_active_fence *
active_instance(struct i915_active *ref, struct intel_timeline *tl)
{
	struct active_node *node, *prealloc;
	struct rb_node **p, *parent;
	u64 idx = tl->fence_context;

	/*
	 * We track the most recently used timeline to skip an rbtree search
	 * for the common case; under typical loads we never need the rbtree
	 * at all. We can reuse the last slot if it is empty, that is, after
	 * the previous activity has been retired, or if it matches the
	 * current timeline.
	 */
	node = READ_ONCE(ref->cache);
	if (node && node->timeline == idx)
		return &node->base;

	/* Preallocate a replacement, just in case */
	prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
	if (!prealloc)
		return NULL;

	spin_lock_irq(&ref->tree_lock);
	GEM_BUG_ON(i915_active_is_idle(ref));

#ifdef __NetBSD__
	__USE(parent);
	__USE(p);
	node = rb_tree_find_node(&ref->tree.rbr_tree, &idx);
	if (node) {
		KASSERT(node->timeline == idx);
		/* Discard the unneeded preallocation (matches the Linux path). */
		kmem_cache_free(global.slab_cache, prealloc);
		goto out;
	}
#else
	parent = NULL;
	p = &ref->tree.rb_node;
	while (*p) {
		parent = *p;

		node = rb_entry(parent, struct active_node, node);
		if (node->timeline == idx) {
			kmem_cache_free(global.slab_cache, prealloc);
			goto out;
		}

		if (node->timeline < idx)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
#endif

	node = prealloc;
	__i915_active_fence_init(&node->base, NULL, node_retire);
	node->ref = ref;
	node->timeline = idx;

#ifdef __NetBSD__
	struct active_node *collision __diagused;
	collision = rb_tree_insert_node(&ref->tree.rbr_tree, node);
	KASSERT(collision == node);
#else
	rb_link_node(&node->node, parent, p);
	rb_insert_color(&node->node, &ref->tree);
#endif

out:
	ref->cache = node;
	spin_unlock_irq(&ref->tree_lock);

	BUILD_BUG_ON(offsetof(typeof(*node), base));
	return &node->base;
}

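/*
 * Initialise an i915_active tracker.  The optional active callback runs on
 * the first acquire and retire on the final release; the low bits of the
 * retire pointer (unpacked with ptr_unpack_bits()) may carry
 * I915_ACTIVE_MAY_SLEEP, which defers retirement to a worker.
 *
 * A rough usage sketch, assuming the i915_active_init() wrapper macro from
 * i915_active.h supplies the lock class keys (callback names are
 * illustrative only):
 *
 *	i915_active_init(&obj->active, my_active, my_retire);
 *	err = i915_active_ref(&obj->active, tl, &rq->fence);
 *	...
 *	err = i915_active_wait(&obj->active);
 *	i915_active_fini(&obj->active);
 */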
void __i915_active_init(struct i915_active *ref,
			int (*active)(struct i915_active *ref),
			void (*retire)(struct i915_active *ref),
			struct lock_class_key *mkey,
			struct lock_class_key *wkey)
{
	unsigned long bits;

	debug_active_init(ref);

	ref->flags = 0;
	ref->active = active;
	ref->retire = ptr_unpack_bits(retire, &bits, 2);
	if (bits & I915_ACTIVE_MAY_SLEEP)
		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;

	spin_lock_init(&ref->tree_lock);
	DRM_INIT_WAITQUEUE(&ref->tree_wq, "i915act");
#ifdef __NetBSD__
	rb_tree_init(&ref->tree.rbr_tree, &active_rb_ops);
#else
	ref->tree = RB_ROOT;
#endif
	ref->cache = NULL;

	init_llist_head(&ref->preallocated_barriers);
	atomic_set(&ref->count, 0);
	__mutex_init(&ref->mutex, "i915_active", mkey);
	__i915_active_fence_init(&ref->excl, NULL, excl_retire);
	INIT_WORK(&ref->work, active_work);
#if IS_ENABLED(CONFIG_LOCKDEP)
	lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0);
#endif
}

static bool ____active_del_barrier(struct i915_active *ref,
				   struct active_node *node,
				   struct intel_engine_cs *engine)
{
	struct llist_node *head = NULL, *tail = NULL;
	struct llist_node *pos, *next;

	GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);

	/*
	 * Rebuild the llist excluding our node. We may perform this
	 * outside of the kernel_context timeline mutex and so someone
	 * else may be manipulating the engine->barrier_tasks, in
	 * which case either we or they will be upset :)
	 *
	 * A second __active_del_barrier() will report failure to claim
	 * the active_node and the caller will just shrug and know not to
	 * claim ownership of its node.
	 *
	 * A concurrent i915_request_add_active_barriers() will miss adding
	 * any of the tasks, but we will try again on the next request --
	 * and since we are actively using the barrier, we know that there
	 * will be at least another opportunity when we idle.
	 */
	llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
		if (node == barrier_from_ll(pos)) {
			node = NULL;
			continue;
		}

		pos->next = head;
		head = pos;
		if (!tail)
			tail = pos;
	}
	if (head)
		llist_add_batch(head, tail, &engine->barrier_tasks);

	return !node;
}

static bool
__active_del_barrier(struct i915_active *ref, struct active_node *node)
{
	return ____active_del_barrier(ref, node, barrier_to_engine(node));
}

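/*
 * Record @fence as the most recent activity on timeline @tl within @ref,
 * taking a reference on the tracker that is dropped again when the fence is
 * retired.  The caller must hold the timeline mutex.
 */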
int i915_active_ref(struct i915_active *ref,
		    struct intel_timeline *tl,
		    struct dma_fence *fence)
{
	struct i915_active_fence *active;
	int err;

	lockdep_assert_held(&tl->mutex);

	/* Prevent reaping in case we malloc/wait while building the tree */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	active = active_instance(ref, tl);
	if (!active) {
		err = -ENOMEM;
		goto out;
	}

	if (is_barrier(active)) { /* proto-node used by our idle barrier */
		/*
		 * This request is on the kernel_context timeline, and so
		 * we can use it to substitute for the pending idle-barrier
		 * request that we want to emit on the kernel_context.
		 */
		__active_del_barrier(ref, node_from_active(active));
		RCU_INIT_POINTER(active->fence, NULL);
		atomic_dec(&ref->count);
	}
	if (!__i915_active_fence_set(active, fence))
		atomic_inc(&ref->count);

out:
	i915_active_release(ref);
	return err;
}

void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
{
	/* We expect the caller to manage the exclusive timeline ordering */
	GEM_BUG_ON(i915_active_is_idle(ref));

	if (!__i915_active_fence_set(&ref->excl, f))
		atomic_inc(&ref->count);
}

bool i915_active_acquire_if_busy(struct i915_active *ref)
{
	debug_active_assert(ref);
	return atomic_add_unless(&ref->count, 1, 0);
}

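/*
 * Take a reference on the tracker, running the optional ref->active()
 * callback under ref->mutex on the idle-to-busy transition.  Paired with
 * i915_active_release().
 */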
int i915_active_acquire(struct i915_active *ref)
{
	int err;

	if (i915_active_acquire_if_busy(ref))
		return 0;

	err = mutex_lock_interruptible(&ref->mutex);
	if (err)
		return err;

	if (likely(!i915_active_acquire_if_busy(ref))) {
		if (ref->active)
			err = ref->active(ref);
		if (!err) {
			spin_lock_irq(&ref->tree_lock); /* __active_retire() */
			debug_active_activate(ref);
			atomic_inc(&ref->count);
			spin_unlock_irq(&ref->tree_lock);
		}
	}

	mutex_unlock(&ref->mutex);

	return err;
}

void i915_active_release(struct i915_active *ref)
{
	debug_active_assert(ref);
	active_retire(ref);
}

static void enable_signaling(struct i915_active_fence *active)
{
	struct dma_fence *fence;

	fence = i915_active_fence_get(active);
	if (!fence)
		return;

	dma_fence_enable_sw_signaling(fence);
	dma_fence_put(fence);
}

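/*
 * Wait for all tracked activity to be retired: flush lazy signalling on
 * every tracked fence, sleep on ref->tree_wq until the tracker is idle, and
 * finally flush the deferred retire worker.  Fences added after the wait
 * begins are not waited upon.
 */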
int i915_active_wait(struct i915_active *ref)
{
	struct active_node *it, *n;
	int err = 0;

	might_sleep();

	if (!i915_active_acquire_if_busy(ref))
		return 0;

	/* Flush lazy signals */
	enable_signaling(&ref->excl);
	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		if (is_barrier(&it->base)) /* unconnected idle barrier */
			continue;

		enable_signaling(&it->base);
	}
	/* Any fence added after the wait begins will not be auto-signaled */

	i915_active_release(ref);
	if (err)
		return err;

	spin_lock(&ref->tree_lock);
	DRM_SPIN_WAIT_UNTIL(err, &ref->tree_wq, &ref->tree_lock,
	    i915_active_is_idle(ref));
	spin_unlock(&ref->tree_lock);
	if (err)
		return err;

	flush_work(&ref->work);
	return 0;
}

int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
{
	int err = 0;

	if (rcu_access_pointer(ref->excl.fence)) {
		struct dma_fence *fence;

		rcu_read_lock();
		fence = dma_fence_get_rcu_safe(&ref->excl.fence);
		rcu_read_unlock();
		if (fence) {
			err = i915_request_await_dma_fence(rq, fence);
			dma_fence_put(fence);
		}
	}

	/* In the future we may choose to await on all fences */

	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
	debug_active_fini(ref);
	GEM_BUG_ON(atomic_read(&ref->count));
	GEM_BUG_ON(work_pending(&ref->work));
	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
	mutex_destroy(&ref->mutex);
}
#endif

static inline bool is_idle_barrier(struct active_node *node, u64 idx)
{
	return node->timeline == idx && !i915_active_fence_isset(&node->base);
}

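/*
 * Steal a barrier node for timeline @idx out of ref->tree, preferring a
 * completely idle node but falling back to any pending barrier that can
 * still be claimed from the engine's barrier_tasks list.
 */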
static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
{
	struct rb_node *prev, *p;

	if (RB_EMPTY_ROOT(&ref->tree))
		return NULL;

	spin_lock_irq(&ref->tree_lock);
	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Try to reuse any existing barrier nodes already allocated for this
	 * i915_active; due to overlapping active phases there is likely a
	 * node kept alive (as we reuse before parking). We prefer to reuse
	 * completely idle barriers (less hassle in manipulating the llists),
	 * but otherwise any will do.
	 */
	if (ref->cache && is_idle_barrier(ref->cache, idx)) {
		p = &ref->cache->node;
		goto match;
	}

#ifdef __NetBSD__
    {
	struct active_node *node =
	    rb_tree_find_node_leq(&ref->tree.rbr_tree, &idx);
	if (node) {
		if (node->timeline == idx && is_idle_barrier(node, idx)) {
			p = &node->node;
			goto match;
		}
		prev = &node->node;
	} else {
		prev = NULL;
	}
    }
#else
	prev = NULL;
	p = ref->tree.rb_node;
	while (p) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);

		if (is_idle_barrier(node, idx))
			goto match;

		prev = p;
		if (node->timeline < idx)
			p = p->rb_right;
		else
			p = p->rb_left;
	}
#endif

	/*
	 * No quick match, but we did find the leftmost rb_node for the
	 * kernel_context. Walk the rb_tree in-order to see if there were
	 * any idle-barriers on this timeline that we missed, or just use
	 * the first pending barrier.
	 */
	for (p = prev; p; p = rb_next2(&ref->tree, p)) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);
		struct intel_engine_cs *engine;

		if (node->timeline > idx)
			break;

		if (node->timeline < idx)
			continue;

		if (is_idle_barrier(node, idx))
			goto match;

		/*
		 * The list of pending barriers is protected by the
		 * kernel_context timeline, which notably we do not hold
		 * here. i915_request_add_active_barriers() may consume
		 * the barrier before we claim it, so we have to check
		 * for success.
		 */
		engine = __barrier_to_engine(node);
		smp_rmb(); /* serialise with add_active_barriers */
		if (is_barrier(&node->base) &&
		    ____active_del_barrier(ref, node, engine))
			goto match;
	}

	spin_unlock_irq(&ref->tree_lock);

	return NULL;

match:
	rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
	if (p == &ref->cache->node)
		ref->cache = NULL;
	spin_unlock_irq(&ref->tree_lock);

	return rb_entry(p, struct active_node, node);
}

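/*
 * Preallocate one barrier node per physical engine backing @engine (virtual
 * engines have several siblings), reusing idle barriers where possible, and
 * stash them on ref->preallocated_barriers for a later
 * i915_active_acquire_barrier().
 */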
int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine)
{
	intel_engine_mask_t tmp, mask = engine->mask;
	struct llist_node *first = NULL, *last = NULL;
	struct intel_gt *gt = engine->gt;
	int err;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/* Wait until the previous preallocation is completed */
	while (!llist_empty(&ref->preallocated_barriers))
		cond_resched();

	/*
	 * Preallocate a node for each physical engine supporting the target
	 * engine (remember virtual engines have more than one sibling).
	 * We can then use the preallocated nodes in
	 * i915_active_acquire_barrier()
	 */
	for_each_engine_masked(engine, gt, mask, tmp) {
		u64 idx = engine->kernel_context->timeline->fence_context;
		struct llist_node *prev = first;
		struct active_node *node;

		node = reuse_idle_barrier(ref, idx);
		if (!node) {
			node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
			if (!node) {
				err = -ENOMEM;
				goto unwind;
			}

			RCU_INIT_POINTER(node->base.fence, NULL);
			node->base.cb.func = node_retire;
			node->timeline = idx;
			node->ref = ref;
		}

		if (!i915_active_fence_isset(&node->base)) {
			/*
			 * Mark this as being *our* unconnected proto-node.
			 *
			 * Since this node is not in any list, and we have
			 * decoupled it from the rbtree, we can reuse the
			 * request to indicate this is an idle-barrier node
			 * and then we can use the rb_node and list pointers
			 * for our tracking of the pending barrier.
			 */
			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
			node->engine = engine;
			atomic_inc(&ref->count);
		}
		GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));

		GEM_BUG_ON(barrier_to_engine(node) != engine);
		first = barrier_to_ll(node);
		first->next = prev;
		if (!last)
			last = first;
		intel_engine_pm_get(engine);
	}

	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
	llist_add_batch(first, last, &ref->preallocated_barriers);

	return 0;

unwind:
	while (first) {
		struct active_node *node = barrier_from_ll(first);

		first = first->next;

		atomic_dec(&ref->count);
		intel_engine_pm_put(barrier_to_engine(node));

		kmem_cache_free(global.slab_cache, node);
	}
	return err;
}

void i915_active_acquire_barrier(struct i915_active *ref)
{
	struct llist_node *pos, *next;
	unsigned long flags;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Transfer the list of preallocated barriers into the
	 * i915_active rbtree, but only as proto-nodes. They will be
	 * populated by i915_request_add_active_barriers() to point to the
	 * request that will eventually release them.
	 */
	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
		struct active_node *node = barrier_from_ll(pos);
		struct intel_engine_cs *engine = barrier_to_engine(node);
		struct rb_node **p, *parent;

		spin_lock_irqsave_nested(&ref->tree_lock, flags,
					 SINGLE_DEPTH_NESTING);
#ifdef __NetBSD__
		__USE(p);
		__USE(parent);
		struct active_node *collision __diagused;
		collision = rb_tree_insert_node(&ref->tree.rbr_tree, node);
		KASSERT(collision == node);
#else
		parent = NULL;
		p = &ref->tree.rb_node;
		while (*p) {
			struct active_node *it;

			parent = *p;

			it = rb_entry(parent, struct active_node, node);
			if (it->timeline < node->timeline)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&node->node, parent, p);
		rb_insert_color(&node->node, &ref->tree);
#endif
		spin_unlock_irqrestore(&ref->tree_lock, flags);

		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
		intel_engine_pm_put(engine);
	}
}

static struct dma_fence **ll_to_fence_slot(struct llist_node *node)
{
	return __active_fence_slot(&barrier_from_ll(node)->base);
}

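/*
 * Attach all pending barrier proto-nodes on this engine to @rq so that the
 * corresponding i915_active trackers are released when the request (on the
 * engine's kernel_context timeline) is retired.
 */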
void i915_request_add_active_barriers(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct llist_node *node, *next;
	unsigned long flags;

	GEM_BUG_ON(!intel_context_is_barrier(rq->context));
	GEM_BUG_ON(intel_engine_is_virtual(engine));
	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);

	node = llist_del_all(&engine->barrier_tasks);
	if (!node)
		return;
	/*
	 * Attach the list of proto-fences to the in-flight request such
	 * that the parent i915_active will be released when this request
	 * is retired.
	 */
	spin_lock_irqsave(&rq->lock, flags);
	llist_for_each_safe(node, next, node) {
		/* serialise with reuse_idle_barrier */
		smp_store_mb(*ll_to_fence_slot(node), &rq->fence);
#ifdef __NetBSD__
		spin_unlock(&rq->lock);
		struct i915_active_fence *fence =
		    container_of(node, struct i915_active_fence, llist);
		/* XXX something bad went wrong in making this code */
		KASSERT(fence->cb.func == node_retire);
		(void)dma_fence_add_callback(fence->fence, &fence->cb,
		    node_retire);
		spin_lock(&rq->lock);
#else
		list_add_tail((struct list_head *)node, &rq->fence.cb_list);
#endif
	}
	spin_unlock_irqrestore(&rq->lock, flags);
}

/*
 * __i915_active_fence_set: Update the last active fence along its timeline
 * @active: the active tracker
 * @fence: the new fence (under construction)
 *
 * Records the new @fence as the last active fence along its timeline in
 * this active tracker, moving the tracking callbacks from the previous
 * fence onto this one. Returns the previous fence (if not already completed),
 * which the caller must ensure is executed before the new fence. To ensure
 * that the order of fences within the timeline of the i915_active_fence is
 * understood, it should be locked by the caller.
 */
struct dma_fence *
__i915_active_fence_set(struct i915_active_fence *active,
			struct dma_fence *fence)
{
	struct dma_fence *prev;
	unsigned long flags;

	if (fence == rcu_access_pointer(active->fence))
		return fence;

	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));

	/*
	 * Consider that we have two threads arriving (A and B), with
	 * C already resident as the active->fence.
	 *
	 * A does the xchg first, and so it sees C or NULL depending
	 * on the timing of the interrupt handler. If it is NULL, the
	 * previous fence must have been signaled and we know that
	 * we are first on the timeline. If it is still present,
	 * we acquire the lock on that fence and serialise with the interrupt
	 * handler, in the process removing it from any future interrupt
	 * callback. A will then wait on C before executing (if present).
	 *
	 * As B is second, it sees A as the previous fence and so waits for
	 * it to complete its transition and takes over the occupancy for
	 * itself -- remembering that it needs to wait on A before executing.
	 *
	 * Note the strong ordering of the timeline also provides consistent
	 * nesting rules for the fence->lock; the inner lock is always the
	 * older lock.
	 */
	spin_lock_irqsave(fence->lock, flags);
	prev = xchg(__active_fence_slot(active), fence);
	if (prev) {
		GEM_BUG_ON(prev == fence);
#ifdef __NetBSD__
		KASSERT(active->cb.func == node_retire);
		(void)dma_fence_remove_callback(prev, &active->cb);
#else
		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
		__list_del_entry(&active->cb.node);
		spin_unlock(prev->lock); /* serialise with prev->cb_list */
#endif
	}
	GEM_BUG_ON(rcu_access_pointer(active->fence) != fence);
#ifndef __NetBSD__
	list_add_tail(&active->cb.node, &fence->cb_list);
#endif
	spin_unlock_irqrestore(fence->lock, flags);

#ifdef __NetBSD__
	KASSERT(active->cb.func == node_retire);
	dma_fence_add_callback(fence, &active->cb, node_retire);
#endif

	return prev;
}

int i915_active_fence_set(struct i915_active_fence *active,
			  struct i915_request *rq)
{
	struct dma_fence *fence;
	int err = 0;

	/* Must maintain timeline ordering wrt previous active requests */
	rcu_read_lock();
	fence = __i915_active_fence_set(active, &rq->fence);
	if (fence) /* but the previous fence may not belong to that timeline! */
		fence = dma_fence_get_rcu(fence);
	rcu_read_unlock();
	if (fence) {
		err = i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	return err;
}

void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	active_fence_cb(fence, cb);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif

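/*
 * Global slab cache for active_node allocations, registered with the
 * i915_globals shrink/exit machinery so that idle node memory can be
 * reclaimed and the cache torn down on module unload.
 */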
static void i915_global_active_shrink(void)
{
	kmem_cache_shrink(global.slab_cache);
}

static void i915_global_active_exit(void)
{
	kmem_cache_destroy(global.slab_cache);
}

static struct i915_global_active global = { {
	.shrink = i915_global_active_shrink,
	.exit = i915_global_active_exit,
} };

int __init i915_global_active_init(void)
{
	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
	if (!global.slab_cache)
		return -ENOMEM;

	i915_global_register(&global.base);
	return 0;
}