/*	$NetBSD: intel_timeline.c,v 1.4 2021/12/19 11:57:19 riastradh Exp $	*/
      2 
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016-2018 Intel Corporation
 */
      8 
      9 #include <sys/cdefs.h>
     10 __KERNEL_RCSID(0, "$NetBSD: intel_timeline.c,v 1.4 2021/12/19 11:57:19 riastradh Exp $");
     11 
     12 #include "i915_drv.h"
     13 
     14 #include "i915_active.h"
     15 #include "i915_syncmap.h"
     16 #include "intel_gt.h"
     17 #include "intel_ring.h"
     18 #include "intel_timeline.h"
     19 
     20 #include <linux/nbsd-namespace.h>
     21 
/*
 * Pointer-tagging helpers: set or test a single bit in the integer
 * representation of a pointer. ptr_set_bit() yields a value of the same
 * pointer type with the bit ORed in; ptr_test_bit() yields non-zero when
 * the bit is set.
 */
#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))

/*
 * The low CACHELINE_BITS bits packed into a cacheline's vaddr hold the
 * cacheline index (see cacheline_alloc()); the next bit up, CACHELINE_FREE,
 * is set by cacheline_free() to mark the cacheline as awaiting release.
 */
#define CACHELINE_BITS 6
#define CACHELINE_FREE CACHELINE_BITS
     27 
     28 struct intel_timeline_hwsp {
     29 	struct intel_gt *gt;
     30 	struct intel_gt_timelines *gt_timelines;
     31 	struct list_head free_link;
     32 	struct i915_vma *vma;
     33 	u64 free_bitmap;
     34 };
     35 
     36 static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
     37 {
     38 	struct drm_i915_private *i915 = gt->i915;
     39 	struct drm_i915_gem_object *obj;
     40 	struct i915_vma *vma;
     41 
     42 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
     43 	if (IS_ERR(obj))
     44 		return ERR_CAST(obj);
     45 
     46 	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
     47 
     48 	vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
     49 	if (IS_ERR(vma))
     50 		i915_gem_object_put(obj);
     51 
     52 	return vma;
     53 }
     54 
     55 static struct i915_vma *
     56 hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline)
     57 {
     58 	struct intel_gt_timelines *gt = &timeline->gt->timelines;
     59 	struct intel_timeline_hwsp *hwsp;
     60 
     61 	BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);
     62 
     63 	spin_lock_irq(>->hwsp_lock);
     64 
     65 	/* hwsp_free_list only contains HWSP that have available cachelines */
     66 	hwsp = list_first_entry_or_null(>->hwsp_free_list,
     67 					typeof(*hwsp), free_link);
     68 	if (!hwsp) {
     69 		struct i915_vma *vma;
     70 
     71 		spin_unlock_irq(>->hwsp_lock);
     72 
     73 		hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
     74 		if (!hwsp)
     75 			return ERR_PTR(-ENOMEM);
     76 
     77 		vma = __hwsp_alloc(timeline->gt);
     78 		if (IS_ERR(vma)) {
     79 			kfree(hwsp);
     80 			return vma;
     81 		}
     82 
     83 		vma->private = hwsp;
     84 		hwsp->gt = timeline->gt;
     85 		hwsp->vma = vma;
     86 		hwsp->free_bitmap = ~0ull;
     87 		hwsp->gt_timelines = gt;
     88 
     89 		spin_lock_irq(>->hwsp_lock);
     90 		list_add(&hwsp->free_link, >->hwsp_free_list);
     91 	}
     92 
     93 	GEM_BUG_ON(!hwsp->free_bitmap);
     94 	*cacheline = __ffs64(hwsp->free_bitmap);
     95 	hwsp->free_bitmap &= ~BIT_ULL(*cacheline);
     96 	if (!hwsp->free_bitmap)
     97 		list_del(&hwsp->free_link);
     98 
     99 	spin_unlock_irq(>->hwsp_lock);
    100 
    101 	GEM_BUG_ON(hwsp->vma->private != hwsp);
    102 	return hwsp->vma;
    103 }
    104 
    105 static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
    106 {
    107 	struct intel_gt_timelines *gt = hwsp->gt_timelines;
    108 	unsigned long flags;
    109 
    110 	spin_lock_irqsave(>->hwsp_lock, flags);
    111 
    112 	/* As a cacheline becomes available, publish the HWSP on the freelist */
    113 	if (!hwsp->free_bitmap)
    114 		list_add_tail(&hwsp->free_link, >->hwsp_free_list);
    115 
    116 	GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
    117 	hwsp->free_bitmap |= BIT_ULL(cacheline);
    118 
    119 	/* And if no one is left using it, give the page back to the system */
    120 	if (hwsp->free_bitmap == ~0ull) {
    121 		i915_vma_put(hwsp->vma);
    122 		list_del(&hwsp->free_link);
    123 		kfree(hwsp);
    124 	}
    125 
    126 	spin_unlock_irqrestore(>->hwsp_lock, flags);
    127 }
    128 
    129 static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
    130 {
    131 	GEM_BUG_ON(!i915_active_is_idle(&cl->active));
    132 
    133 	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
    134 	i915_vma_put(cl->hwsp->vma);
    135 	__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
    136 
    137 	i915_active_fini(&cl->active);
    138 	kfree_rcu(cl, rcu);
    139 }
    140 
    141 __i915_active_call
    142 static void __cacheline_retire(struct i915_active *active)
    143 {
    144 	struct intel_timeline_cacheline *cl =
    145 		container_of(active, typeof(*cl), active);
    146 
    147 	i915_vma_unpin(cl->hwsp->vma);
    148 	if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
    149 		__idle_cacheline_free(cl);
    150 }
    151 
    152 static int __cacheline_active(struct i915_active *active)
    153 {
    154 	struct intel_timeline_cacheline *cl =
    155 		container_of(active, typeof(*cl), active);
    156 
    157 	__i915_vma_pin(cl->hwsp->vma);
    158 	return 0;
    159 }
    160 
    161 static struct intel_timeline_cacheline *
    162 cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
    163 {
    164 	struct intel_timeline_cacheline *cl;
    165 	void *vaddr;
    166 
    167 	GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));
    168 
    169 	cl = kmalloc(sizeof(*cl), GFP_KERNEL);
    170 	if (!cl)
    171 		return ERR_PTR(-ENOMEM);
    172 
    173 	vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
    174 	if (IS_ERR(vaddr)) {
    175 		kfree(cl);
    176 		return ERR_CAST(vaddr);
    177 	}
    178 
    179 	i915_vma_get(hwsp->vma);
    180 	cl->hwsp = hwsp;
    181 	cl->vaddr = page_pack_bits(vaddr, cacheline);
    182 
    183 	i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);
    184 
    185 	return cl;
    186 }
    187 
    188 static void cacheline_acquire(struct intel_timeline_cacheline *cl)
    189 {
    190 	if (cl)
    191 		i915_active_acquire(&cl->active);
    192 }
    193 
    194 static void cacheline_release(struct intel_timeline_cacheline *cl)
    195 {
    196 	if (cl)
    197 		i915_active_release(&cl->active);
    198 }
    199 
    200 static void cacheline_free(struct intel_timeline_cacheline *cl)
    201 {
    202 	GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
    203 	cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);
    204 
    205 	if (i915_active_is_idle(&cl->active))
    206 		__idle_cacheline_free(cl);
    207 }
    208 
    209 int intel_timeline_init(struct intel_timeline *timeline,
    210 			struct intel_gt *gt,
    211 			struct i915_vma *hwsp)
    212 {
    213 	void *vaddr;
    214 
    215 	kref_init(&timeline->kref);
    216 	atomic_set(&timeline->pin_count, 0);
    217 
    218 	timeline->gt = gt;
    219 
    220 	timeline->has_initial_breadcrumb = !hwsp;
    221 	timeline->hwsp_cacheline = NULL;
    222 
    223 	if (!hwsp) {
    224 		struct intel_timeline_cacheline *cl;
    225 		unsigned int cacheline;
    226 
    227 		hwsp = hwsp_alloc(timeline, &cacheline);
    228 		if (IS_ERR(hwsp))
    229 			return PTR_ERR(hwsp);
    230 
    231 		cl = cacheline_alloc(hwsp->private, cacheline);
    232 		if (IS_ERR(cl)) {
    233 			__idle_hwsp_free(hwsp->private, cacheline);
    234 			return PTR_ERR(cl);
    235 		}
    236 
    237 		timeline->hwsp_cacheline = cl;
    238 		timeline->hwsp_offset = cacheline * CACHELINE_BYTES;
    239 
    240 		vaddr = page_mask_bits(cl->vaddr);
    241 	} else {
    242 		timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
    243 
    244 		vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
    245 		if (IS_ERR(vaddr))
    246 			return PTR_ERR(vaddr);
    247 	}
    248 
    249 	timeline->hwsp_seqno =
    250 		memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);
    251 
    252 	timeline->hwsp_ggtt = i915_vma_get(hwsp);
    253 	GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);
    254 
    255 	timeline->fence_context = dma_fence_context_alloc(1);
    256 
    257 	mutex_init(&timeline->mutex);
    258 
    259 	INIT_ACTIVE_FENCE(&timeline->last_request);
    260 	INIT_LIST_HEAD(&timeline->requests);
    261 
    262 	i915_syncmap_init(&timeline->sync);
    263 
    264 	return 0;
    265 }
    266 
    267 void intel_gt_init_timelines(struct intel_gt *gt)
    268 {
    269 	struct intel_gt_timelines *timelines = >->timelines;
    270 
    271 	spin_lock_init(&timelines->lock);
    272 	INIT_LIST_HEAD(&timelines->active_list);
    273 
    274 	spin_lock_init(&timelines->hwsp_lock);
    275 	INIT_LIST_HEAD(&timelines->hwsp_free_list);
    276 }
    277 
    278 void intel_timeline_fini(struct intel_timeline *timeline)
    279 {
    280 	GEM_BUG_ON(atomic_read(&timeline->pin_count));
    281 	GEM_BUG_ON(!list_empty(&timeline->requests));
    282 	GEM_BUG_ON(timeline->retire);
    283 
    284 	if (timeline->hwsp_cacheline)
    285 		cacheline_free(timeline->hwsp_cacheline);
    286 	else
    287 		i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
    288 
    289 	i915_vma_put(timeline->hwsp_ggtt);
    290 
    291 	mutex_destroy(&timeline->mutex);
    292 }
    293 
    294 struct intel_timeline *
    295 intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
    296 {
    297 	struct intel_timeline *timeline;
    298 	int err;
    299 
    300 	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
    301 	if (!timeline)
    302 		return ERR_PTR(-ENOMEM);
    303 
    304 	err = intel_timeline_init(timeline, gt, global_hwsp);
    305 	if (err) {
    306 		kfree(timeline);
    307 		return ERR_PTR(err);
    308 	}
    309 
    310 	return timeline;
    311 }
    312 
    313 int intel_timeline_pin(struct intel_timeline *tl)
    314 {
    315 	int err;
    316 
    317 	if (atomic_add_unless(&tl->pin_count, 1, 0))
    318 		return 0;
    319 
    320 	err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
    321 	if (err)
    322 		return err;
    323 
    324 	tl->hwsp_offset =
    325 		i915_ggtt_offset(tl->hwsp_ggtt) +
    326 		offset_in_page(tl->hwsp_offset);
    327 
    328 	cacheline_acquire(tl->hwsp_cacheline);
    329 	if (atomic_fetch_inc(&tl->pin_count)) {
    330 		cacheline_release(tl->hwsp_cacheline);
    331 		__i915_vma_unpin(tl->hwsp_ggtt);
    332 	}
    333 
    334 	return 0;
    335 }
    336 
    337 void intel_timeline_enter(struct intel_timeline *tl)
    338 {
    339 	struct intel_gt_timelines *timelines = &tl->gt->timelines;
    340 
    341 	/*
    342 	 * Pretend we are serialised by the timeline->mutex.
    343 	 *
    344 	 * While generally true, there are a few exceptions to the rule
    345 	 * for the engine->kernel_context being used to manage power
    346 	 * transitions. As the engine_park may be called from under any
    347 	 * timeline, it uses the power mutex as a global serialisation
    348 	 * lock to prevent any other request entering its timeline.
    349 	 *
    350 	 * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
    351 	 *
    352 	 * However, intel_gt_retire_request() does not know which engine
    353 	 * it is retiring along and so cannot partake in the engine-pm
    354 	 * barrier, and there we use the tl->active_count as a means to
    355 	 * pin the timeline in the active_list while the locks are dropped.
    356 	 * Ergo, as that is outside of the engine-pm barrier, we need to
    357 	 * use atomic to manipulate tl->active_count.
    358 	 */
    359 	lockdep_assert_held(&tl->mutex);
    360 
    361 	if (atomic_add_unless(&tl->active_count, 1, 0))
    362 		return;
    363 
    364 	spin_lock(&timelines->lock);
    365 	if (!atomic_fetch_inc(&tl->active_count))
    366 		list_add_tail(&tl->link, &timelines->active_list);
    367 	spin_unlock(&timelines->lock);
    368 }
    369 
    370 void intel_timeline_exit(struct intel_timeline *tl)
    371 {
    372 	struct intel_gt_timelines *timelines = &tl->gt->timelines;
    373 
    374 	/* See intel_timeline_enter() */
    375 	lockdep_assert_held(&tl->mutex);
    376 
    377 	GEM_BUG_ON(!atomic_read(&tl->active_count));
    378 	if (atomic_add_unless(&tl->active_count, -1, 1))
    379 		return;
    380 
    381 	spin_lock(&timelines->lock);
    382 	if (atomic_dec_and_test(&tl->active_count))
    383 		list_del(&tl->link);
    384 	spin_unlock(&timelines->lock);
    385 
    386 	/*
    387 	 * Since this timeline is idle, all bariers upon which we were waiting
    388 	 * must also be complete and so we can discard the last used barriers
    389 	 * without loss of information.
    390 	 */
    391 	i915_syncmap_free(&tl->sync);
    392 }
    393 
    394 static u32 timeline_advance(struct intel_timeline *tl)
    395 {
    396 	GEM_BUG_ON(!atomic_read(&tl->pin_count));
    397 	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
    398 
    399 	return tl->seqno += 1 + tl->has_initial_breadcrumb;
    400 }
    401 
    402 static void timeline_rollback(struct intel_timeline *tl)
    403 {
    404 	tl->seqno -= 1 + tl->has_initial_breadcrumb;
    405 }
    406 
    407 static noinline int
    408 __intel_timeline_get_seqno(struct intel_timeline *tl,
    409 			   struct i915_request *rq,
    410 			   u32 *seqno)
    411 {
    412 	struct intel_timeline_cacheline *cl;
    413 	unsigned int cacheline;
    414 	struct i915_vma *vma;
    415 	void *vaddr;
    416 	int err;
    417 
    418 	/*
    419 	 * If there is an outstanding GPU reference to this cacheline,
    420 	 * such as it being sampled by a HW semaphore on another timeline,
    421 	 * we cannot wraparound our seqno value (the HW semaphore does
    422 	 * a strict greater-than-or-equals compare, not i915_seqno_passed).
    423 	 * So if the cacheline is still busy, we must detach ourselves
    424 	 * from it and leave it inflight alongside its users.
    425 	 *
    426 	 * However, if nobody is watching and we can guarantee that nobody
    427 	 * will, we could simply reuse the same cacheline.
    428 	 *
    429 	 * if (i915_active_request_is_signaled(&tl->last_request) &&
    430 	 *     i915_active_is_signaled(&tl->hwsp_cacheline->active))
    431 	 *	return 0;
    432 	 *
    433 	 * That seems unlikely for a busy timeline that needed to wrap in
    434 	 * the first place, so just replace the cacheline.
    435 	 */
    436 
    437 	vma = hwsp_alloc(tl, &cacheline);
    438 	if (IS_ERR(vma)) {
    439 		err = PTR_ERR(vma);
    440 		goto err_rollback;
    441 	}
    442 
    443 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
    444 	if (err) {
    445 		__idle_hwsp_free(vma->private, cacheline);
    446 		goto err_rollback;
    447 	}
    448 
    449 	cl = cacheline_alloc(vma->private, cacheline);
    450 	if (IS_ERR(cl)) {
    451 		err = PTR_ERR(cl);
    452 		__idle_hwsp_free(vma->private, cacheline);
    453 		goto err_unpin;
    454 	}
    455 	GEM_BUG_ON(cl->hwsp->vma != vma);
    456 
    457 	/*
    458 	 * Attach the old cacheline to the current request, so that we only
    459 	 * free it after the current request is retired, which ensures that
    460 	 * all writes into the cacheline from previous requests are complete.
    461 	 */
    462 	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
    463 	if (err)
    464 		goto err_cacheline;
    465 
    466 	cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
    467 	cacheline_free(tl->hwsp_cacheline);
    468 
    469 	i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
    470 	i915_vma_put(tl->hwsp_ggtt);
    471 
    472 	tl->hwsp_ggtt = i915_vma_get(vma);
    473 
    474 	vaddr = page_mask_bits(cl->vaddr);
    475 	tl->hwsp_offset = cacheline * CACHELINE_BYTES;
    476 	tl->hwsp_seqno =
    477 		memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);
    478 
    479 	tl->hwsp_offset += i915_ggtt_offset(vma);
    480 
    481 	cacheline_acquire(cl);
    482 	tl->hwsp_cacheline = cl;
    483 
    484 	*seqno = timeline_advance(tl);
    485 	GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
    486 	return 0;
    487 
    488 err_cacheline:
    489 	cacheline_free(cl);
    490 err_unpin:
    491 	i915_vma_unpin(vma);
    492 err_rollback:
    493 	timeline_rollback(tl);
    494 	return err;
    495 }
    496 
    497 int intel_timeline_get_seqno(struct intel_timeline *tl,
    498 			     struct i915_request *rq,
    499 			     u32 *seqno)
    500 {
    501 	*seqno = timeline_advance(tl);
    502 
    503 	/* Replace the HWSP on wraparound for HW semaphores */
    504 	if (unlikely(!*seqno && tl->hwsp_cacheline))
    505 		return __intel_timeline_get_seqno(tl, rq, seqno);
    506 
    507 	return 0;
    508 }
    509 
    510 static int cacheline_ref(struct intel_timeline_cacheline *cl,
    511 			 struct i915_request *rq)
    512 {
    513 	return i915_active_add_request(&cl->active, rq);
    514 }
    515 
    516 int intel_timeline_read_hwsp(struct i915_request *from,
    517 			     struct i915_request *to,
    518 			     u32 *hwsp)
    519 {
    520 	struct intel_timeline_cacheline *cl;
    521 	int err;
    522 
    523 	GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));
    524 
    525 	rcu_read_lock();
    526 	cl = rcu_dereference(from->hwsp_cacheline);
    527 	if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
    528 		goto unlock; /* seqno wrapped and completed! */
    529 	if (unlikely(i915_request_completed(from)))
    530 		goto release;
    531 	rcu_read_unlock();
    532 
    533 	err = cacheline_ref(cl, to);
    534 	if (err)
    535 		goto out;
    536 
    537 	*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
    538 		ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;
    539 
    540 out:
    541 	i915_active_release(&cl->active);
    542 	return err;
    543 
    544 release:
    545 	i915_active_release(&cl->active);
    546 unlock:
    547 	rcu_read_unlock();
    548 	return 1;
    549 }
    550 
    551 void intel_timeline_unpin(struct intel_timeline *tl)
    552 {
    553 	GEM_BUG_ON(!atomic_read(&tl->pin_count));
    554 	if (!atomic_dec_and_test(&tl->pin_count))
    555 		return;
    556 
    557 	cacheline_release(tl->hwsp_cacheline);
    558 
    559 	__i915_vma_unpin(tl->hwsp_ggtt);
    560 }
    561 
    562 void __intel_timeline_free(struct kref *kref)
    563 {
    564 	struct intel_timeline *timeline =
    565 		container_of(kref, typeof(*timeline), kref);
    566 
    567 	intel_timeline_fini(timeline);
    568 	kfree_rcu(timeline, rcu);
    569 }
    570 
    571 void intel_gt_fini_timelines(struct intel_gt *gt)
    572 {
    573 	struct intel_gt_timelines *timelines = >->timelines;
    574 
    575 	GEM_BUG_ON(!list_empty(&timelines->active_list));
    576 	GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
    577 }
    578 
    579 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
    580 #include "gt/selftests/mock_timeline.c"
    581 #include "gt/selftest_timeline.c"
    582 #endif
    583