/*	$NetBSD: intel_timeline.c,v 1.3 2021/12/19 11:49:11 riastradh Exp $	*/
      2 
      3 /*
      4  * SPDX-License-Identifier: MIT
      5  *
      6  * Copyright  2016-2018 Intel Corporation
      7  */
      8 
      9 #include <sys/cdefs.h>
     10 __KERNEL_RCSID(0, "$NetBSD: intel_timeline.c,v 1.3 2021/12/19 11:49:11 riastradh Exp $");
     11 
     12 #include "i915_drv.h"
     13 
     14 #include "i915_active.h"
     15 #include "i915_syncmap.h"
     16 #include "intel_gt.h"
     17 #include "intel_ring.h"
     18 #include "intel_timeline.h"
     19 
     20 #include <linux/nbsd-namespace.h>
     21 
/*
 * Pointer-tagging helpers: treat the pointer as an unsigned long so that a
 * single flag bit can be ORed into it or tested in it.
 */
#define ptr_set_bit(ptr, bit) \
	((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) \
	((unsigned long)(ptr) & BIT(bit))

/*
 * cacheline_alloc() packs the cacheline index into the low bits of
 * cl->vaddr (GEM_BUG_ON there bounds it to CACHELINE_BITS bits); the next
 * bit up, CACHELINE_FREE, is set by cacheline_free() to mark the cacheline
 * as released by its timeline.
 */
#define CACHELINE_BITS 6
#define CACHELINE_FREE CACHELINE_BITS
     27 
     28 struct intel_timeline_hwsp {
     29 	struct intel_gt *gt;
     30 	struct intel_gt_timelines *gt_timelines;
     31 	struct list_head free_link;
     32 	struct i915_vma *vma;
     33 	u64 free_bitmap;
     34 };
     35 
     36 static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
     37 {
     38 	struct drm_i915_private *i915 = gt->i915;
     39 	struct drm_i915_gem_object *obj;
     40 	struct i915_vma *vma;
     41 
     42 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
     43 	if (IS_ERR(obj))
     44 		return ERR_CAST(obj);
     45 
     46 	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
     47 
     48 	vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
     49 	if (IS_ERR(vma))
     50 		i915_gem_object_put(obj);
     51 
     52 	return vma;
     53 }
     54 
     55 static struct i915_vma *
     56 hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline)
     57 {
     58 	struct intel_gt_timelines *gt = &timeline->gt->timelines;
     59 	struct intel_timeline_hwsp *hwsp;
     60 
     61 	BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);
     62 
     63 	spin_lock_irq(>->hwsp_lock);
     64 
     65 	/* hwsp_free_list only contains HWSP that have available cachelines */
     66 	hwsp = list_first_entry_or_null(>->hwsp_free_list,
     67 					typeof(*hwsp), free_link);
     68 	if (!hwsp) {
     69 		struct i915_vma *vma;
     70 
     71 		spin_unlock_irq(>->hwsp_lock);
     72 
     73 		hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
     74 		if (!hwsp)
     75 			return ERR_PTR(-ENOMEM);
     76 
     77 		vma = __hwsp_alloc(timeline->gt);
     78 		if (IS_ERR(vma)) {
     79 			kfree(hwsp);
     80 			return vma;
     81 		}
     82 
     83 		vma->private = hwsp;
     84 		hwsp->gt = timeline->gt;
     85 		hwsp->vma = vma;
     86 		hwsp->free_bitmap = ~0ull;
     87 		hwsp->gt_timelines = gt;
     88 
     89 		spin_lock_irq(>->hwsp_lock);
     90 		list_add(&hwsp->free_link, >->hwsp_free_list);
     91 	}
     92 
     93 	GEM_BUG_ON(!hwsp->free_bitmap);
     94 	*cacheline = __ffs64(hwsp->free_bitmap);
     95 	hwsp->free_bitmap &= ~BIT_ULL(*cacheline);
     96 	if (!hwsp->free_bitmap)
     97 		list_del(&hwsp->free_link);
     98 
     99 	spin_unlock_irq(>->hwsp_lock);
    100 
    101 	GEM_BUG_ON(hwsp->vma->private != hwsp);
    102 	return hwsp->vma;
    103 }
    104 
    105 static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
    106 {
    107 	struct intel_gt_timelines *gt = hwsp->gt_timelines;
    108 	unsigned long flags;
    109 
    110 	spin_lock_irqsave(>->hwsp_lock, flags);
    111 
    112 	/* As a cacheline becomes available, publish the HWSP on the freelist */
    113 	if (!hwsp->free_bitmap)
    114 		list_add_tail(&hwsp->free_link, >->hwsp_free_list);
    115 
    116 	GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
    117 	hwsp->free_bitmap |= BIT_ULL(cacheline);
    118 
    119 	/* And if no one is left using it, give the page back to the system */
    120 	if (hwsp->free_bitmap == ~0ull) {
    121 		i915_vma_put(hwsp->vma);
    122 		list_del(&hwsp->free_link);
    123 		kfree(hwsp);
    124 	}
    125 
    126 	spin_unlock_irqrestore(>->hwsp_lock, flags);
    127 }
    128 
    129 static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
    130 {
    131 	GEM_BUG_ON(!i915_active_is_idle(&cl->active));
    132 
    133 	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
    134 	i915_vma_put(cl->hwsp->vma);
    135 	__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
    136 
    137 	i915_active_fini(&cl->active);
    138 	kfree_rcu(cl, rcu);
    139 }
    140 
    141 __i915_active_call
    142 static void __cacheline_retire(struct i915_active *active)
    143 {
    144 	struct intel_timeline_cacheline *cl =
    145 		container_of(active, typeof(*cl), active);
    146 
    147 	i915_vma_unpin(cl->hwsp->vma);
    148 	if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
    149 		__idle_cacheline_free(cl);
    150 }
    151 
    152 static int __cacheline_active(struct i915_active *active)
    153 {
    154 	struct intel_timeline_cacheline *cl =
    155 		container_of(active, typeof(*cl), active);
    156 
    157 	__i915_vma_pin(cl->hwsp->vma);
    158 	return 0;
    159 }
    160 
    161 static struct intel_timeline_cacheline *
    162 cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
    163 {
    164 	struct intel_timeline_cacheline *cl;
    165 	void *vaddr;
    166 
    167 	GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));
    168 
    169 	cl = kmalloc(sizeof(*cl), GFP_KERNEL);
    170 	if (!cl)
    171 		return ERR_PTR(-ENOMEM);
    172 
    173 	vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
    174 	if (IS_ERR(vaddr)) {
    175 		kfree(cl);
    176 		return ERR_CAST(vaddr);
    177 	}
    178 
    179 	i915_vma_get(hwsp->vma);
    180 	cl->hwsp = hwsp;
    181 	cl->vaddr = page_pack_bits(vaddr, cacheline);
    182 
    183 	i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);
    184 
    185 	return cl;
    186 }
    187 
    188 static void cacheline_acquire(struct intel_timeline_cacheline *cl)
    189 {
    190 	if (cl)
    191 		i915_active_acquire(&cl->active);
    192 }
    193 
    194 static void cacheline_release(struct intel_timeline_cacheline *cl)
    195 {
    196 	if (cl)
    197 		i915_active_release(&cl->active);
    198 }
    199 
    200 static void cacheline_free(struct intel_timeline_cacheline *cl)
    201 {
    202 	GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
    203 	cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);
    204 
    205 	if (i915_active_is_idle(&cl->active))
    206 		__idle_cacheline_free(cl);
    207 }
    208 
    209 int intel_timeline_init(struct intel_timeline *timeline,
    210 			struct intel_gt *gt,
    211 			struct i915_vma *hwsp)
    212 {
    213 	void *vaddr;
    214 
    215 	kref_init(&timeline->kref);
    216 	atomic_set(&timeline->pin_count, 0);
    217 
    218 	timeline->gt = gt;
    219 
    220 	timeline->has_initial_breadcrumb = !hwsp;
    221 	timeline->hwsp_cacheline = NULL;
    222 
    223 	if (!hwsp) {
    224 		struct intel_timeline_cacheline *cl;
    225 		unsigned int cacheline;
    226 
    227 		hwsp = hwsp_alloc(timeline, &cacheline);
    228 		if (IS_ERR(hwsp))
    229 			return PTR_ERR(hwsp);
    230 
    231 		cl = cacheline_alloc(hwsp->private, cacheline);
    232 		if (IS_ERR(cl)) {
    233 			__idle_hwsp_free(hwsp->private, cacheline);
    234 			return PTR_ERR(cl);
    235 		}
    236 
    237 		timeline->hwsp_cacheline = cl;
    238 		timeline->hwsp_offset = cacheline * CACHELINE_BYTES;
    239 
    240 		vaddr = page_mask_bits(cl->vaddr);
    241 	} else {
    242 		timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
    243 
    244 		vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
    245 		if (IS_ERR(vaddr))
    246 			return PTR_ERR(vaddr);
    247 	}
    248 
    249 	timeline->hwsp_seqno =
    250 		memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);
    251 
    252 	timeline->hwsp_ggtt = i915_vma_get(hwsp);
    253 	GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);
    254 
    255 	timeline->fence_context = dma_fence_context_alloc(1);
    256 
    257 	mutex_init(&timeline->mutex);
    258 
    259 	INIT_ACTIVE_FENCE(&timeline->last_request);
    260 	INIT_LIST_HEAD(&timeline->requests);
    261 
    262 	i915_syncmap_init(&timeline->sync);
    263 
    264 	return 0;
    265 }
    266 
    267 void intel_gt_init_timelines(struct intel_gt *gt)
    268 {
    269 	struct intel_gt_timelines *timelines = >->timelines;
    270 
    271 	spin_lock_init(&timelines->lock);
    272 	INIT_LIST_HEAD(&timelines->active_list);
    273 
    274 	spin_lock_init(&timelines->hwsp_lock);
    275 	INIT_LIST_HEAD(&timelines->hwsp_free_list);
    276 }
    277 
    278 void intel_timeline_fini(struct intel_timeline *timeline)
    279 {
    280 	GEM_BUG_ON(atomic_read(&timeline->pin_count));
    281 	GEM_BUG_ON(!list_empty(&timeline->requests));
    282 	GEM_BUG_ON(timeline->retire);
    283 
    284 	if (timeline->hwsp_cacheline)
    285 		cacheline_free(timeline->hwsp_cacheline);
    286 	else
    287 		i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
    288 
    289 	i915_vma_put(timeline->hwsp_ggtt);
    290 }
    291 
    292 struct intel_timeline *
    293 intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
    294 {
    295 	struct intel_timeline *timeline;
    296 	int err;
    297 
    298 	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
    299 	if (!timeline)
    300 		return ERR_PTR(-ENOMEM);
    301 
    302 	err = intel_timeline_init(timeline, gt, global_hwsp);
    303 	if (err) {
    304 		kfree(timeline);
    305 		return ERR_PTR(err);
    306 	}
    307 
    308 	return timeline;
    309 }
    310 
    311 int intel_timeline_pin(struct intel_timeline *tl)
    312 {
    313 	int err;
    314 
    315 	if (atomic_add_unless(&tl->pin_count, 1, 0))
    316 		return 0;
    317 
    318 	err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
    319 	if (err)
    320 		return err;
    321 
    322 	tl->hwsp_offset =
    323 		i915_ggtt_offset(tl->hwsp_ggtt) +
    324 		offset_in_page(tl->hwsp_offset);
    325 
    326 	cacheline_acquire(tl->hwsp_cacheline);
    327 	if (atomic_fetch_inc(&tl->pin_count)) {
    328 		cacheline_release(tl->hwsp_cacheline);
    329 		__i915_vma_unpin(tl->hwsp_ggtt);
    330 	}
    331 
    332 	return 0;
    333 }
    334 
    335 void intel_timeline_enter(struct intel_timeline *tl)
    336 {
    337 	struct intel_gt_timelines *timelines = &tl->gt->timelines;
    338 
    339 	/*
    340 	 * Pretend we are serialised by the timeline->mutex.
    341 	 *
    342 	 * While generally true, there are a few exceptions to the rule
    343 	 * for the engine->kernel_context being used to manage power
    344 	 * transitions. As the engine_park may be called from under any
    345 	 * timeline, it uses the power mutex as a global serialisation
    346 	 * lock to prevent any other request entering its timeline.
    347 	 *
    348 	 * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
    349 	 *
    350 	 * However, intel_gt_retire_request() does not know which engine
    351 	 * it is retiring along and so cannot partake in the engine-pm
    352 	 * barrier, and there we use the tl->active_count as a means to
    353 	 * pin the timeline in the active_list while the locks are dropped.
    354 	 * Ergo, as that is outside of the engine-pm barrier, we need to
    355 	 * use atomic to manipulate tl->active_count.
    356 	 */
    357 	lockdep_assert_held(&tl->mutex);
    358 
    359 	if (atomic_add_unless(&tl->active_count, 1, 0))
    360 		return;
    361 
    362 	spin_lock(&timelines->lock);
    363 	if (!atomic_fetch_inc(&tl->active_count))
    364 		list_add_tail(&tl->link, &timelines->active_list);
    365 	spin_unlock(&timelines->lock);
    366 }
    367 
    368 void intel_timeline_exit(struct intel_timeline *tl)
    369 {
    370 	struct intel_gt_timelines *timelines = &tl->gt->timelines;
    371 
    372 	/* See intel_timeline_enter() */
    373 	lockdep_assert_held(&tl->mutex);
    374 
    375 	GEM_BUG_ON(!atomic_read(&tl->active_count));
    376 	if (atomic_add_unless(&tl->active_count, -1, 1))
    377 		return;
    378 
    379 	spin_lock(&timelines->lock);
    380 	if (atomic_dec_and_test(&tl->active_count))
    381 		list_del(&tl->link);
    382 	spin_unlock(&timelines->lock);
    383 
    384 	/*
    385 	 * Since this timeline is idle, all bariers upon which we were waiting
    386 	 * must also be complete and so we can discard the last used barriers
    387 	 * without loss of information.
    388 	 */
    389 	i915_syncmap_free(&tl->sync);
    390 }
    391 
    392 static u32 timeline_advance(struct intel_timeline *tl)
    393 {
    394 	GEM_BUG_ON(!atomic_read(&tl->pin_count));
    395 	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
    396 
    397 	return tl->seqno += 1 + tl->has_initial_breadcrumb;
    398 }
    399 
    400 static void timeline_rollback(struct intel_timeline *tl)
    401 {
    402 	tl->seqno -= 1 + tl->has_initial_breadcrumb;
    403 }
    404 
    405 static noinline int
    406 __intel_timeline_get_seqno(struct intel_timeline *tl,
    407 			   struct i915_request *rq,
    408 			   u32 *seqno)
    409 {
    410 	struct intel_timeline_cacheline *cl;
    411 	unsigned int cacheline;
    412 	struct i915_vma *vma;
    413 	void *vaddr;
    414 	int err;
    415 
    416 	/*
    417 	 * If there is an outstanding GPU reference to this cacheline,
    418 	 * such as it being sampled by a HW semaphore on another timeline,
    419 	 * we cannot wraparound our seqno value (the HW semaphore does
    420 	 * a strict greater-than-or-equals compare, not i915_seqno_passed).
    421 	 * So if the cacheline is still busy, we must detach ourselves
    422 	 * from it and leave it inflight alongside its users.
    423 	 *
    424 	 * However, if nobody is watching and we can guarantee that nobody
    425 	 * will, we could simply reuse the same cacheline.
    426 	 *
    427 	 * if (i915_active_request_is_signaled(&tl->last_request) &&
    428 	 *     i915_active_is_signaled(&tl->hwsp_cacheline->active))
    429 	 *	return 0;
    430 	 *
    431 	 * That seems unlikely for a busy timeline that needed to wrap in
    432 	 * the first place, so just replace the cacheline.
    433 	 */
    434 
    435 	vma = hwsp_alloc(tl, &cacheline);
    436 	if (IS_ERR(vma)) {
    437 		err = PTR_ERR(vma);
    438 		goto err_rollback;
    439 	}
    440 
    441 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
    442 	if (err) {
    443 		__idle_hwsp_free(vma->private, cacheline);
    444 		goto err_rollback;
    445 	}
    446 
    447 	cl = cacheline_alloc(vma->private, cacheline);
    448 	if (IS_ERR(cl)) {
    449 		err = PTR_ERR(cl);
    450 		__idle_hwsp_free(vma->private, cacheline);
    451 		goto err_unpin;
    452 	}
    453 	GEM_BUG_ON(cl->hwsp->vma != vma);
    454 
    455 	/*
    456 	 * Attach the old cacheline to the current request, so that we only
    457 	 * free it after the current request is retired, which ensures that
    458 	 * all writes into the cacheline from previous requests are complete.
    459 	 */
    460 	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
    461 	if (err)
    462 		goto err_cacheline;
    463 
    464 	cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
    465 	cacheline_free(tl->hwsp_cacheline);
    466 
    467 	i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
    468 	i915_vma_put(tl->hwsp_ggtt);
    469 
    470 	tl->hwsp_ggtt = i915_vma_get(vma);
    471 
    472 	vaddr = page_mask_bits(cl->vaddr);
    473 	tl->hwsp_offset = cacheline * CACHELINE_BYTES;
    474 	tl->hwsp_seqno =
    475 		memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);
    476 
    477 	tl->hwsp_offset += i915_ggtt_offset(vma);
    478 
    479 	cacheline_acquire(cl);
    480 	tl->hwsp_cacheline = cl;
    481 
    482 	*seqno = timeline_advance(tl);
    483 	GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
    484 	return 0;
    485 
    486 err_cacheline:
    487 	cacheline_free(cl);
    488 err_unpin:
    489 	i915_vma_unpin(vma);
    490 err_rollback:
    491 	timeline_rollback(tl);
    492 	return err;
    493 }
    494 
    495 int intel_timeline_get_seqno(struct intel_timeline *tl,
    496 			     struct i915_request *rq,
    497 			     u32 *seqno)
    498 {
    499 	*seqno = timeline_advance(tl);
    500 
    501 	/* Replace the HWSP on wraparound for HW semaphores */
    502 	if (unlikely(!*seqno && tl->hwsp_cacheline))
    503 		return __intel_timeline_get_seqno(tl, rq, seqno);
    504 
    505 	return 0;
    506 }
    507 
    508 static int cacheline_ref(struct intel_timeline_cacheline *cl,
    509 			 struct i915_request *rq)
    510 {
    511 	return i915_active_add_request(&cl->active, rq);
    512 }
    513 
    514 int intel_timeline_read_hwsp(struct i915_request *from,
    515 			     struct i915_request *to,
    516 			     u32 *hwsp)
    517 {
    518 	struct intel_timeline_cacheline *cl;
    519 	int err;
    520 
    521 	GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));
    522 
    523 	rcu_read_lock();
    524 	cl = rcu_dereference(from->hwsp_cacheline);
    525 	if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
    526 		goto unlock; /* seqno wrapped and completed! */
    527 	if (unlikely(i915_request_completed(from)))
    528 		goto release;
    529 	rcu_read_unlock();
    530 
    531 	err = cacheline_ref(cl, to);
    532 	if (err)
    533 		goto out;
    534 
    535 	*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
    536 		ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;
    537 
    538 out:
    539 	i915_active_release(&cl->active);
    540 	return err;
    541 
    542 release:
    543 	i915_active_release(&cl->active);
    544 unlock:
    545 	rcu_read_unlock();
    546 	return 1;
    547 }
    548 
    549 void intel_timeline_unpin(struct intel_timeline *tl)
    550 {
    551 	GEM_BUG_ON(!atomic_read(&tl->pin_count));
    552 	if (!atomic_dec_and_test(&tl->pin_count))
    553 		return;
    554 
    555 	cacheline_release(tl->hwsp_cacheline);
    556 
    557 	__i915_vma_unpin(tl->hwsp_ggtt);
    558 }
    559 
    560 void __intel_timeline_free(struct kref *kref)
    561 {
    562 	struct intel_timeline *timeline =
    563 		container_of(kref, typeof(*timeline), kref);
    564 
    565 	intel_timeline_fini(timeline);
    566 	kfree_rcu(timeline, rcu);
    567 }
    568 
    569 void intel_gt_fini_timelines(struct intel_gt *gt)
    570 {
    571 	struct intel_gt_timelines *timelines = >->timelines;
    572 
    573 	GEM_BUG_ON(!list_empty(&timelines->active_list));
    574 	GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
    575 }
    576 
    577 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
    578 #include "gt/selftests/mock_timeline.c"
    579 #include "gt/selftest_timeline.c"
    580 #endif
    581