intel_timeline.c revision 1.1.1.1
/*	$NetBSD: intel_timeline.c,v 1.1.1.1 2021/12/18 20:15:33 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016-2018 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: intel_timeline.c,v 1.1.1.1 2021/12/18 20:15:33 riastradh Exp $");

#include "i915_drv.h"

#include "i915_active.h"
#include "i915_syncmap.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"

#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))

#define CACHELINE_BITS 6
#define CACHELINE_FREE CACHELINE_BITS

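/*
 * An intel_timeline_hwsp tracks one page of hardware status words. The page
 * is carved into 64-byte cachelines (CACHELINE_BITS), each of which can back
 * a single timeline's seqno; free_bitmap records which cachelines are still
 * available, and partially used pages sit on gt_timelines->hwsp_free_list.
 */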
struct intel_timeline_hwsp {
	struct intel_gt *gt;
	struct intel_gt_timelines *gt_timelines;
	struct list_head free_link;
	struct i915_vma *vma;
	u64 free_bitmap;
};

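/*
 * Allocate a page of internal memory for a new HWSP and wrap it in a VMA in
 * the GT's global GTT address space so the GPU can write status into it.
 */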
static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma))
		i915_gem_object_put(obj);

	return vma;
}

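/*
 * Find a cacheline to use for a new timeline's seqno. Reuse a partially
 * filled HWSP page from the per-gt free list if one exists; otherwise
 * allocate a fresh page and publish it. Returns the backing VMA and the
 * index of the claimed cacheline.
 */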
static struct i915_vma *
hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline)
{
	struct intel_gt_timelines *gt = &timeline->gt->timelines;
	struct intel_timeline_hwsp *hwsp;

	BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);

	spin_lock_irq(&gt->hwsp_lock);

	/* hwsp_free_list only contains HWSP that have available cachelines */
	hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
					typeof(*hwsp), free_link);
	if (!hwsp) {
		struct i915_vma *vma;

		spin_unlock_irq(&gt->hwsp_lock);

		hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
		if (!hwsp)
			return ERR_PTR(-ENOMEM);

		vma = __hwsp_alloc(timeline->gt);
		if (IS_ERR(vma)) {
			kfree(hwsp);
			return vma;
		}

		vma->private = hwsp;
		hwsp->gt = timeline->gt;
		hwsp->vma = vma;
		hwsp->free_bitmap = ~0ull;
		hwsp->gt_timelines = gt;

		spin_lock_irq(&gt->hwsp_lock);
		list_add(&hwsp->free_link, &gt->hwsp_free_list);
	}

	GEM_BUG_ON(!hwsp->free_bitmap);
	*cacheline = __ffs64(hwsp->free_bitmap);
	hwsp->free_bitmap &= ~BIT_ULL(*cacheline);
	if (!hwsp->free_bitmap)
		list_del(&hwsp->free_link);

	spin_unlock_irq(&gt->hwsp_lock);

	GEM_BUG_ON(hwsp->vma->private != hwsp);
	return hwsp->vma;
}

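/*
 * Return a cacheline to its HWSP page. If the page was previously full it
 * is republished on the free list; once every cacheline is free again the
 * page itself is released back to the system.
 */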
static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
{
	struct intel_gt_timelines *gt = hwsp->gt_timelines;
	unsigned long flags;

	spin_lock_irqsave(&gt->hwsp_lock, flags);

	/* As a cacheline becomes available, publish the HWSP on the freelist */
	if (!hwsp->free_bitmap)
		list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);

	GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
	hwsp->free_bitmap |= BIT_ULL(cacheline);

	/* And if no one is left using it, give the page back to the system */
	if (hwsp->free_bitmap == ~0ull) {
		i915_vma_put(hwsp->vma);
		list_del(&hwsp->free_link);
		kfree(hwsp);
	}

	spin_unlock_irqrestore(&gt->hwsp_lock, flags);
}

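/*
 * Final teardown of a cacheline: drop the kmap and VMA references taken in
 * cacheline_alloc(), hand the cacheline slot back to its HWSP page and free
 * the bookkeeping after an RCU grace period.
 */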
static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
{
	GEM_BUG_ON(!i915_active_is_idle(&cl->active));

	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
	i915_vma_put(cl->hwsp->vma);
	__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));

	i915_active_fini(&cl->active);
	kfree_rcu(cl, rcu);
}

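/*
 * The cacheline's i915_active keeps the HWSP VMA pinned while any request
 * still references it: __cacheline_active() pins on first use and
 * __cacheline_retire() unpins on idling, freeing the cacheline if it has
 * already been marked CACHELINE_FREE by its timeline.
 */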
__i915_active_call
static void __cacheline_retire(struct i915_active *active)
{
	struct intel_timeline_cacheline *cl =
		container_of(active, typeof(*cl), active);

	i915_vma_unpin(cl->hwsp->vma);
	if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
		__idle_cacheline_free(cl);
}

static int __cacheline_active(struct i915_active *active)
{
	struct intel_timeline_cacheline *cl =
		container_of(active, typeof(*cl), active);

	__i915_vma_pin(cl->hwsp->vma);
	return 0;
}

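/*
 * Claim one cacheline of a HWSP page for CPU and GPU use: keep the page
 * mapped, take a reference on its VMA and pack the cacheline index into the
 * low bits of the kernel address so it can be recovered later.
 */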
static struct intel_timeline_cacheline *
cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
{
	struct intel_timeline_cacheline *cl;
	void *vaddr;

	GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));

	cl = kmalloc(sizeof(*cl), GFP_KERNEL);
	if (!cl)
		return ERR_PTR(-ENOMEM);

	vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		kfree(cl);
		return ERR_CAST(vaddr);
	}

	i915_vma_get(hwsp->vma);
	cl->hwsp = hwsp;
	cl->vaddr = page_pack_bits(vaddr, cacheline);

	i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);

	return cl;
}

static void cacheline_acquire(struct intel_timeline_cacheline *cl)
{
	if (cl)
		i915_active_acquire(&cl->active);
}

static void cacheline_release(struct intel_timeline_cacheline *cl)
{
	if (cl)
		i915_active_release(&cl->active);
}

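/*
 * The timeline no longer needs this cacheline: mark it CACHELINE_FREE so it
 * is released either immediately (if already idle) or by the retire callback
 * once the last request referencing it has completed.
 */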
static void cacheline_free(struct intel_timeline_cacheline *cl)
{
	GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
	cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);

	if (i915_active_is_idle(&cl->active))
		__idle_cacheline_free(cl);
}

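/*
 * Initialise a timeline. With no @hwsp supplied, a private cacheline is
 * carved out of a shared HWSP page (with an initial breadcrumb and support
 * for seqno wraparound); otherwise the caller's global HWSP is used at the
 * fixed I915_GEM_HWS_SEQNO_ADDR offset.
 */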
int intel_timeline_init(struct intel_timeline *timeline,
			struct intel_gt *gt,
			struct i915_vma *hwsp)
{
	void *vaddr;

	kref_init(&timeline->kref);
	atomic_set(&timeline->pin_count, 0);

	timeline->gt = gt;

	timeline->has_initial_breadcrumb = !hwsp;
	timeline->hwsp_cacheline = NULL;

	if (!hwsp) {
		struct intel_timeline_cacheline *cl;
		unsigned int cacheline;

		hwsp = hwsp_alloc(timeline, &cacheline);
		if (IS_ERR(hwsp))
			return PTR_ERR(hwsp);

		cl = cacheline_alloc(hwsp->private, cacheline);
		if (IS_ERR(cl)) {
			__idle_hwsp_free(hwsp->private, cacheline);
			return PTR_ERR(cl);
		}

		timeline->hwsp_cacheline = cl;
		timeline->hwsp_offset = cacheline * CACHELINE_BYTES;

		vaddr = page_mask_bits(cl->vaddr);
	} else {
		timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;

		vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
		if (IS_ERR(vaddr))
			return PTR_ERR(vaddr);
	}

	timeline->hwsp_seqno =
		memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);

	timeline->hwsp_ggtt = i915_vma_get(hwsp);
	GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

	timeline->fence_context = dma_fence_context_alloc(1);

	mutex_init(&timeline->mutex);

	INIT_ACTIVE_FENCE(&timeline->last_request);
	INIT_LIST_HEAD(&timeline->requests);

	i915_syncmap_init(&timeline->sync);

	return 0;
}

void intel_gt_init_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	spin_lock_init(&timelines->lock);
	INIT_LIST_HEAD(&timelines->active_list);

	spin_lock_init(&timelines->hwsp_lock);
	INIT_LIST_HEAD(&timelines->hwsp_free_list);
}

void intel_timeline_fini(struct intel_timeline *timeline)
{
	GEM_BUG_ON(atomic_read(&timeline->pin_count));
	GEM_BUG_ON(!list_empty(&timeline->requests));
	GEM_BUG_ON(timeline->retire);

	if (timeline->hwsp_cacheline)
		cacheline_free(timeline->hwsp_cacheline);
	else
		i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

	i915_vma_put(timeline->hwsp_ggtt);
}

struct intel_timeline *
intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
{
	struct intel_timeline *timeline;
	int err;

	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
	if (!timeline)
		return ERR_PTR(-ENOMEM);

	err = intel_timeline_init(timeline, gt, global_hwsp);
	if (err) {
		kfree(timeline);
		return ERR_PTR(err);
	}

	return timeline;
}

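/*
 * Pin the timeline's HWSP into the GGTT. Only the first pin does real work:
 * it binds the VMA, converts hwsp_offset into a GGTT address for the GPU and
 * marks the backing cacheline busy; later pins just bump pin_count.
 */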
int intel_timeline_pin(struct intel_timeline *tl)
{
	int err;

	if (atomic_add_unless(&tl->pin_count, 1, 0))
		return 0;

	err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
	if (err)
		return err;

	tl->hwsp_offset =
		i915_ggtt_offset(tl->hwsp_ggtt) +
		offset_in_page(tl->hwsp_offset);

	cacheline_acquire(tl->hwsp_cacheline);
	if (atomic_fetch_inc(&tl->pin_count)) {
		cacheline_release(tl->hwsp_cacheline);
		__i915_vma_unpin(tl->hwsp_ggtt);
	}

	return 0;
}

void intel_timeline_enter(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/*
	 * Pretend we are serialised by the timeline->mutex.
	 *
	 * While generally true, there are a few exceptions to the rule
	 * for the engine->kernel_context being used to manage power
	 * transitions. As the engine_park may be called from under any
	 * timeline, it uses the power mutex as a global serialisation
	 * lock to prevent any other request entering its timeline.
	 *
	 * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
	 *
	 * However, intel_gt_retire_requests() does not know which engine
	 * it is retiring along and so cannot partake in the engine-pm
	 * barrier, and there we use the tl->active_count as a means to
	 * pin the timeline in the active_list while the locks are dropped.
	 * Ergo, as that is outside of the engine-pm barrier, we need to
	 * use atomic to manipulate tl->active_count.
	 */
	lockdep_assert_held(&tl->mutex);

	if (atomic_add_unless(&tl->active_count, 1, 0))
		return;

	spin_lock(&timelines->lock);
	if (!atomic_fetch_inc(&tl->active_count))
		list_add_tail(&tl->link, &timelines->active_list);
	spin_unlock(&timelines->lock);
}

void intel_timeline_exit(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/* See intel_timeline_enter() */
	lockdep_assert_held(&tl->mutex);

	GEM_BUG_ON(!atomic_read(&tl->active_count));
	if (atomic_add_unless(&tl->active_count, -1, 1))
		return;

	spin_lock(&timelines->lock);
	if (atomic_dec_and_test(&tl->active_count))
		list_del(&tl->link);
	spin_unlock(&timelines->lock);

	/*
	 * Since this timeline is idle, all barriers upon which we were waiting
	 * must also be complete and so we can discard the last used barriers
	 * without loss of information.
	 */
	i915_syncmap_free(&tl->sync);
}

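/*
 * Reserve the next seqno for a request. Timelines that emit an initial
 * breadcrumb advance by two so that the final seqno written to the HWSP
 * stays even; timeline_rollback() undoes the reservation on error.
 */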
static u32 timeline_advance(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

	return tl->seqno += 1 + tl->has_initial_breadcrumb;
}

static void timeline_rollback(struct intel_timeline *tl)
{
	tl->seqno -= 1 + tl->has_initial_breadcrumb;
}

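/*
 * Slow path for seqno wraparound: the old cacheline may still be sampled by
 * hardware semaphores, so rather than reusing it we allocate a replacement
 * cacheline, hand the old one over to the current request for retirement and
 * restart the timeline's seqno in the new location.
 */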
static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl,
			   struct i915_request *rq,
			   u32 *seqno)
{
	struct intel_timeline_cacheline *cl;
	unsigned int cacheline;
	struct i915_vma *vma;
	void *vaddr;
	int err;

	/*
	 * If there is an outstanding GPU reference to this cacheline,
	 * such as it being sampled by a HW semaphore on another timeline,
	 * we cannot wraparound our seqno value (the HW semaphore does
	 * a strict greater-than-or-equals compare, not i915_seqno_passed).
	 * So if the cacheline is still busy, we must detach ourselves
	 * from it and leave it inflight alongside its users.
	 *
	 * However, if nobody is watching and we can guarantee that nobody
	 * will, we could simply reuse the same cacheline.
	 *
	 * if (i915_active_request_is_signaled(&tl->last_request) &&
	 *     i915_active_is_signaled(&tl->hwsp_cacheline->active))
	 *	return 0;
	 *
	 * That seems unlikely for a busy timeline that needed to wrap in
	 * the first place, so just replace the cacheline.
	 */

	vma = hwsp_alloc(tl, &cacheline);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_rollback;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
	if (err) {
		__idle_hwsp_free(vma->private, cacheline);
		goto err_rollback;
	}

	cl = cacheline_alloc(vma->private, cacheline);
	if (IS_ERR(cl)) {
		err = PTR_ERR(cl);
		__idle_hwsp_free(vma->private, cacheline);
		goto err_unpin;
	}
	GEM_BUG_ON(cl->hwsp->vma != vma);

	/*
	 * Attach the old cacheline to the current request, so that we only
	 * free it after the current request is retired, which ensures that
	 * all writes into the cacheline from previous requests are complete.
	 */
	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
	if (err)
		goto err_cacheline;

	cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
	cacheline_free(tl->hwsp_cacheline);

	i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
	i915_vma_put(tl->hwsp_ggtt);

	tl->hwsp_ggtt = i915_vma_get(vma);

	vaddr = page_mask_bits(cl->vaddr);
	tl->hwsp_offset = cacheline * CACHELINE_BYTES;
	tl->hwsp_seqno =
		memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);

	tl->hwsp_offset += i915_ggtt_offset(vma);

	cacheline_acquire(cl);
	tl->hwsp_cacheline = cl;

	*seqno = timeline_advance(tl);
	GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
	return 0;

err_cacheline:
	cacheline_free(cl);
err_unpin:
	i915_vma_unpin(vma);
err_rollback:
	timeline_rollback(tl);
	return err;
}

int intel_timeline_get_seqno(struct intel_timeline *tl,
			     struct i915_request *rq,
			     u32 *seqno)
{
	*seqno = timeline_advance(tl);

	/* Replace the HWSP on wraparound for HW semaphores */
	if (unlikely(!*seqno && tl->hwsp_cacheline))
		return __intel_timeline_get_seqno(tl, rq, seqno);

	return 0;
}

static int cacheline_ref(struct intel_timeline_cacheline *cl,
			 struct i915_request *rq)
{
	return i915_active_add_request(&cl->active, rq);
}

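/*
 * Look up the GGTT address at which @from's seqno is written, for use by a
 * hardware semaphore in @to. Returns 0 and keeps the cacheline alive for the
 * lifetime of @to, or a positive value if @from has already completed and no
 * wait is required.
 */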
int intel_timeline_read_hwsp(struct i915_request *from,
			     struct i915_request *to,
			     u32 *hwsp)
{
	struct intel_timeline_cacheline *cl;
	int err;

	GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));

	rcu_read_lock();
	cl = rcu_dereference(from->hwsp_cacheline);
	if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
		goto unlock; /* seqno wrapped and completed! */
	if (unlikely(i915_request_completed(from)))
		goto release;
	rcu_read_unlock();

	err = cacheline_ref(cl, to);
	if (err)
		goto out;

	*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
		ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;

out:
	i915_active_release(&cl->active);
	return err;

release:
	i915_active_release(&cl->active);
unlock:
	rcu_read_unlock();
	return 1;
}

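/*
 * Drop a pin on the timeline; the last unpin releases the cacheline and the
 * GGTT binding acquired by intel_timeline_pin().
 */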
void intel_timeline_unpin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	if (!atomic_dec_and_test(&tl->pin_count))
		return;

	cacheline_release(tl->hwsp_cacheline);

	__i915_vma_unpin(tl->hwsp_ggtt);
}

void __intel_timeline_free(struct kref *kref)
{
	struct intel_timeline *timeline =
		container_of(kref, typeof(*timeline), kref);

	intel_timeline_fini(timeline);
	kfree_rcu(timeline, rcu);
}

void intel_gt_fini_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	GEM_BUG_ON(!list_empty(&timelines->active_list));
	GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif