/*	$NetBSD: selftest_timeline.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: selftest_timeline.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");

#include <linux/prime_numbers.h>

#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"

#include "../selftests/i915_random.h"
#include "../i915_selftest.h"

#include "../selftests/igt_flush_test.h"
#include "../selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"

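/*
 * hwsp_page() and hwsp_cacheline() map a timeline to the CPU address of its
 * HWSP (hardware status page) slot, so the mock selftest below can detect
 * two timelines being handed the same cacheline.
 */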
static struct page *hwsp_page(struct intel_timeline *tl)
{
	struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

static unsigned long hwsp_cacheline(struct intel_timeline *tl)
{
	unsigned long address = (unsigned long)page_address(hwsp_page(tl));

	return (address + tl->hwsp_offset) / CACHELINE_BYTES;
}

#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)

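/*
 * Bookkeeping for the mock HWSP freelist test: a radix tree keyed by HWSP
 * cacheline to catch duplicate allocations, and a small ring of the most
 * recently created timelines so that some stay alive across iterations.
 */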
struct mock_hwsp_freelist {
	struct intel_gt *gt;
	struct radix_tree_root cachelines;
	struct intel_timeline **history;
	unsigned long count, max;
	struct rnd_state prng;
};

enum {
	SHUFFLE = BIT(0),
};

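/* Swap the timeline stored in history[idx], releasing the previous occupant. */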
static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
			       unsigned int idx,
			       struct intel_timeline *tl)
{
	tl = xchg(&state->history[idx], tl);
	if (tl) {
		radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
		intel_timeline_put(tl);
	}
}

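/*
 * Create @count timelines, checking that no two share a HWSP cacheline,
 * then (optionally after shuffling the history) release a random number of
 * them so their cachelines return to the freelist for the next pass.
 */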
static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
				unsigned int count,
				unsigned int flags)
{
	struct intel_timeline *tl;
	unsigned int idx;

	while (count--) {
		unsigned long cacheline;
		int err;

		tl = intel_timeline_create(state->gt, NULL);
		if (IS_ERR(tl))
			return PTR_ERR(tl);

		cacheline = hwsp_cacheline(tl);
		err = radix_tree_insert(&state->cachelines, cacheline, tl);
		if (err) {
			if (err == -EEXIST) {
				pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
				       cacheline);
			}
			intel_timeline_put(tl);
			return err;
		}

		idx = state->count++ % state->max;
		__mock_hwsp_record(state, idx, tl);
	}

	if (flags & SHUFFLE)
		i915_prandom_shuffle(state->history,
				     sizeof(*state->history),
				     min(state->count, state->max),
				     &state->prng);

	count = i915_prandom_u32_max_state(min(state->count, state->max),
					   &state->prng);
	while (count--) {
		idx = --state->count % state->max;
		__mock_hwsp_record(state, idx, NULL);
	}

	return 0;
}

static int mock_hwsp_freelist(void *arg)
{
	struct mock_hwsp_freelist state;
	struct drm_i915_private *i915;
	const struct {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "linear", 0 },
		{ "shuffled", SHUFFLE },
		{ },
	}, *p;
	unsigned int na;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);

	state.gt = &i915->gt;

	/*
	 * Create a bunch of timelines and check that their HWSP do not overlap.
	 * Free some, and try again.
	 */

	state.max = PAGE_SIZE / sizeof(*state.history);
	state.count = 0;
	state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
	if (!state.history) {
		err = -ENOMEM;
		goto err_put;
	}

	for (p = phases; p->name; p++) {
		pr_debug("%s(%s)\n", __func__, p->name);
		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
			err = __mock_hwsp_timeline(&state, na, p->flags);
			if (err)
				goto out;
		}
	}

out:
	for (na = 0; na < state.max; na++)
		__mock_hwsp_record(&state, na, NULL);
	kfree(state.history);
err_put:
	drm_dev_put(&i915->drm);
	return err;
}

struct __igt_sync {
	const char *name;
	u32 seqno;
	bool expected;
	bool set;
};

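/*
 * Apply one step of the igt_sync table: check whether the (ctx, seqno) pair
 * is already considered "later" than what the timeline has seen, and
 * optionally record it for the following steps.
 */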
static int __igt_sync(struct intel_timeline *tl,
		      u64 ctx,
		      const struct __igt_sync *p,
		      const char *name)
{
	int ret;

	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
		       name, p->name, ctx, p->seqno, yesno(p->expected));
		return -EINVAL;
	}

	if (p->set) {
		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
		if (ret)
			return ret;
	}

	return 0;
}

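/*
 * Walk a fixed table of seqno transitions (including u32 wraparound) against
 * context ids of every order of magnitude: first all contexts for each step,
 * then all steps for each context, to check the sync map obeys seqno
 * ordering in both directions.
 */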
static int igt_sync(void *arg)
{
	const struct __igt_sync pass[] = {
		{ "unset", 0, false, false },
		{ "new", 0, false, true },
		{ "0a", 0, true, true },
		{ "1a", 1, false, true },
		{ "1b", 1, true, true },
		{ "0b", 0, true, false },
		{ "2a", 2, false, true },
		{ "4", 4, false, true },
		{ "INT_MAX", INT_MAX, false, true },
		{ "INT_MAX-1", INT_MAX-1, true, false },
		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
		{ "INT_MAX", INT_MAX, true, false },
		{ "UINT_MAX", UINT_MAX, false, true },
		{ "wrap", 0, false, true },
		{ "unwrap", UINT_MAX, true, false },
		{},
	}, *p;
	struct intel_timeline tl;
	int order, offset;
	int ret = -ENODEV;

	mock_timeline_init(&tl, 0);
	for (p = pass; p->name; p++) {
		for (order = 1; order < 64; order++) {
			for (offset = -1; offset <= (order > 1); offset++) {
				u64 ctx = BIT_ULL(order) + offset;

				ret = __igt_sync(&tl, ctx, p, "1");
				if (ret)
					goto out;
			}
		}
	}
	mock_timeline_fini(&tl);

	mock_timeline_init(&tl, 0);
	for (order = 1; order < 64; order++) {
		for (offset = -1; offset <= (order > 1); offset++) {
			u64 ctx = BIT_ULL(order) + offset;

			for (p = pass; p->name; p++) {
				ret = __igt_sync(&tl, ctx, p, "2");
				if (ret)
					goto out;
			}
		}
	}

out:
	mock_timeline_fini(&tl);
	return ret;
}

static unsigned int random_engine(struct rnd_state *rnd)
{
	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

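/*
 * Informal microbenchmarks of the timeline sync map (context id -> last
 * signalled seqno): random and in-order insertion, lookup of the ids just
 * inserted, and mixed lookup/insert cycles. prng32_1M caches the measured
 * cost of one prng draw in nanoseconds scaled by 2^20, so that
 * (count * prng32_1M * 2) >> 20 can later subtract an estimate of the prng
 * overhead (the code charges two draws per iteration) from the timings.
 */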
static int bench_sync(void *arg)
{
	struct rnd_state prng;
	struct intel_timeline tl;
	unsigned long end_time, count;
	u64 prng32_1M;
	ktime_t kt;
	int order, last_order;

	mock_timeline_init(&tl, 0);

	/* Lookups from the cache are very fast, so the random number generation
	 * and the loop itself become a significant factor in the per-iteration
	 * timings. We try to compensate by measuring the overhead of the prng
	 * and subtracting it from the reported results.
	 */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 x;

		/* Make sure the compiler doesn't optimise away the prng call */
		WRITE_ONCE(x, prandom_u32_state(&prng));

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

	/* Benchmark (only) setting random context ids */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u64 id = i915_prandom_u64_state(&prng);

		__intel_timeline_sync_set(&tl, id, 0);
		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		u64 id = i915_prandom_u64_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
			mock_timeline_fini(&tl);
			pr_err("Lookup of %llu failed\n", id);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark setting the first N (in order) contexts */
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		__intel_timeline_sync_set(&tl, count++, 0);
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
			pr_err("Lookup of %lu failed\n", end_time);
			mock_timeline_fini(&tl);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark searching for a random context id and maybe changing it */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 id = random_engine(&prng);
		u32 seqno = prandom_u32_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, seqno))
			__intel_timeline_sync_set(&tl, id, seqno);

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	mock_timeline_fini(&tl);
	cond_resched();

	/* Benchmark searching for a known context id and changing the seqno */
	for (last_order = 1, order = 1; order < 32;
	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
		unsigned int mask = BIT(order) - 1;

		mock_timeline_init(&tl, 0);

		count = 0;
		kt = ktime_get();
		end_time = jiffies + HZ/10;
		do {
			/* Without assuming too many details of the underlying
			 * implementation, try to identify its phase-changes
			 * (if any)!
			 */
			u64 id = (u64)(count & mask) << order;

			__intel_timeline_sync_is_later(&tl, id, 0);
			__intel_timeline_sync_set(&tl, id, 0);

			count++;
		} while (!time_after(jiffies, end_time));
		kt = ktime_sub(ktime_get(), kt);
		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
			__func__, count, order,
			(long long)div64_ul(ktime_to_ns(kt), count));
		mock_timeline_fini(&tl);
		cond_resched();
	}

	return 0;
}

int intel_timeline_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(mock_hwsp_freelist),
		SUBTEST(igt_sync),
		SUBTEST(bench_sync),
	};

	return i915_subtests(tests, NULL);
}

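/*
 * Emit an MI_STORE_DWORD_IMM into the request's ring to write @value to the
 * GGTT address @addr, using the command layout appropriate for the
 * generation of hardware behind the request.
 */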
static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (INTEL_GEN(rq->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = addr;
		*cs++ = 0;
		*cs++ = value;
	} else if (INTEL_GEN(rq->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = addr;
		*cs++ = value;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = addr;
		*cs++ = value;
		*cs++ = MI_NOOP;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

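/*
 * Submit a kernel request on @engine that stores @value into @tl's HWSP
 * slot, returning the request with an extra reference held for the caller.
 */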
static struct i915_request *
tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
	struct i915_request *rq;
	int err;

	err = intel_timeline_pin(tl);
	if (err) {
		rq = ERR_PTR(err);
		goto out;
	}

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		goto out_unpin;

	i915_request_get(rq);

	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(err);
	}

out_unpin:
	intel_timeline_unpin(tl);
out:
	if (IS_ERR(rq))
		pr_err("Failed to write to timeline!\n");
	return rq;
}

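/*
 * Create a timeline and check that its breadcrumb slot starts out holding
 * the timeline's current seqno.
 */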
static struct intel_timeline *
checked_intel_timeline_create(struct intel_gt *gt)
{
	struct intel_timeline *tl;

	tl = intel_timeline_create(gt, NULL);
	if (IS_ERR(tl))
		return tl;

	if (*tl->hwsp_seqno != tl->seqno) {
		pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
		       *tl->hwsp_seqno, tl->seqno);
		intel_timeline_put(tl);
		return ERR_PTR(-EINVAL);
	}

	return tl;
}

static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		for (n = 0; n < NUM_TIMELINES; n++) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && *tl->hwsp_seqno != n) {
			pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
			       n, *tl->hwsp_seqno);
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots with adjacent
	 * engines.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for (n = 0; n < NUM_TIMELINES; n++) {
		for_each_engine(engine, gt, id) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			if (!intel_engine_can_store_dword(engine))
				continue;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				goto out;
			}

			intel_engine_pm_get(engine);
			rq = tl_write(tl, engine, count);
			intel_engine_pm_put(engine);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				goto out;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && *tl->hwsp_seqno != n) {
			pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
			       n, *tl->hwsp_seqno);
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_wrap(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Across a seqno wrap, we need to keep the old cacheline alive for
	 * foreign GPU references.
	 */

	tl = intel_timeline_create(gt, NULL);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
		goto out_free;

	err = intel_timeline_pin(tl);
	if (err)
		goto out_free;

	for_each_engine(engine, gt, id) {
		const u32 *hwsp_seqno[2];
		struct i915_request *rq;
		u32 seqno[2];

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}

		tl->seqno = -4u;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
			 seqno[0], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[0] = tl->hwsp_seqno;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
			 seqno[1], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[1] = tl->hwsp_seqno;

		/* With wrap should come a new hwsp */
		GEM_BUG_ON(seqno[1] >= seqno[0]);
		GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);

		i915_request_add(rq);

		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("Wait for timeline writes timed out!\n");
			err = -EIO;
			goto out;
		}

		if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
			pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
			       *hwsp_seqno[0], *hwsp_seqno[1],
			       seqno[0], seqno[1]);
			err = -EINVAL;
			goto out;
		}

		intel_gt_retire_requests(gt); /* recycle HWSP */
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	intel_timeline_unpin(tl);
out_free:
	intel_timeline_put(tl);
	return err;
}

static int live_hwsp_recycle(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count;
	int err = 0;

	/*
	 * Check seqno writes into one timeline at a time. We expect to
	 * recycle the breadcrumb slot between iterations, and want to
	 * confuse neither ourselves nor the GPU while doing so.
	 */

	count = 0;
	for_each_engine(engine, gt, id) {
		IGT_TIMEOUT(end_time);

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		do {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Wait for timeline writes timed out!\n");
				i915_request_put(rq);
				intel_timeline_put(tl);
				err = -EIO;
				break;
			}

			if (*tl->hwsp_seqno != count) {
				pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
				       count, *tl->hwsp_seqno);
				err = -EINVAL;
			}

			i915_request_put(rq);
			intel_timeline_put(tl);
			count++;

			if (err)
				break;
		} while (!__igt_timeout(end_time, NULL));

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	return err;
}

int intel_timeline_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_hwsp_recycle),
		SUBTEST(live_hwsp_engine),
		SUBTEST(live_hwsp_alternate),
		SUBTEST(live_hwsp_wrap),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}