/*	$NetBSD: selftest_timeline.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright 2017-2018 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: selftest_timeline.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");

#include <linux/prime_numbers.h>

#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"

#include "../selftests/i915_random.h"
#include "../i915_selftest.h"

#include "../selftests/igt_flush_test.h"
#include "../selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"

/*
 * Return the struct page backing the timeline's HWSP (hardware status
 * page) slot.  The HWSP object must already have its pages pinned.
 */
static struct page *hwsp_page(struct intel_timeline *tl)
{
	struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

/*
 * Compute a system-wide unique cacheline index for the timeline's HWSP
 * slot, derived from the kernel virtual address of the backing page plus
 * the slot offset.  Used below to detect two timelines sharing a slot.
 */
static unsigned long hwsp_cacheline(struct intel_timeline *tl)
{
	unsigned long address = (unsigned long)page_address(hwsp_page(tl));

	return (address + tl->hwsp_offset) / CACHELINE_BYTES;
}

#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)

/*
 * State for the mock_hwsp_freelist() selftest: tracks every live
 * timeline's HWSP cacheline so duplicate allocations can be caught.
 */
struct mock_hwsp_freelist {
	struct intel_gt *gt;
	/* maps hwsp_cacheline() -> struct intel_timeline * for live timelines */
	struct radix_tree_root cachelines;
	/* ring buffer of timeline references; slots are recycled via xchg */
	struct intel_timeline **history;
	/* count: total timelines ever recorded; max: history capacity */
	unsigned long count, max;
	struct rnd_state prng;
};
enum {
	SHUFFLE = BIT(0),
};

/*
 * Swap the timeline reference at history[idx] for @tl (which may be
 * NULL).  Any previously recorded timeline is dropped: its cacheline is
 * removed from the tracking tree and its reference released.
 */
static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
			       unsigned int idx,
			       struct intel_timeline *tl)
{
	tl = xchg(&state->history[idx], tl);
	if (tl) {
		radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
		intel_timeline_put(tl);
	}
}

/*
 * Create @count timelines, verifying that no two live timelines are ever
 * assigned the same HWSP cacheline, then free a random subset so that
 * the next pass exercises the freelist/recycling paths.
 */
static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
				unsigned int count,
				unsigned int flags)
{
	struct intel_timeline *tl;
	unsigned int idx;

	while (count--) {
		unsigned long cacheline;
		int err;

		tl = intel_timeline_create(state->gt, NULL);
		if (IS_ERR(tl))
			return PTR_ERR(tl);

		cacheline = hwsp_cacheline(tl);
		err = radix_tree_insert(&state->cachelines, cacheline, tl);
		if (err) {
			/* -EEXIST means two timelines share one HWSP slot */
			if (err == -EEXIST) {
				pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
				       cacheline);
			}
			intel_timeline_put(tl);
			return err;
		}

		idx = state->count++ % state->max;
		__mock_hwsp_record(state, idx, tl);
	}

	if (flags & SHUFFLE)
		i915_prandom_shuffle(state->history,
				     sizeof(*state->history),
				     min(state->count, state->max),
				     &state->prng);

	/* Release a random number of the most recent entries */
	count = i915_prandom_u32_max_state(min(state->count, state->max),
					   &state->prng);
	while (count--) {
		idx = --state->count % state->max;
		__mock_hwsp_record(state, idx, NULL);
	}

	return 0;
}

/*
 * Mock selftest: repeatedly allocate and free timelines, in linear and
 * shuffled order, checking their HWSP cachelines never overlap.
 */
static int mock_hwsp_freelist(void *arg)
{
	struct mock_hwsp_freelist state;
	struct drm_i915_private *i915;
	const struct {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "linear", 0 },
		{ "shuffled", SHUFFLE },
		{ },
	}, *p;
	unsigned int na;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);

	state.gt = &i915->gt;

	/*
	 * Create a bunch of timelines and check that their HWSP do not overlap.
	 * Free some, and try again.
	 */

	state.max = PAGE_SIZE / sizeof(*state.history);
	state.count = 0;
	state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
	if (!state.history) {
		err = -ENOMEM;
		goto err_put;
	}

	for (p = phases; p->name; p++) {
		pr_debug("%s(%s)\n", __func__, p->name);
		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
			err = __mock_hwsp_timeline(&state, na, p->flags);
			if (err)
				goto out;
		}
	}

out:
	/* Drop every remaining timeline reference before teardown */
	for (na = 0; na < state.max; na++)
		__mock_hwsp_record(&state, na, NULL);
	kfree(state.history);
err_put:
	drm_dev_put(&i915->drm);
	return err;
}

/* One step of the igt_sync test vector: query, check, optionally set. */
struct __igt_sync {
	const char *name;
	u32 seqno;
	bool expected;
	bool set;
};

/*
 * Check that sync_is_later(ctx, seqno) matches the expected result, then
 * optionally record the seqno.  @name identifies the calling pass for
 * error reporting.
 */
static int __igt_sync(struct intel_timeline *tl,
		      u64 ctx,
		      const struct __igt_sync *p,
		      const char *name)
{
	int ret;

	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
		       name, p->name, ctx, p->seqno, yesno(p->expected));
		return -EINVAL;
	}

	if (p->set) {
		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
		if (ret)
			return ret;
	}

	return 0;
}

/*
 * Mock selftest: exercise the timeline sync map with a scripted seqno
 * sequence (including u32 wraparound), over context ids chosen around
 * every power-of-two boundary to probe the map's radix layout.
 */
static int igt_sync(void *arg)
{
	const struct __igt_sync pass[] = {
		{ "unset", 0, false, false },
		{ "new", 0, false, true },
		{ "0a", 0, true, true },
		{ "1a", 1, false, true },
		{ "1b", 1, true, true },
		{ "0b", 0, true, false },
		{ "2a", 2, false, true },
		{ "4", 4, false, true },
		{ "INT_MAX", INT_MAX, false, true },
		{ "INT_MAX-1", INT_MAX-1, true, false },
		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
		{ "INT_MAX", INT_MAX, true, false },
		{ "UINT_MAX", UINT_MAX, false, true },
		{ "wrap", 0, false, true },
		{ "unwrap", UINT_MAX, true, false },
		{},
	}, *p;
	struct intel_timeline tl;
	int order, offset;
	int ret = -ENODEV;

	/* Pass 1: full script per context id */
	mock_timeline_init(&tl, 0);
	for (p = pass; p->name; p++) {
		for (order = 1; order < 64; order++) {
			for (offset = -1; offset <= (order > 1); offset++) {
				u64 ctx = BIT_ULL(order) + offset;

				ret = __igt_sync(&tl, ctx, p, "1");
				if (ret)
					goto out;
			}
		}
	}
	mock_timeline_fini(&tl);

	/* Pass 2: loop nesting inverted — all steps per context id */
	mock_timeline_init(&tl, 0);
	for (order = 1; order < 64; order++) {
		for (offset = -1; offset <= (order > 1); offset++) {
			u64 ctx = BIT_ULL(order) + offset;

			for (p = pass; p->name; p++) {
				ret = __igt_sync(&tl, ctx, p, "2");
				if (ret)
					goto out;
			}
		}
	}

out:
	mock_timeline_fini(&tl);
	return ret;
}

/* Pick a random engine index in [0, I915_NUM_ENGINES). */
static unsigned int random_engine(struct rnd_state *rnd)
{
	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

/*
 * Mock selftest: benchmark the timeline sync map — random and in-order
 * insertions and lookups, then cyclic access patterns of increasing
 * stride.  Reported timings subtract the measured prng overhead.
 */
static int bench_sync(void *arg)
{
	struct rnd_state prng;
	struct intel_timeline tl;
	unsigned long end_time, count;
	u64 prng32_1M;		/* prng cost in ns, scaled by 2^20 */
	ktime_t kt;
	int order, last_order;

	mock_timeline_init(&tl, 0);

	/* Lookups from cache are very fast and so the random number generation
	 * and the loop itself becomes a significant factor in the per-iteration
	 * timings. We try to compensate the results by measuring the overhead
	 * of the prng and subtract it from the reported results.
	 */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 x;

		/* Make sure the compiler doesn't optimise away the prng call */
		WRITE_ONCE(x, prandom_u32_state(&prng));

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

	/* Benchmark (only) setting random context ids */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u64 id = i915_prandom_u64_state(&prng);

		__intel_timeline_sync_set(&tl, id, 0);
		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	/* Two 32-bit prng draws per u64 id: subtract 2 * prng cost */
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		u64 id = i915_prandom_u64_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
			mock_timeline_fini(&tl);
			pr_err("Lookup of %llu failed\n", id);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark setting the first N (in order) contexts */
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		__intel_timeline_sync_set(&tl, count++, 0);
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
			pr_err("Lookup of %lu failed\n", end_time);
			mock_timeline_fini(&tl);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark searching for a random context id and maybe changing it */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 id = random_engine(&prng);
		u32 seqno = prandom_u32_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, seqno))
			__intel_timeline_sync_set(&tl, id, seqno);

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	mock_timeline_fini(&tl);
	cond_resched();

	/* Benchmark searching for a known context id and changing the seqno */
	for (last_order = 1, order = 1; order < 32;
	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
		unsigned int mask = BIT(order) - 1;

		mock_timeline_init(&tl, 0);

		count = 0;
		kt = ktime_get();
		end_time = jiffies + HZ/10;
		do {
			/* Without assuming too many details of the underlying
			 * implementation, try to identify its phase-changes
			 * (if any)!
			 */
			u64 id = (u64)(count & mask) << order;

			__intel_timeline_sync_is_later(&tl, id, 0);
			__intel_timeline_sync_set(&tl, id, 0);

			count++;
		} while (!time_after(jiffies, end_time));
		kt = ktime_sub(ktime_get(), kt);
		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
			__func__, count, order,
			(long long)div64_ul(ktime_to_ns(kt), count));
		mock_timeline_fini(&tl);
		cond_resched();
	}

	return 0;
}

/* Entry point for the mock (no hardware) timeline selftests. */
int intel_timeline_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(mock_hwsp_freelist),
		SUBTEST(igt_sync),
		SUBTEST(bench_sync),
	};

	return i915_subtests(tests, NULL);
}

/*
 * Emit a MI_STORE_DWORD into @rq's ring that writes @value to the GGTT
 * address @addr, using the encoding appropriate for the hardware
 * generation.  Always emits exactly 4 dwords.
 */
static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (INTEL_GEN(rq->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = addr;
		*cs++ = 0;
		*cs++ = value;
	} else if (INTEL_GEN(rq->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = addr;
		*cs++ = value;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = addr;
		*cs++ = value;
		*cs++ = MI_NOOP;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

/*
 * Submit a kernel request on @engine that stores @value into @tl's HWSP
 * slot.  Returns a referenced request on success, or an ERR_PTR; the
 * caller owns the returned reference.
 */
static struct i915_request *
tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
	struct i915_request *rq;
	int err;

	err = intel_timeline_pin(tl);
	if (err) {
		rq = ERR_PTR(err);
		goto out;
	}

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		goto out_unpin;

	i915_request_get(rq);

	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(err);
	}

out_unpin:
	intel_timeline_unpin(tl);
out:
	if (IS_ERR(rq))
		pr_err("Failed to write to timeline!\n");
	return rq;
}

/*
 * Create a timeline and sanity-check that its breadcrumb slot was
 * initialised to the timeline's current seqno.
 */
static struct intel_timeline *
checked_intel_timeline_create(struct intel_gt *gt)
{
	struct intel_timeline *tl;

	tl = intel_timeline_create(gt, NULL);
	if (IS_ERR(tl))
		return tl;

	if (*tl->hwsp_seqno != tl->seqno) {
		pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
		       *tl->hwsp_seqno, tl->seqno);
		intel_timeline_put(tl);
		return ERR_PTR(-EINVAL);
	}

	return tl;
}
static int live_hwsp_engine(void *arg) 507 1.1 riastrad { 508 1.1 riastrad #define NUM_TIMELINES 4096 509 1.1 riastrad struct intel_gt *gt = arg; 510 1.1 riastrad struct intel_timeline **timelines; 511 1.1 riastrad struct intel_engine_cs *engine; 512 1.1 riastrad enum intel_engine_id id; 513 1.1 riastrad unsigned long count, n; 514 1.1 riastrad int err = 0; 515 1.1 riastrad 516 1.1 riastrad /* 517 1.1 riastrad * Create a bunch of timelines and check we can write 518 1.1 riastrad * independently to each of their breadcrumb slots. 519 1.1 riastrad */ 520 1.1 riastrad 521 1.1 riastrad timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, 522 1.1 riastrad sizeof(*timelines), 523 1.1 riastrad GFP_KERNEL); 524 1.1 riastrad if (!timelines) 525 1.1 riastrad return -ENOMEM; 526 1.1 riastrad 527 1.1 riastrad count = 0; 528 1.1 riastrad for_each_engine(engine, gt, id) { 529 1.1 riastrad if (!intel_engine_can_store_dword(engine)) 530 1.1 riastrad continue; 531 1.1 riastrad 532 1.1 riastrad intel_engine_pm_get(engine); 533 1.1 riastrad 534 1.1 riastrad for (n = 0; n < NUM_TIMELINES; n++) { 535 1.1 riastrad struct intel_timeline *tl; 536 1.1 riastrad struct i915_request *rq; 537 1.1 riastrad 538 1.1 riastrad tl = checked_intel_timeline_create(gt); 539 1.1 riastrad if (IS_ERR(tl)) { 540 1.1 riastrad err = PTR_ERR(tl); 541 1.1 riastrad break; 542 1.1 riastrad } 543 1.1 riastrad 544 1.1 riastrad rq = tl_write(tl, engine, count); 545 1.1 riastrad if (IS_ERR(rq)) { 546 1.1 riastrad intel_timeline_put(tl); 547 1.1 riastrad err = PTR_ERR(rq); 548 1.1 riastrad break; 549 1.1 riastrad } 550 1.1 riastrad 551 1.1 riastrad timelines[count++] = tl; 552 1.1 riastrad i915_request_put(rq); 553 1.1 riastrad } 554 1.1 riastrad 555 1.1 riastrad intel_engine_pm_put(engine); 556 1.1 riastrad if (err) 557 1.1 riastrad break; 558 1.1 riastrad } 559 1.1 riastrad 560 1.1 riastrad if (igt_flush_test(gt->i915)) 561 1.1 riastrad err = -EIO; 562 1.1 riastrad 563 1.1 riastrad for (n = 0; n < count; 
n++) { 564 1.1 riastrad struct intel_timeline *tl = timelines[n]; 565 1.1 riastrad 566 1.1 riastrad if (!err && *tl->hwsp_seqno != n) { 567 1.1 riastrad pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", 568 1.1 riastrad n, *tl->hwsp_seqno); 569 1.1 riastrad err = -EINVAL; 570 1.1 riastrad } 571 1.1 riastrad intel_timeline_put(tl); 572 1.1 riastrad } 573 1.1 riastrad 574 1.1 riastrad kvfree(timelines); 575 1.1 riastrad return err; 576 1.1 riastrad #undef NUM_TIMELINES 577 1.1 riastrad } 578 1.1 riastrad 579 1.1 riastrad static int live_hwsp_alternate(void *arg) 580 1.1 riastrad { 581 1.1 riastrad #define NUM_TIMELINES 4096 582 1.1 riastrad struct intel_gt *gt = arg; 583 1.1 riastrad struct intel_timeline **timelines; 584 1.1 riastrad struct intel_engine_cs *engine; 585 1.1 riastrad enum intel_engine_id id; 586 1.1 riastrad unsigned long count, n; 587 1.1 riastrad int err = 0; 588 1.1 riastrad 589 1.1 riastrad /* 590 1.1 riastrad * Create a bunch of timelines and check we can write 591 1.1 riastrad * independently to each of their breadcrumb slots with adjacent 592 1.1 riastrad * engines. 
593 1.1 riastrad */ 594 1.1 riastrad 595 1.1 riastrad timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, 596 1.1 riastrad sizeof(*timelines), 597 1.1 riastrad GFP_KERNEL); 598 1.1 riastrad if (!timelines) 599 1.1 riastrad return -ENOMEM; 600 1.1 riastrad 601 1.1 riastrad count = 0; 602 1.1 riastrad for (n = 0; n < NUM_TIMELINES; n++) { 603 1.1 riastrad for_each_engine(engine, gt, id) { 604 1.1 riastrad struct intel_timeline *tl; 605 1.1 riastrad struct i915_request *rq; 606 1.1 riastrad 607 1.1 riastrad if (!intel_engine_can_store_dword(engine)) 608 1.1 riastrad continue; 609 1.1 riastrad 610 1.1 riastrad tl = checked_intel_timeline_create(gt); 611 1.1 riastrad if (IS_ERR(tl)) { 612 1.1 riastrad intel_engine_pm_put(engine); 613 1.1 riastrad err = PTR_ERR(tl); 614 1.1 riastrad goto out; 615 1.1 riastrad } 616 1.1 riastrad 617 1.1 riastrad intel_engine_pm_get(engine); 618 1.1 riastrad rq = tl_write(tl, engine, count); 619 1.1 riastrad intel_engine_pm_put(engine); 620 1.1 riastrad if (IS_ERR(rq)) { 621 1.1 riastrad intel_timeline_put(tl); 622 1.1 riastrad err = PTR_ERR(rq); 623 1.1 riastrad goto out; 624 1.1 riastrad } 625 1.1 riastrad 626 1.1 riastrad timelines[count++] = tl; 627 1.1 riastrad i915_request_put(rq); 628 1.1 riastrad } 629 1.1 riastrad } 630 1.1 riastrad 631 1.1 riastrad out: 632 1.1 riastrad if (igt_flush_test(gt->i915)) 633 1.1 riastrad err = -EIO; 634 1.1 riastrad 635 1.1 riastrad for (n = 0; n < count; n++) { 636 1.1 riastrad struct intel_timeline *tl = timelines[n]; 637 1.1 riastrad 638 1.1 riastrad if (!err && *tl->hwsp_seqno != n) { 639 1.1 riastrad pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", 640 1.1 riastrad n, *tl->hwsp_seqno); 641 1.1 riastrad err = -EINVAL; 642 1.1 riastrad } 643 1.1 riastrad intel_timeline_put(tl); 644 1.1 riastrad } 645 1.1 riastrad 646 1.1 riastrad kvfree(timelines); 647 1.1 riastrad return err; 648 1.1 riastrad #undef NUM_TIMELINES 649 1.1 riastrad } 650 1.1 riastrad 651 1.1 riastrad static int 
live_hwsp_wrap(void *arg) 652 1.1 riastrad { 653 1.1 riastrad struct intel_gt *gt = arg; 654 1.1 riastrad struct intel_engine_cs *engine; 655 1.1 riastrad struct intel_timeline *tl; 656 1.1 riastrad enum intel_engine_id id; 657 1.1 riastrad int err = 0; 658 1.1 riastrad 659 1.1 riastrad /* 660 1.1 riastrad * Across a seqno wrap, we need to keep the old cacheline alive for 661 1.1 riastrad * foreign GPU references. 662 1.1 riastrad */ 663 1.1 riastrad 664 1.1 riastrad tl = intel_timeline_create(gt, NULL); 665 1.1 riastrad if (IS_ERR(tl)) 666 1.1 riastrad return PTR_ERR(tl); 667 1.1 riastrad 668 1.1 riastrad if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) 669 1.1 riastrad goto out_free; 670 1.1 riastrad 671 1.1 riastrad err = intel_timeline_pin(tl); 672 1.1 riastrad if (err) 673 1.1 riastrad goto out_free; 674 1.1 riastrad 675 1.1 riastrad for_each_engine(engine, gt, id) { 676 1.1 riastrad const u32 *hwsp_seqno[2]; 677 1.1 riastrad struct i915_request *rq; 678 1.1 riastrad u32 seqno[2]; 679 1.1 riastrad 680 1.1 riastrad if (!intel_engine_can_store_dword(engine)) 681 1.1 riastrad continue; 682 1.1 riastrad 683 1.1 riastrad rq = intel_engine_create_kernel_request(engine); 684 1.1 riastrad if (IS_ERR(rq)) { 685 1.1 riastrad err = PTR_ERR(rq); 686 1.1 riastrad goto out; 687 1.1 riastrad } 688 1.1 riastrad 689 1.1 riastrad tl->seqno = -4u; 690 1.1 riastrad 691 1.1 riastrad mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); 692 1.1 riastrad err = intel_timeline_get_seqno(tl, rq, &seqno[0]); 693 1.1 riastrad mutex_unlock(&tl->mutex); 694 1.1 riastrad if (err) { 695 1.1 riastrad i915_request_add(rq); 696 1.1 riastrad goto out; 697 1.1 riastrad } 698 1.1 riastrad pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n", 699 1.1 riastrad seqno[0], tl->hwsp_offset); 700 1.1 riastrad 701 1.1 riastrad err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]); 702 1.1 riastrad if (err) { 703 1.1 riastrad i915_request_add(rq); 704 1.1 riastrad goto out; 705 1.1 riastrad } 706 1.1 
riastrad hwsp_seqno[0] = tl->hwsp_seqno; 707 1.1 riastrad 708 1.1 riastrad mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); 709 1.1 riastrad err = intel_timeline_get_seqno(tl, rq, &seqno[1]); 710 1.1 riastrad mutex_unlock(&tl->mutex); 711 1.1 riastrad if (err) { 712 1.1 riastrad i915_request_add(rq); 713 1.1 riastrad goto out; 714 1.1 riastrad } 715 1.1 riastrad pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n", 716 1.1 riastrad seqno[1], tl->hwsp_offset); 717 1.1 riastrad 718 1.1 riastrad err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]); 719 1.1 riastrad if (err) { 720 1.1 riastrad i915_request_add(rq); 721 1.1 riastrad goto out; 722 1.1 riastrad } 723 1.1 riastrad hwsp_seqno[1] = tl->hwsp_seqno; 724 1.1 riastrad 725 1.1 riastrad /* With wrap should come a new hwsp */ 726 1.1 riastrad GEM_BUG_ON(seqno[1] >= seqno[0]); 727 1.1 riastrad GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]); 728 1.1 riastrad 729 1.1 riastrad i915_request_add(rq); 730 1.1 riastrad 731 1.1 riastrad if (i915_request_wait(rq, 0, HZ / 5) < 0) { 732 1.1 riastrad pr_err("Wait for timeline writes timed out!\n"); 733 1.1 riastrad err = -EIO; 734 1.1 riastrad goto out; 735 1.1 riastrad } 736 1.1 riastrad 737 1.1 riastrad if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) { 738 1.1 riastrad pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n", 739 1.1 riastrad *hwsp_seqno[0], *hwsp_seqno[1], 740 1.1 riastrad seqno[0], seqno[1]); 741 1.1 riastrad err = -EINVAL; 742 1.1 riastrad goto out; 743 1.1 riastrad } 744 1.1 riastrad 745 1.1 riastrad intel_gt_retire_requests(gt); /* recycle HWSP */ 746 1.1 riastrad } 747 1.1 riastrad 748 1.1 riastrad out: 749 1.1 riastrad if (igt_flush_test(gt->i915)) 750 1.1 riastrad err = -EIO; 751 1.1 riastrad 752 1.1 riastrad intel_timeline_unpin(tl); 753 1.1 riastrad out_free: 754 1.1 riastrad intel_timeline_put(tl); 755 1.1 riastrad return err; 756 1.1 riastrad } 757 1.1 riastrad 758 1.1 riastrad static int live_hwsp_recycle(void *arg) 759 1.1 
riastrad { 760 1.1 riastrad struct intel_gt *gt = arg; 761 1.1 riastrad struct intel_engine_cs *engine; 762 1.1 riastrad enum intel_engine_id id; 763 1.1 riastrad unsigned long count; 764 1.1 riastrad int err = 0; 765 1.1 riastrad 766 1.1 riastrad /* 767 1.1 riastrad * Check seqno writes into one timeline at a time. We expect to 768 1.1 riastrad * recycle the breadcrumb slot between iterations and neither 769 1.1 riastrad * want to confuse ourselves or the GPU. 770 1.1 riastrad */ 771 1.1 riastrad 772 1.1 riastrad count = 0; 773 1.1 riastrad for_each_engine(engine, gt, id) { 774 1.1 riastrad IGT_TIMEOUT(end_time); 775 1.1 riastrad 776 1.1 riastrad if (!intel_engine_can_store_dword(engine)) 777 1.1 riastrad continue; 778 1.1 riastrad 779 1.1 riastrad intel_engine_pm_get(engine); 780 1.1 riastrad 781 1.1 riastrad do { 782 1.1 riastrad struct intel_timeline *tl; 783 1.1 riastrad struct i915_request *rq; 784 1.1 riastrad 785 1.1 riastrad tl = checked_intel_timeline_create(gt); 786 1.1 riastrad if (IS_ERR(tl)) { 787 1.1 riastrad err = PTR_ERR(tl); 788 1.1 riastrad break; 789 1.1 riastrad } 790 1.1 riastrad 791 1.1 riastrad rq = tl_write(tl, engine, count); 792 1.1 riastrad if (IS_ERR(rq)) { 793 1.1 riastrad intel_timeline_put(tl); 794 1.1 riastrad err = PTR_ERR(rq); 795 1.1 riastrad break; 796 1.1 riastrad } 797 1.1 riastrad 798 1.1 riastrad if (i915_request_wait(rq, 0, HZ / 5) < 0) { 799 1.1 riastrad pr_err("Wait for timeline writes timed out!\n"); 800 1.1 riastrad i915_request_put(rq); 801 1.1 riastrad intel_timeline_put(tl); 802 1.1 riastrad err = -EIO; 803 1.1 riastrad break; 804 1.1 riastrad } 805 1.1 riastrad 806 1.1 riastrad if (*tl->hwsp_seqno != count) { 807 1.1 riastrad pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", 808 1.1 riastrad count, *tl->hwsp_seqno); 809 1.1 riastrad err = -EINVAL; 810 1.1 riastrad } 811 1.1 riastrad 812 1.1 riastrad i915_request_put(rq); 813 1.1 riastrad intel_timeline_put(tl); 814 1.1 riastrad count++; 815 1.1 riastrad 
816 1.1 riastrad if (err) 817 1.1 riastrad break; 818 1.1 riastrad } while (!__igt_timeout(end_time, NULL)); 819 1.1 riastrad 820 1.1 riastrad intel_engine_pm_put(engine); 821 1.1 riastrad if (err) 822 1.1 riastrad break; 823 1.1 riastrad } 824 1.1 riastrad 825 1.1 riastrad return err; 826 1.1 riastrad } 827 1.1 riastrad 828 1.1 riastrad int intel_timeline_live_selftests(struct drm_i915_private *i915) 829 1.1 riastrad { 830 1.1 riastrad static const struct i915_subtest tests[] = { 831 1.1 riastrad SUBTEST(live_hwsp_recycle), 832 1.1 riastrad SUBTEST(live_hwsp_engine), 833 1.1 riastrad SUBTEST(live_hwsp_alternate), 834 1.1 riastrad SUBTEST(live_hwsp_wrap), 835 1.1 riastrad }; 836 1.1 riastrad 837 1.1 riastrad if (intel_gt_is_wedged(&i915->gt)) 838 1.1 riastrad return 0; 839 1.1 riastrad 840 1.1 riastrad return intel_gt_live_subtests(tests, &i915->gt); 841 1.1 riastrad } 842