/* $NetBSD: i915_gem_object_blt.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $ */

// SPDX-License-Identifier: MIT
/*
 * Copyright 2019 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_gem_object_blt.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");

#include <linux/sort.h>

#include "gt/intel_gt.h"
#include "gt/intel_engine_user.h"

#include "i915_selftest.h"

#include "gem/i915_gem_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"
#include "selftests/mock_drm.h"
#include "huge_gem_object.h"
#include "mock_context.h"

static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

static int __perf_fill_blt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		ktime_t t[5];
		int pass;
		int err;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		intel_engine_pm_get(engine);
		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			struct intel_context *ce = engine->kernel_context;
			ktime_t t0, t1;

			t0 = ktime_get();

			err = i915_gem_object_fill_blt(obj, ce, 0);
			if (err)
				break;

			err = i915_gem_object_wait(obj,
						   I915_WAIT_ALL,
						   MAX_SCHEDULE_TIMEOUT);
			if (err)
				break;

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}
		intel_engine_pm_put(engine);
		if (err)
			return err;

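		/*
		 * Sort the five samples, drop the fastest and the slowest,
		 * and report a 1:2:1 weighted average of the middle three,
		 * converted from bytes/ns to MiB/s.
		 */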
		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
			engine->name,
			obj->base.size >> 10,
			div64_u64(mul_u32_u32(4 * obj->base.size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);
	} while (1);
}

static int perf_fill_blt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *obj;
		int err;

		obj = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		err = __perf_fill_blt(obj);
		i915_gem_object_put(obj);
		if (err)
			return err;
	}

	return 0;
}

static int __perf_copy_blt(struct drm_i915_gem_object *src,
			   struct drm_i915_gem_object *dst)
{
	struct drm_i915_private *i915 = to_i915(src->base.dev);
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		ktime_t t[5];
		int pass;
		int err = 0;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		intel_engine_pm_get(engine);
		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			struct intel_context *ce = engine->kernel_context;
			ktime_t t0, t1;

			t0 = ktime_get();

			err = i915_gem_object_copy_blt(src, dst, ce);
			if (err)
				break;

			err = i915_gem_object_wait(dst,
						   I915_WAIT_ALL,
						   MAX_SCHEDULE_TIMEOUT);
			if (err)
				break;

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}
		intel_engine_pm_put(engine);
		if (err)
			return err;

		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
			engine->name,
			src->base.size >> 10,
			div64_u64(mul_u32_u32(4 * src->base.size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);
	} while (1);
}

static int perf_copy_blt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		int err;

		src = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(src))
			return PTR_ERR(src);

		dst = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(src, dst);

		i915_gem_object_put(dst);
err_src:
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

struct igt_thread_arg {
	struct drm_i915_private *i915;
	struct i915_gem_context *ctx;
	struct file *file;
	struct rnd_state prng;
	unsigned int n_cpus;
};

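/*
 * Worker for igt_fill_blt{,_ctx0}: until the timeout expires, repeatedly
 * create a randomly sized object (huge_gem_object, so its GPU size may
 * exceed its physical backing), fill it with a random value using the
 * blitter, and verify the result from the CPU. If no shared context was
 * supplied, each thread creates its own context with a random priority.
 */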
static int igt_fill_blt_thread(void *arg)
{
	struct igt_thread_arg *thread = arg;
	struct drm_i915_private *i915 = thread->i915;
	struct rnd_state *prng = &thread->prng;
	struct drm_i915_gem_object *obj;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	unsigned int prio;
	IGT_TIMEOUT(end);
	int err;

	ctx = thread->ctx;
	if (!ctx) {
		ctx = live_context(i915, thread->file);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
		ctx->sched.priority = I915_USER_PRIORITY(prio);
	}

	ce = i915_gem_context_get_engine(ctx, BCS0);
	GEM_BUG_ON(IS_ERR(ce));

	do {
		const u32 max_block_size = S16_MAX * PAGE_SIZE;
		u32 val = prandom_u32_state(prng);
		u64 total = ce->vm->total;
		u32 phys_sz;
		u32 sz;
		u32 *vaddr;
		u32 i;

		/*
		 * If we have a tiny shared address space, like for the GGTT
		 * then we can't be too greedy.
		 */
		if (i915_is_ggtt(ce->vm))
			total = div64_u64(total, thread->n_cpus);

		sz = min_t(u64, total >> 4, prandom_u32_state(prng));
		phys_sz = sz % (max_block_size + 1);

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);

		pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		obj = huge_gem_object(i915, phys_sz, sz);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put;
		}

		/*
		 * Make sure the potentially async clflush does its job, if
		 * required.
		 */
		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(obj) / sizeof(u32));

		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			obj->cache_dirty = true;

		err = i915_gem_object_fill_blt(obj, ce, val);
		if (err)
			goto err_unpin;

		i915_gem_object_lock(obj);
		err = i915_gem_object_set_to_cpu_domain(obj, false);
		i915_gem_object_unlock(obj);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) {
			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(obj);
		i915_gem_object_put(obj);
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
err_flush:
	if (err == -ENOMEM)
		err = 0;

	intel_context_put(ce);
	return err;
}

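/*
 * Worker for igt_copy_blt{,_ctx0}: until the timeout expires, fill a source
 * object from the CPU, copy it into a freshly created destination object
 * using the blitter, and then verify the destination contents from the CPU.
 */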
static int igt_copy_blt_thread(void *arg)
{
	struct igt_thread_arg *thread = arg;
	struct drm_i915_private *i915 = thread->i915;
	struct rnd_state *prng = &thread->prng;
	struct drm_i915_gem_object *src, *dst;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	unsigned int prio;
	IGT_TIMEOUT(end);
	int err;

	ctx = thread->ctx;
	if (!ctx) {
		ctx = live_context(i915, thread->file);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
		ctx->sched.priority = I915_USER_PRIORITY(prio);
	}

	ce = i915_gem_context_get_engine(ctx, BCS0);
	GEM_BUG_ON(IS_ERR(ce));

	do {
		const u32 max_block_size = S16_MAX * PAGE_SIZE;
		u32 val = prandom_u32_state(prng);
		u64 total = ce->vm->total;
		u32 phys_sz;
		u32 sz;
		u32 *vaddr;
		u32 i;

		if (i915_is_ggtt(ce->vm))
			total = div64_u64(total, thread->n_cpus);

		sz = min_t(u64, total >> 4, prandom_u32_state(prng));
		phys_sz = sz % (max_block_size + 1);

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);

		pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		src = huge_gem_object(i915, phys_sz, sz);
		if (IS_ERR(src)) {
			err = PTR_ERR(src);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put_src;
		}

		memset32(vaddr, val,
			 huge_gem_object_phys_size(src) / sizeof(u32));

		i915_gem_object_unpin_map(src);

		if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
			src->cache_dirty = true;

		dst = huge_gem_object(i915, phys_sz, sz);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_put_src;
		}

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put_dst;
		}

		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(dst) / sizeof(u32));

		if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			dst->cache_dirty = true;

		err = i915_gem_object_copy_blt(src, dst, ce);
		if (err)
			goto err_unpin;

		i915_gem_object_lock(dst);
		err = i915_gem_object_set_to_cpu_domain(dst, false);
		i915_gem_object_unlock(dst);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); ++i) {
			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(dst);

		i915_gem_object_put(src);
		i915_gem_object_put(dst);
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(dst);
err_put_dst:
	i915_gem_object_put(dst);
err_put_src:
	i915_gem_object_put(src);
err_flush:
	if (err == -ENOMEM)
		err = 0;

	intel_context_put(ce);
	return err;
}

static int igt_threaded_blt(struct drm_i915_private *i915,
			    int (*blt_fn)(void *arg),
			    unsigned int flags)
#define SINGLE_CTX BIT(0)
{
	struct igt_thread_arg *thread;
	struct task_struct **tsk;
	unsigned int n_cpus, i;
	I915_RND_STATE(prng);
	int err = 0;

	n_cpus = num_online_cpus() + 1;

	tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL);
	if (!tsk)
		return 0;

	thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
	if (!thread)
		goto out_tsk;

	thread[0].file = mock_file(i915);
	if (IS_ERR(thread[0].file)) {
		err = PTR_ERR(thread[0].file);
		goto out_thread;
	}

	if (flags & SINGLE_CTX) {
		thread[0].ctx = live_context(i915, thread[0].file);
		if (IS_ERR(thread[0].ctx)) {
			err = PTR_ERR(thread[0].ctx);
			goto out_file;
		}
	}

	for (i = 0; i < n_cpus; ++i) {
		thread[i].i915 = i915;
		thread[i].file = thread[0].file;
		thread[i].ctx = thread[0].ctx;
		thread[i].n_cpus = n_cpus;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
		if (IS_ERR(tsk[i])) {
			err = PTR_ERR(tsk[i]);
			break;
		}

		get_task_struct(tsk[i]);
	}

	yield(); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		int status;

		if (IS_ERR_OR_NULL(tsk[i]))
			continue;

		status = kthread_stop(tsk[i]);
		if (status && !err)
			err = status;

		put_task_struct(tsk[i]);
	}

out_file:
	fput(thread[0].file);
out_thread:
	kfree(thread);
out_tsk:
	kfree(tsk);
	return err;
}

static int igt_fill_blt(void *arg)
{
	return igt_threaded_blt(arg, igt_fill_blt_thread, 0);
}

static int igt_fill_blt_ctx0(void *arg)
{
	return igt_threaded_blt(arg, igt_fill_blt_thread, SINGLE_CTX);
}

static int igt_copy_blt(void *arg)
{
	return igt_threaded_blt(arg, igt_copy_blt_thread, 0);
}

static int igt_copy_blt_ctx0(void *arg)
{
	return igt_threaded_blt(arg, igt_copy_blt_thread, SINGLE_CTX);
}

int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_fill_blt),
		SUBTEST(igt_fill_blt_ctx0),
		SUBTEST(igt_copy_blt),
		SUBTEST(igt_copy_blt_ctx0),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	if (!HAS_ENGINE(i915, BCS0))
		return 0;

	return i915_live_subtests(tests, i915);
}

int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_fill_blt),
		SUBTEST(perf_copy_blt),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}