/*	$NetBSD: intel_gt.c,v 1.3 2021/12/19 11:39:55 riastradh Exp $	*/

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: intel_gt.c,v 1.3 2021/12/19 11:39:55 riastradh Exp $");

#include <linux/kernel.h>

#if IS_ENABLED(CONFIG_DEBUGFS)
#include "debugfs_gt.h"
#endif
#include "i915_drv.h"
#include "intel_context.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
#include "intel_mocs.h"
#include "intel_rc6.h"
#include "intel_renderstate.h"
#include "intel_rps.h"
#include "intel_uncore.h"
#include "intel_pm.h"

void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
{
	gt->i915 = i915;
	gt->uncore = &i915->uncore;

	spin_lock_init(&gt->irq_lock);

	INIT_LIST_HEAD(&gt->closed_vma);
	spin_lock_init(&gt->closed_lock);

	intel_gt_init_reset(gt);
	intel_gt_init_requests(gt);
	intel_gt_init_timelines(gt);
	intel_gt_pm_init_early(gt);

	intel_rps_init_early(&gt->rps);
	intel_uc_init_early(&gt->uc);
}

void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt)
{
	gt->ggtt = ggtt;
}

static void init_unused_ring(struct intel_gt *gt, u32 base)
{
	struct intel_uncore *uncore = gt->uncore;

	intel_uncore_write(uncore, RING_CTL(base), 0);
	intel_uncore_write(uncore, RING_HEAD(base), 0);
	intel_uncore_write(uncore, RING_TAIL(base), 0);
	intel_uncore_write(uncore, RING_START(base), 0);
}

static void init_unused_rings(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (IS_I830(i915)) {
		init_unused_ring(gt, PRB1_BASE);
		init_unused_ring(gt, SRB0_BASE);
		init_unused_ring(gt, SRB1_BASE);
		init_unused_ring(gt, SRB2_BASE);
		init_unused_ring(gt, SRB3_BASE);
	} else if (IS_GEN(i915, 2)) {
		init_unused_ring(gt, SRB0_BASE);
		init_unused_ring(gt, SRB1_BASE);
	} else if (IS_GEN(i915, 3)) {
		init_unused_ring(gt, PRB1_BASE);
		init_unused_ring(gt, PRB2_BASE);
	}
}

int intel_gt_init_hw(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	int ret;

	gt->last_init_time = ktime_get();

	/* Double layer security blanket, see i915_gem_init() */
	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	if (HAS_EDRAM(i915) && INTEL_GEN(i915) < 9)
		intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf));

	if (IS_HASWELL(i915))
		intel_uncore_write(uncore,
				   MI_PREDICATE_RESULT_2,
				   IS_HSW_GT3(i915) ?
				   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

	/* Apply the GT workarounds... */
	intel_gt_apply_workarounds(gt);
	/* ...and determine whether they are sticking. */
	intel_gt_verify_workarounds(gt, "init");

	intel_gt_init_swizzling(gt);

	/*
	 * At least 830 can leave some of the unused rings
	 * "active" (ie. head != tail) after resume which
	 * will prevent c3 entry. Makes sure all unused rings
	 * are totally idle.
	 */
	init_unused_rings(gt);

	ret = i915_ppgtt_init_hw(gt);
	if (ret) {
		DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
		goto out;
	}

	/* We can't enable contexts until all firmware is loaded */
	ret = intel_uc_init_hw(&gt->uc);
	if (ret) {
		i915_probe_error(i915, "Enabling uc failed (%d)\n", ret);
		goto out;
	}

	intel_mocs_init(gt);

out:
	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
	return ret;
}

static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
{
	intel_uncore_rmw(uncore, reg, 0, set);
}

static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
{
	intel_uncore_rmw(uncore, reg, clr, 0);
}

static void clear_register(struct intel_uncore *uncore, i915_reg_t reg)
{
	intel_uncore_rmw(uncore, reg, 0, 0);
}

static void gen8_clear_engine_error_register(struct intel_engine_cs *engine)
{
	GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0);
	GEN6_RING_FAULT_REG_POSTING_READ(engine);
}

void
intel_gt_clear_error_registers(struct intel_gt *gt,
			       intel_engine_mask_t engine_mask)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	u32 eir;

	if (!IS_GEN(i915, 2))
		clear_register(uncore, PGTBL_ER);

	if (INTEL_GEN(i915) < 4)
		clear_register(uncore, IPEIR(RENDER_RING_BASE));
	else
		clear_register(uncore, IPEIR_I965);

	clear_register(uncore, EIR);
	eir = intel_uncore_read(uncore, EIR);
	if (eir) {
		/*
		 * some errors might have become stuck,
		 * mask them.
		 */
		DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
		rmw_set(uncore, EMR, eir);
		intel_uncore_write(uncore, GEN2_IIR,
				   I915_MASTER_ERROR_INTERRUPT);
	}

	if (INTEL_GEN(i915) >= 12) {
		rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID);
		intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
	} else if (INTEL_GEN(i915) >= 8) {
		rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID);
		intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG);
	} else if (INTEL_GEN(i915) >= 6) {
		struct intel_engine_cs *engine;
		enum intel_engine_id id;

		for_each_engine_masked(engine, gt, engine_mask, id)
			gen8_clear_engine_error_register(engine);
	}
}

static void gen6_check_faults(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 fault;

	for_each_engine(engine, gt, id) {
		fault = GEN6_RING_FAULT_REG_READ(engine);
		if (fault & RING_FAULT_VALID) {
			DRM_DEBUG_DRIVER("Unexpected fault\n"
					 "\tAddr: 0x%08"PRIx32"\n"
					 "\tAddress space: %s\n"
					 "\tSource ID: %d\n"
					 "\tType: %d\n",
					 fault & PAGE_MASK,
					 fault & RING_FAULT_GTTSEL_MASK ?
217 "GGTT" : "PPGTT", 218 RING_FAULT_SRCID(fault), 219 RING_FAULT_FAULT_TYPE(fault)); 220 } 221 } 222 } 223 224 static void gen8_check_faults(struct intel_gt *gt) 225 { 226 struct intel_uncore *uncore = gt->uncore; 227 i915_reg_t fault_reg, fault_data0_reg, fault_data1_reg; 228 u32 fault; 229 230 if (INTEL_GEN(gt->i915) >= 12) { 231 fault_reg = GEN12_RING_FAULT_REG; 232 fault_data0_reg = GEN12_FAULT_TLB_DATA0; 233 fault_data1_reg = GEN12_FAULT_TLB_DATA1; 234 } else { 235 fault_reg = GEN8_RING_FAULT_REG; 236 fault_data0_reg = GEN8_FAULT_TLB_DATA0; 237 fault_data1_reg = GEN8_FAULT_TLB_DATA1; 238 } 239 240 fault = intel_uncore_read(uncore, fault_reg); 241 if (fault & RING_FAULT_VALID) { 242 u32 fault_data0, fault_data1; 243 u64 fault_addr; 244 245 fault_data0 = intel_uncore_read(uncore, fault_data0_reg); 246 fault_data1 = intel_uncore_read(uncore, fault_data1_reg); 247 248 fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | 249 ((u64)fault_data0 << 12); 250 251 DRM_DEBUG_DRIVER("Unexpected fault\n" 252 "\tAddr: 0x%08x_%08x\n" 253 "\tAddress space: %s\n" 254 "\tEngine ID: %d\n" 255 "\tSource ID: %d\n" 256 "\tType: %d\n", 257 upper_32_bits(fault_addr), 258 lower_32_bits(fault_addr), 259 fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", 260 GEN8_RING_FAULT_ENGINE_ID(fault), 261 RING_FAULT_SRCID(fault), 262 RING_FAULT_FAULT_TYPE(fault)); 263 } 264 } 265 266 void intel_gt_check_and_clear_faults(struct intel_gt *gt) 267 { 268 struct drm_i915_private *i915 = gt->i915; 269 270 /* From GEN8 onwards we only have one 'All Engine Fault Register' */ 271 if (INTEL_GEN(i915) >= 8) 272 gen8_check_faults(gt); 273 else if (INTEL_GEN(i915) >= 6) 274 gen6_check_faults(gt); 275 else 276 return; 277 278 intel_gt_clear_error_registers(gt, ALL_ENGINES); 279 } 280 281 void intel_gt_flush_ggtt_writes(struct intel_gt *gt) 282 { 283 struct intel_uncore *uncore = gt->uncore; 284 intel_wakeref_t wakeref; 285 286 /* 287 * No actual flushing is required for the GTT write domain for reads 288 * from the GTT domain. Writes to it "immediately" go to main memory 289 * as far as we know, so there's no chipset flush. It also doesn't 290 * land in the GPU render cache. 291 * 292 * However, we do have to enforce the order so that all writes through 293 * the GTT land before any writes to the device, such as updates to 294 * the GATT itself. 295 * 296 * We also have to wait a bit for the writes to land from the GTT. 297 * An uncached read (i.e. mmio) seems to be ideal for the round-trip 298 * timing. This issue has only been observed when switching quickly 299 * between GTT writes and CPU reads from inside the kernel on recent hw, 300 * and it appears to only affect discrete GTT blocks (i.e. on LLC 301 * system agents we cannot reproduce this behaviour, until Cannonlake 302 * that was!). 
	 */

	wmb();

	if (INTEL_INFO(gt->i915)->has_coherent_ggtt)
		return;

	intel_gt_chipset_flush(gt);

	with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) {
		unsigned long flags;

		spin_lock_irqsave(&uncore->lock, flags);
		intel_uncore_posting_read_fw(uncore,
					     RING_HEAD(RENDER_RING_BASE));
		spin_unlock_irqrestore(&uncore->lock, flags);
	}
}

void intel_gt_chipset_flush(struct intel_gt *gt)
{
	wmb();
	if (INTEL_GEN(gt->i915) < 6)
		intel_gtt_chipset_flush();
}

void intel_gt_driver_register(struct intel_gt *gt)
{
	intel_rps_driver_register(&gt->rps);

#if IS_ENABLED(CONFIG_DEBUGFS)
	debugfs_gt_register(gt);
#endif
}

static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
{
	struct drm_i915_private *i915 = gt->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int ret;

	obj = i915_gem_object_create_stolen(i915, size);
	if (IS_ERR(obj))
		obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj)) {
		DRM_ERROR("Failed to allocate scratch page\n");
		return PTR_ERR(obj);
	}

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}

	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
	if (ret)
		goto err_unref;

	gt->scratch = i915_vma_make_unshrinkable(vma);

	return 0;

err_unref:
	i915_gem_object_put(obj);
	return ret;
}

static void intel_gt_fini_scratch(struct intel_gt *gt)
{
	i915_vma_unpin_and_release(&gt->scratch, 0);
}

static struct i915_address_space *kernel_vm(struct intel_gt *gt)
{
	if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING)
		return &i915_ppgtt_create(gt)->vm;
	else
		return i915_vm_get(&gt->ggtt->vm);
}

static int __intel_context_flush_retire(struct intel_context *ce)
{
	struct intel_timeline *tl;

	tl = intel_context_timeline_lock(ce);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	intel_context_timeline_unlock(tl);
	return 0;
}

static int __engines_record_defaults(struct intel_gt *gt)
{
	struct i915_request *requests[I915_NUM_ENGINES] = {};
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * As we reset the gpu during very early sanitisation, the current
	 * register state on the GPU should reflect its default values.
	 * We load a context onto the hw (with restore-inhibit), then switch
	 * over to a second context to save that default register state. We
	 * can then prime every new context with that state so they all start
	 * from the same default HW values.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_renderstate so;
		struct intel_context *ce;
		struct i915_request *rq;

		/* We must be able to switch to something! */
*/ 419 GEM_BUG_ON(!engine->kernel_context); 420 421 err = intel_renderstate_init(&so, engine); 422 if (err) 423 goto out; 424 425 ce = intel_context_create(engine); 426 if (IS_ERR(ce)) { 427 err = PTR_ERR(ce); 428 goto out; 429 } 430 431 rq = intel_context_create_request(ce); 432 if (IS_ERR(rq)) { 433 err = PTR_ERR(rq); 434 intel_context_put(ce); 435 goto out; 436 } 437 438 err = intel_engine_emit_ctx_wa(rq); 439 if (err) 440 goto err_rq; 441 442 err = intel_renderstate_emit(&so, rq); 443 if (err) 444 goto err_rq; 445 446 err_rq: 447 requests[id] = i915_request_get(rq); 448 i915_request_add(rq); 449 intel_renderstate_fini(&so); 450 if (err) 451 goto out; 452 } 453 454 /* Flush the default context image to memory, and enable powersaving. */ 455 if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { 456 err = -EIO; 457 goto out; 458 } 459 460 for (id = 0; id < ARRAY_SIZE(requests); id++) { 461 struct i915_request *rq; 462 struct i915_vma *state; 463 void *vaddr; 464 465 rq = requests[id]; 466 if (!rq) 467 continue; 468 469 GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags)); 470 state = rq->context->state; 471 if (!state) 472 continue; 473 474 /* Serialise with retirement on another CPU */ 475 GEM_BUG_ON(!i915_request_completed(rq)); 476 err = __intel_context_flush_retire(rq->context); 477 if (err) 478 goto out; 479 480 /* We want to be able to unbind the state from the GGTT */ 481 GEM_BUG_ON(intel_context_is_pinned(rq->context)); 482 483 /* 484 * As we will hold a reference to the logical state, it will 485 * not be torn down with the context, and importantly the 486 * object will hold onto its vma (making it possible for a 487 * stray GTT write to corrupt our defaults). Unmap the vma 488 * from the GTT to prevent such accidents and reclaim the 489 * space. 490 */ 491 err = i915_vma_unbind(state); 492 if (err) 493 goto out; 494 495 i915_gem_object_lock(state->obj); 496 err = i915_gem_object_set_to_cpu_domain(state->obj, false); 497 i915_gem_object_unlock(state->obj); 498 if (err) 499 goto out; 500 501 i915_gem_object_set_cache_coherency(state->obj, I915_CACHE_LLC); 502 503 /* Check we can acquire the image of the context state */ 504 vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB); 505 if (IS_ERR(vaddr)) { 506 err = PTR_ERR(vaddr); 507 goto out; 508 } 509 510 rq->engine->default_state = i915_gem_object_get(state->obj); 511 i915_gem_object_unpin_map(state->obj); 512 } 513 514 out: 515 /* 516 * If we have to abandon now, we expect the engines to be idle 517 * and ready to be torn-down. The quickest way we can accomplish 518 * this is by declaring ourselves wedged. 
	 */
	if (err)
		intel_gt_set_wedged(gt);

	for (id = 0; id < ARRAY_SIZE(requests); id++) {
		struct intel_context *ce;
		struct i915_request *rq;

		rq = requests[id];
		if (!rq)
			continue;

		ce = rq->context;
		i915_request_put(rq);
		intel_context_put(ce);
	}
	return err;
}

static int __engines_verify_workarounds(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return 0;

	for_each_engine(engine, gt, id) {
		if (intel_engine_verify_workarounds(engine, "load"))
			err = -EIO;
	}

	return err;
}

static void __intel_gt_disable(struct intel_gt *gt)
{
	intel_gt_set_wedged_on_init(gt);

	intel_gt_suspend_prepare(gt);
	intel_gt_suspend_late(gt);

	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
}

int intel_gt_init(struct intel_gt *gt)
{
	int err;

	err = i915_inject_probe_error(gt->i915, -ENODEV);
	if (err)
		return err;

	/*
	 * This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset. If
	 * we hold the forcewake during initialisation these problems
	 * just magically go away.
	 */
	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

	err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K);
	if (err)
		goto out_fw;

	intel_gt_pm_init(gt);

	gt->vm = kernel_vm(gt);
	if (!gt->vm) {
		err = -ENOMEM;
		goto err_pm;
	}

	err = intel_engines_init(gt);
	if (err)
		goto err_engines;

	intel_uc_init(&gt->uc);

	err = intel_gt_resume(gt);
	if (err)
		goto err_uc_init;

	err = __engines_record_defaults(gt);
	if (err)
		goto err_gt;

	err = __engines_verify_workarounds(gt);
	if (err)
		goto err_gt;

	err = i915_inject_probe_error(gt->i915, -EIO);
	if (err)
		goto err_gt;

	goto out_fw;
err_gt:
	__intel_gt_disable(gt);
	intel_uc_fini_hw(&gt->uc);
err_uc_init:
	intel_uc_fini(&gt->uc);
err_engines:
	intel_engines_release(gt);
	i915_vm_put(fetch_and_zero(&gt->vm));
err_pm:
	intel_gt_pm_fini(gt);
	intel_gt_fini_scratch(gt);
out_fw:
	if (err)
		intel_gt_set_wedged_on_init(gt);
	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
	return err;
}

void intel_gt_driver_remove(struct intel_gt *gt)
{
	__intel_gt_disable(gt);

	intel_uc_fini_hw(&gt->uc);
	intel_uc_fini(&gt->uc);

	intel_engines_release(gt);
}

void intel_gt_driver_unregister(struct intel_gt *gt)
{
	intel_rps_driver_unregister(&gt->rps);
}

void intel_gt_driver_release(struct intel_gt *gt)
{
	struct i915_address_space *vm;

	vm = fetch_and_zero(&gt->vm);
	if (vm) /* FIXME being called twice on error paths :( */
		i915_vm_put(vm);

	intel_gt_pm_fini(gt);
	intel_gt_fini_scratch(gt);
}

void intel_gt_driver_late_release(struct intel_gt *gt)
{
	intel_uc_driver_late_release(&gt->uc);
	intel_gt_fini_requests(gt);
	intel_gt_fini_reset(gt);
	intel_gt_fini_timelines(gt);
	intel_engines_free(gt);
}