/* $NetBSD: intel_workarounds.c,v 1.3 2021/12/19 11:49:11 riastradh Exp $ */

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright 2014-2018 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: intel_workarounds.c,v 1.3 2021/12/19 11:49:11 riastradh Exp $");

#include "i915_drv.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_workarounds.h"

#include <linux/nbsd-namespace.h>

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * in five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to their default values (on GPU reset, suspend/resume [2]_, etc.).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of these to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, that get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Please notice that there are other WAs that, due to their nature,
 *    cannot be applied from a central place. Those are peppered around the rest
 *    of the code, as needed.
 *
 * .. [2] Technically, some registers are power-context saved & restored, so they
 *    survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ~~~~~~
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */

static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name)
{
	wal->name = name;
	wal->engine_name = engine_name;
}

#define WA_LIST_CHUNK (1 << 4)

static void wa_init_finish(struct i915_wa_list *wal)
{
	/* Trim unused entries. */
	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
		struct i915_wa *list = kmemdup(wal->list,
					       wal->count * sizeof(*list),
					       GFP_KERNEL);

		if (list) {
			kfree(wal->list);
			wal->list = list;
		}
	}

	if (!wal->count)
		return;

	DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n",
			 wal->wa_count, wal->name, wal->engine_name);
}

static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
	unsigned int addr = i915_mmio_reg_offset(wa->reg);
	unsigned int start = 0, end = wal->count;
	const unsigned int grow = WA_LIST_CHUNK;
	struct i915_wa *wa_;

	GEM_BUG_ON(!is_power_of_2(grow));

	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
		struct i915_wa *list;

		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
				     GFP_KERNEL);
		if (!list) {
			DRM_ERROR("No space for workaround init!\n");
			return;
		}

		if (wal->list)
			memcpy(list, wal->list, sizeof(*wa) * wal->count);

		wal->list = list;
	}

	while (start < end) {
		unsigned int mid = start + (end - start) / 2;

		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
			start = mid + 1;
		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
			end = mid;
		} else {
			wa_ = &wal->list[mid];

			if ((wa->mask & ~wa_->mask) == 0) {
				DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
					  i915_mmio_reg_offset(wa_->reg),
					  wa_->mask, wa_->val);

				wa_->val &= ~wa->mask;
			}

			wal->wa_count++;
			wa_->val |= wa->val;
			wa_->mask |= wa->mask;
			wa_->read |= wa->read;
			return;
		}
	}

	wal->wa_count++;
	wa_ = &wal->list[wal->count++];
	*wa_ = *wa;

	while (wa_-- > wal->list) {
		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
			   i915_mmio_reg_offset(wa_[1].reg));
		if (i915_mmio_reg_offset(wa_[1].reg) >
		    i915_mmio_reg_offset(wa_[0].reg))
			break;

		swap(wa_[1], wa_[0]);
	}
}

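/*
 * The helpers below all funnel into _wa_add(). wa_add() records the value to
 * write, the bits we intend to change (mask) and the bits that can later be
 * verified by reading back (read_mask). wa_write_masked_or() verifies the
 * same bits it writes, while wa_masked_en() relies on the hardware's
 * masked-register convention, where the high 16 bits of the written value
 * select which of the low 16 bits take effect. Illustrative use (taken from
 * the engine workarounds further below):
 *
 *	wa_masked_en(wal, GEN7_FF_SLICE_CS_CHICKEN1,
 *		     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
 *
 * which is shorthand for wa_write_masked_or() with a _MASKED_BIT_ENABLE()
 * value.
 */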
static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
		   u32 val, u32 read_mask)
{
	struct i915_wa wa = {
		.reg = reg,
		.mask = mask,
		.val = val,
		.read = read_mask,
	};

	_wa_add(wal, &wa);
}

static void
wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
		   u32 val)
{
	wa_add(wal, reg, mask, val, mask);
}

static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_write_masked_or(wal, reg, ~0, val);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_write_masked_or(wal, reg, val, val);
}

#define WA_SET_BIT_MASKED(addr, mask) \
	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
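
/*
 * For example, a field update in a context workaround below such as
 *
 *	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
 *			    GEN6_WIZ_HASHING_MASK,
 *			    GEN6_WIZ_HASHING_16x4);
 *
 * expands to wa_write_masked_or() with a _MASKED_FIELD() value, so the whole
 * field is write-enabled in the high half of the masked register while only
 * the 16x4 setting is programmed in the low half.
 */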

static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw,chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
			  HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 *  polygons in the same 8x4 pixel/sample area to be processed without
	 *  stalling waiting for the earlier ones to write to Hierarchical Z
	 *  buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw,chv */
	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);
}

static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	gen8_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw
	 *
	 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
	 * to disable EUTC clock gating.
	 */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
			  DOP_CLOCK_GATING_DISABLE);

	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN8_SAMPLER_POWER_BYPASS_DIS);

	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  /* WaForceContextSaveRestoreNonCoherent:bdw */
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
			  (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}

static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen8_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* Improve HiZ throughput on CHV. */
	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
}

static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN9_PBE_COMPRESSED_HASH_SELECTION);
		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
				  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
	}

	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  FLOW_CONTROL_ENABLE |
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
			  GEN9_ENABLE_YV12_BUGFIX |
			  GEN9_ENABLE_GPGPU_PREEMPTION);

	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(CACHE_MODE_1,
			  GEN8_4x4_STC_OPTIMIZATION_DISABLE |
			  GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
			  GEN9_CCS_TLB_PREFETCH_ENABLE);

	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
	 * both tied to WaForceContextSaveRestoreNonCoherent
	 * in some hsds for skl. We keep the tie for all gen9. The
	 * documentation is a bit hazy and so we want to get common behaviour,
	 * even though there is no clear evidence we would need both on kbl/bxt.
	 * This area has been a source of system hangs, so we play it safe
	 * and mimic the skl regardless of what bspec says.
	 *
	 * Use Force Non-Coherent whenever executing a 3D context. This
	 * is a workaround for a possible hang in the unlikely event
	 * a TLB invalidation occurs during a PSD flush.
	 */

	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_NON_COHERENT);

	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

	/*
	 * Supporting preemption with fine-granularity requires changes in the
	 * batch buffer programming. Since we can't break old userspace, we
	 * need to set our default preemption level to a safe value. Userspace
	 * is still able to use more fine-grained preemption levels, since in
	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
	 * not real HW workarounds, but merely a way to start using preemption
	 * while maintaining the old contract with userspace.
	 */

	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
	if (IS_GEN9_LP(i915))
		WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
}

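/*
 * Worked example for the loop below (illustrative fuse values only): if
 * slice 0 reports subslice_7eu[0] == 0x4, exactly one subslice (index 2)
 * has 7 EUs, so ss = ffs(0x4) - 1 = 2 and vals[0] = 3 - ss = 1. Slices whose
 * subslice_7eu value is zero or has several bits set fail the
 * is_power_of_2() check and keep the default hashing value of 0.
 */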
static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
				struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;
	u8 vals[3] = { 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		u8 ss;

		/*
		 * Only consider slices where one, and only one, subslice has 7
		 * EUs
		 */
		if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
			continue;

		/*
		 * subslice_7eu[i] != 0 (because of the check above) and
		 * ss_max == 4 (maximum number of subslices possible per slice)
		 *
		 * -> 0 <= ss <= 3;
		 */
		ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
		vals[i] = 3 - ss;
	}

	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
		return;

	/* Tune IZ hashing. See intel_device_info_runtime_init() */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN9_IZ_HASHING_MASK(2) |
			    GEN9_IZ_HASHING_MASK(1) |
			    GEN9_IZ_HASHING_MASK(0),
			    GEN9_IZ_HASHING(2, vals[2]) |
			    GEN9_IZ_HASHING(1, vals[1]) |
			    GEN9_IZ_HASHING(0, vals[0]));
}

static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);
	skl_tune_iz_hashing(engine, wal);
}

static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:bxt */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  STALL_DOP_GATING_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:bxt */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:kbl */
	if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:kbl */
	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:glk */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:cfl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:cfl */
	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/* WaForceContextSaveRestoreNonCoherent:cnl */
	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

	/* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
		WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);

	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
	if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);

	/* WaPushConstantDereferenceHoldDisable:cnl */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

	/* FtrEnableFastAnisoL1BankingFix:cnl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

	/* WaDisable3DMidCmdPreemption:cnl */
	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:cnl */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaDisableEarlyEOT:cnl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
}

static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/* WaDisableBankHangMode:icl */
	wa_write(wal,
		 GEN8_L3CNTLREG,
		 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
		 GEN8_ERRDETBCTRL);

	/* Wa_1604370585:icl (pre-prod)
	 * Formerly known as WaPushConstantDereferenceHoldDisable
	 */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
				  PUSH_CONSTANT_DEREF_DISABLE);

	/* WaForceEnableNonCoherent:icl
	 * This is not the same workaround as in early Gen9 platforms, where
	 * lacking this could cause system hangs, but coherency performance
	 * overhead is high and only a few compute workloads really need it
	 * (the register is whitelisted in hardware now, so UMDs can opt in
	 * for coherency if they have a good reason).
	 */
	WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

	/* Wa_2006611047:icl (pre-prod)
	 * Formerly known as WaDisableImprovedTdlClkGating
	 */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
				  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);

	/* Wa_2006665173:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
				  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);

	/* WaEnableFloatBlendOptimization:icl */
	wa_write_masked_or(wal,
			   GEN10_CACHE_MODE_SS,
			   0, /* write-only, so skip validation */
			   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));

	/* WaDisableGPGPUMidThreadPreemption:icl */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);

	/* Allow headerless messages for preemptible GPGPU contexts. */
	WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
			  GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
}

static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	u32 val;

	/* Wa_1409142259:tgl */
	WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
			  GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);

	/* Wa_1604555607:tgl */
	val = intel_uncore_read(engine->uncore, FF_MODE2);
	val &= ~FF_MODE2_TDS_TIMER_MASK;
	val |= FF_MODE2_TDS_TIMER_128;
	/*
	 * FIXME: FF_MODE2 register is not readable until TGL B0. We can
	 * enable verification of the WA from later steppings, which enable
	 * the read of FF_MODE2.
	 */
	wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, val,
	       IS_TGL_REVID(engine->i915, TGL_REVID_A0, TGL_REVID_A0) ? 0 :
	       FF_MODE2_TDS_TIMER_MASK);
}

static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
			   struct i915_wa_list *wal,
			   const char *name)
{
	struct drm_i915_private *i915 = engine->i915;

	if (engine->class != RENDER_CLASS)
		return;

	wa_init_start(wal, name, engine->name);

	if (IS_GEN(i915, 12))
		tgl_ctx_workarounds_init(engine, wal);
	else if (IS_GEN(i915, 11))
		icl_ctx_workarounds_init(engine, wal);
	else if (IS_CANNONLAKE(i915))
		cnl_ctx_workarounds_init(engine, wal);
	else if (IS_COFFEELAKE(i915))
		cfl_ctx_workarounds_init(engine, wal);
	else if (IS_GEMINILAKE(i915))
		glk_ctx_workarounds_init(engine, wal);
	else if (IS_KABYLAKE(i915))
		kbl_ctx_workarounds_init(engine, wal);
	else if (IS_BROXTON(i915))
		bxt_ctx_workarounds_init(engine, wal);
	else if (IS_SKYLAKE(i915))
		skl_ctx_workarounds_init(engine, wal);
	else if (IS_CHERRYVIEW(i915))
		chv_ctx_workarounds_init(engine, wal);
	else if (IS_BROADWELL(i915))
		bdw_ctx_workarounds_init(engine, wal);
	else if (INTEL_GEN(i915) < 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));

	wa_init_finish(wal);
}

void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
{
	__intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
}

int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
	struct i915_wa *wa;
	unsigned int i;
	u32 *cs;
	int ret;

	if (wal->count == 0)
		return 0;

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	cs = intel_ring_begin(rq, (wal->count * 2 + 2));
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		*cs++ = i915_mmio_reg_offset(wa->reg);
		*cs++ = wa->val;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	return 0;
}
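
/*
 * For reference, the stream emitted by intel_engine_emit_ctx_wa() above is a
 * single Load Register Immediate packet; for a two-entry list it would look
 * roughly like:
 *
 *	MI_LOAD_REGISTER_IMM(2)
 *	<offset of reg A> <value for reg A>
 *	<offset of reg B> <value for reg B>
 *	MI_NOOP
 *
 * which is why intel_ring_begin() reserves wal->count * 2 + 2 dwords.
 */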

static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	/* WaDisableKillLogic:bxt,skl,kbl */
	if (!IS_COFFEELAKE(i915))
		wa_write_or(wal,
			    GAM_ECOCHK,
			    ECOCHK_DIS_TLB);

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		wa_write_or(wal,
			    MMCD_MISC_CTRL,
			    MMCD_PCLA | MMCD_HOTSPOT_EN);
	}

	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
	wa_write_or(wal,
		    GAM_ECOCHK,
		    BDW_DISABLE_HDC_INVALIDATION);
}

static void
skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableGafsUnitClkGating:skl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:skl */
	if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
		wa_write_or(wal,
			    GEN9_GAMT_ECO_REG_RW_IA,
			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaInPlaceDecompressionHang:bxt */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableDynamicCreditSharing:kbl */
	if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
		wa_write_or(wal,
			    GAMT_CHKN_BIT_REG,
			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

	/* WaDisableGafsUnitClkGating:kbl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:kbl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);
}

static void
cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableGafsUnitClkGating:cfl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:cfl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

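/*
 * To make the index selection in wa_init_mcr() concrete (the fuse values
 * below are made up): with slice_mask == 0x1 and an L3 fuse reading with
 * bit 1 set (bank pair 1 fused off), l3_en ends up with bit 1 and bit
 * (1 + GEN10_L3BANK_PAIR_COUNT) cleared. fls() then picks the
 * highest-numbered enabled subslice that still has its L3 bank available,
 * and that single index is programmed into GEN8_MCR_SELECTOR for both the
 * slice/subslice and the L3 bank steering.
 */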
static void
wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
	unsigned int slice, subslice;
	u32 l3_en, mcr, mcr_mask;

	GEM_BUG_ON(INTEL_GEN(i915) < 10);

	/*
	 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
	 * L3Banks could be fused off in single slice scenario. If that is
	 * the case, we might need to program MCR select to a valid L3Bank
	 * by default, to make sure we correctly read certain registers
	 * later on (in the range 0xB100 - 0xB3FF).
	 *
	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
	 * Before any MMIO read into slice/subslice specific registers, MCR
	 * packet control register needs to be programmed to point to any
	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
	 * This means each subsequent MMIO read will be forwarded to a
	 * specific s/ss combination, but this is OK since these registers
	 * are consistent across s/ss in almost all cases. On the rare
	 * occasions, such as INSTDONE, where this value is dependent
	 * on s/ss combo, the read should be done with read_subslice_reg.
	 *
	 * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both
	 * to which subslice, or to which L3 bank, the respective mmio reads
	 * will go, we have to find a common index which works for both
	 * accesses.
	 *
	 * The case where we cannot find a common index fortunately should not
	 * happen in production hardware, so we only emit a warning instead of
	 * implementing something more complex that requires checking the range
	 * of every MMIO read.
	 */

	if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) {
		u32 l3_fuse =
			intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
			GEN10_L3BANK_MASK;

		DRM_DEBUG_DRIVER("L3 fuse = %x\n", l3_fuse);
		l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
	} else {
		l3_en = ~0;
	}

	slice = fls(sseu->slice_mask) - 1;
	subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
	if (!subslice) {
		DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n",
			 intel_sseu_get_subslices(sseu, slice), l3_en);
		subslice = fls(l3_en);
		WARN_ON(!subslice);
	}
	subslice--;

	if (INTEL_GEN(i915) >= 11) {
		mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
		mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
	} else {
		mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
		mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
	}

	DRM_DEBUG_DRIVER("MCR slice/subslice = %x\n", mcr);

	wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
}

static void
cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);

	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
		wa_write_or(wal,
			    GAMT_CHKN_BIT_REG,
			    GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);

	/* WaInPlaceDecompressionHang:cnl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);

	/* WaInPlaceDecompressionHang:icl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaModifyGamTlbPartitioning:icl */
	wa_write_masked_or(wal,
			   GEN11_GACB_PERF_CTRL,
			   GEN11_HASH_CTRL_MASK,
			   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

	/* Wa_1405766107:icl
	 * Formerly known as WaCL2SFHalfMaxAlloc
	 */
	wa_write_or(wal,
		    GEN11_LSN_UNSLCVC,
		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

	/* Wa_220166154:icl
	 * Formerly known as WaDisCtxReload
	 */
	wa_write_or(wal,
		    GEN8_GAMW_ECO_DEV_RW_IA,
		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

	/* Wa_1405779004:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		wa_write_or(wal,
			    SLICE_UNIT_LEVEL_CLKGATE,
			    MSCUNIT_CLKGATE_DIS);

	/* Wa_1406680159:icl */
	wa_write_or(wal,
		    SUBSLICE_UNIT_LEVEL_CLKGATE,
		    GWUNIT_CLKGATE_DIS);

	/* Wa_1406838659:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
		wa_write_or(wal,
			    INF_UNIT_LEVEL_CLKGATE,
			    CGPSF_CLKGATE_DIS);

	/* Wa_1406463099:icl
	 * Formerly known as WaGamTlbPendError
	 */
	wa_write_or(wal,
		    GAMT_CHKN_BIT_REG,
		    GAMT_CHKN_DISABLE_L3_COH_PIPE);

	/* Wa_1607087056:icl */
	wa_write_or(wal,
		    SLICE_UNIT_LEVEL_CLKGATE,
		    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
}

static void
tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	/* Wa_1409420604:tgl */
	if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
		wa_write_or(wal,
			    SUBSLICE_UNIT_LEVEL_CLKGATE2,
			    CPSSUNIT_CLKGATE_DIS);

	/* Wa_1409180338:tgl */
	if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
		wa_write_or(wal,
			    SLICE_UNIT_LEVEL_CLKGATE,
			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
}

static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	if (IS_GEN(i915, 12))
		tgl_gt_workarounds_init(i915, wal);
	else if (IS_GEN(i915, 11))
		icl_gt_workarounds_init(i915, wal);
	else if (IS_CANNONLAKE(i915))
		cnl_gt_workarounds_init(i915, wal);
	else if (IS_COFFEELAKE(i915))
		cfl_gt_workarounds_init(i915, wal);
	else if (IS_GEMINILAKE(i915))
		glk_gt_workarounds_init(i915, wal);
	else if (IS_KABYLAKE(i915))
		kbl_gt_workarounds_init(i915, wal);
	else if (IS_BROXTON(i915))
		bxt_gt_workarounds_init(i915, wal);
	else if (IS_SKYLAKE(i915))
		skl_gt_workarounds_init(i915, wal);
	else if (INTEL_GEN(i915) <= 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));
}

void intel_gt_init_workarounds(struct drm_i915_private *i915)
{
	struct i915_wa_list *wal = &i915->gt_wa_list;

	wa_init_start(wal, "GT", "global");
	gt_init_workarounds(i915, wal);
	wa_init_finish(wal);
}

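/*
 * wal_get_fw_for_rmw() collects the union of the forcewake domains needed to
 * read and write every register in the list, so that wa_list_apply() below
 * can take them once, under uncore->lock, instead of per register.
 */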
static enum forcewake_domains
wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw = 0;
	struct i915_wa *wa;
	unsigned int i;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		fw |= intel_uncore_forcewake_for_reg(uncore,
						     wa->reg,
						     FW_REG_READ |
						     FW_REG_WRITE);

	return fw;
}

static bool
wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
{
	if ((cur ^ wa->val) & wa->read) {
		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
			  name, from, i915_mmio_reg_offset(wa->reg),
			  cur, cur & wa->read,
			  wa->val, wa->mask);

		return false;
	}

	return true;
}

static void
wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw;
	unsigned long flags;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	fw = wal_get_fw_for_rmw(uncore, wal);

	spin_lock_irqsave(&uncore->lock, flags);
	intel_uncore_forcewake_get__locked(uncore, fw);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
			wa_verify(wa,
				  intel_uncore_read_fw(uncore, wa->reg),
				  wal->name, "application");
	}

	intel_uncore_forcewake_put__locked(uncore, fw);
	spin_unlock_irqrestore(&uncore->lock, flags);
}

void intel_gt_apply_workarounds(struct intel_gt *gt)
{
	wa_list_apply(gt->uncore, &gt->i915->gt_wa_list);
}

static bool wa_list_verify(struct intel_uncore *uncore,
			   const struct i915_wa_list *wal,
			   const char *from)
{
	struct i915_wa *wa;
	unsigned int i;
	bool ok = true;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		ok &= wa_verify(wa,
				intel_uncore_read(uncore, wa->reg),
				wal->name, from);

	return ok;
}

bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
{
	return wa_list_verify(gt->uncore, &gt->i915->gt_wa_list, from);
}

static inline bool is_nonpriv_flags_valid(u32 flags)
{
	/* Check only valid flag bits are set */
	if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
		return false;

	/* NB: Only 3 out of 4 enum values are valid for access field */
	if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
	    RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
		return false;

	return true;
}

static void
whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
{
	struct i915_wa wa = {
		.reg = reg
	};

	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
		return;

	if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
		return;

	wa.reg.reg |= flags;
	_wa_add(wal, &wa);
}

static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
}

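/*
 * Each register added by the whitelist builders below occupies one
 * RING_FORCE_TO_NONPRIV slot on the engine (at most RING_MAX_NONPRIV_SLOTS in
 * total). The access flags passed to whitelist_reg_ext() are OR'd into the
 * register value recorded in the list, and intel_engine_apply_whitelist()
 * later writes that combined value into the slot registers. A
 * RING_FORCE_TO_NONPRIV_RANGE_4 entry, for instance, covers four consecutive
 * registers starting at the given offset.
 */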
static void gen9_whitelist_build(struct i915_wa_list *w)
{
	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
	whitelist_reg(w, GEN8_CS_CHICKEN1);

	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
	whitelist_reg(w, GEN8_HDC_CHICKEN1);

	/* WaSendPushConstantsFromMMIO:skl,bxt */
	whitelist_reg(w, COMMON_SLICE_CHICKEN2);
}

static void skl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:skl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct intel_engine_cs *engine)
{
	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(&engine->whitelist);
}

static void kbl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:kbl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/*
	 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
	 *
	 * This covers 4 registers which are next to one another :
	 *   - PS_INVOCATION_COUNT
	 *   - PS_INVOCATION_COUNT_UDW
	 *   - PS_DEPTH_COUNT
	 *   - PS_DEPTH_COUNT_UDW
	 */
	whitelist_reg_ext(w, PS_INVOCATION_COUNT,
			  RING_FORCE_TO_NONPRIV_ACCESS_RD |
			  RING_FORCE_TO_NONPRIV_RANGE_4);
}

static void cnl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	/* WaEnablePreemptionGranularityControlByUMD:cnl */
	whitelist_reg(w, GEN8_CS_CHICKEN1);
}

static void icl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	switch (engine->class) {
	case RENDER_CLASS:
		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
		whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);

		/* WaAllowUMDToModifySamplerMode:icl */
		whitelist_reg(w, GEN10_SAMPLER_MODE);

		/* WaEnableStateCacheRedirectToCS:icl */
		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);

		/*
		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
		 *
		 * This covers 4 registers which are next to one another :
		 *   - PS_INVOCATION_COUNT
		 *   - PS_INVOCATION_COUNT_UDW
		 *   - PS_DEPTH_COUNT
		 *   - PS_DEPTH_COUNT_UDW
		 */
		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
				  RING_FORCE_TO_NONPRIV_RANGE_4);
		break;

	case VIDEO_DECODE_CLASS:
		/* hucStatusRegOffset */
		whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		/* hucUKernelHdrInfoRegOffset */
		whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		/* hucStatus2RegOffset */
		whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		break;

	default:
		break;
	}
}

static void tgl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	switch (engine->class) {
	case RENDER_CLASS:
		/*
		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
		 *
		 * This covers 4 registers which are next to one another :
		 *   - PS_INVOCATION_COUNT
		 *   - PS_INVOCATION_COUNT_UDW
		 *   - PS_DEPTH_COUNT
		 *   - PS_DEPTH_COUNT_UDW
		 */
		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
				  RING_FORCE_TO_NONPRIV_RANGE_4);
		break;
	default:
		break;
	}
}

void intel_engine_init_whitelist(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *w = &engine->whitelist;

	wa_init_start(w, "whitelist", engine->name);

	if (IS_GEN(i915, 12))
		tgl_whitelist_build(engine);
	else if (IS_GEN(i915, 11))
		icl_whitelist_build(engine);
	else if (IS_CANNONLAKE(i915))
		cnl_whitelist_build(engine);
	else if (IS_COFFEELAKE(i915))
		cfl_whitelist_build(engine);
	else if (IS_GEMINILAKE(i915))
		glk_whitelist_build(engine);
	else if (IS_KABYLAKE(i915))
		kbl_whitelist_build(engine);
	else if (IS_BROXTON(i915))
		bxt_whitelist_build(engine);
	else if (IS_SKYLAKE(i915))
		skl_whitelist_build(engine);
	else if (INTEL_GEN(i915) <= 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));

	wa_init_finish(w);
}

void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
{
	const struct i915_wa_list *wal = &engine->whitelist;
	struct intel_uncore *uncore = engine->uncore;
	const u32 base = engine->mmio_base;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		intel_uncore_write(uncore,
				   RING_FORCE_TO_NONPRIV(base, i),
				   i915_mmio_reg_offset(wa->reg));

	/* And clear the rest just in case of garbage */
	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
		intel_uncore_write(uncore,
				   RING_FORCE_TO_NONPRIV(base, i),
				   i915_mmio_reg_offset(RING_NOPID(base)));
}

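/*
 * Unlike the context workarounds at the top of this file, the per-engine
 * lists built below are not part of the context image: they are plain MMIO
 * writes applied through wa_list_apply() by intel_engine_apply_workarounds(),
 * typically whenever the engine is brought back up and its registers may
 * have reverted to their defaults.
 */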
static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
		/* Wa_1606700617:tgl */
		wa_masked_en(wal,
			     GEN9_CS_DEBUG_MODE1,
			     FF_DOP_CLOCK_GATE_DISABLE);

		/* Wa_1607138336:tgl */
		wa_write_or(wal,
			    GEN9_CTX_PREEMPT_REG,
			    GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);

		/* Wa_1607030317:tgl */
		/* Wa_1607186500:tgl */
		/* Wa_1607297627:tgl */
		wa_masked_en(wal,
			     GEN6_RC_SLEEP_PSMI_CONTROL,
			     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
			     GEN8_RC_SEMA_IDLE_MSG_DISABLE);

		/*
		 * Wa_1606679103:tgl
		 * (see also Wa_1606682166:icl)
		 */
		wa_write_or(wal,
			    GEN7_SARCHKMD,
			    GEN7_DISABLE_SAMPLER_PREFETCH);
	}

	if (IS_GEN(i915, 11)) {
		/* This is not a Wa. Enable for better image quality */
		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);

		/* WaPipelineFlushCoherentLines:icl */
		wa_write_or(wal,
			    GEN8_L3SQCREG4,
			    GEN8_LQSC_FLUSH_COHERENT_LINES);

		/*
		 * Wa_1405543622:icl
		 * Formerly known as WaGAPZPriorityScheme
		 */
		wa_write_or(wal,
			    GEN8_GARBCNTL,
			    GEN11_ARBITRATION_PRIO_ORDER_MASK);

		/*
		 * Wa_1604223664:icl
		 * Formerly known as WaL3BankAddressHashing
		 */
		wa_write_masked_or(wal,
				   GEN8_GARBCNTL,
				   GEN11_HASH_CTRL_EXCL_MASK,
				   GEN11_HASH_CTRL_EXCL_BIT0);
		wa_write_masked_or(wal,
				   GEN11_GLBLINVL,
				   GEN11_BANK_HASH_ADDR_EXCL_MASK,
				   GEN11_BANK_HASH_ADDR_EXCL_BIT0);

		/*
		 * Wa_1405733216:icl
		 * Formerly known as WaDisableCleanEvicts
		 */
		wa_write_or(wal,
			    GEN8_L3SQCREG4,
			    GEN11_LQSC_CLEAN_EVICT_DISABLE);

		/* WaForwardProgressSoftReset:icl */
		wa_write_or(wal,
			    GEN10_SCRATCH_LNCF2,
			    PMFLUSHDONE_LNICRSDROP |
			    PMFLUSH_GAPL3UNBLOCK |
			    PMFLUSHDONE_LNEBLK);

		/* Wa_1406609255:icl (pre-prod) */
		if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
			wa_write_or(wal,
				    GEN7_SARCHKMD,
				    GEN7_DISABLE_DEMAND_PREFETCH);

		/* Wa_1606682166:icl */
		wa_write_or(wal,
			    GEN7_SARCHKMD,
			    GEN7_DISABLE_SAMPLER_PREFETCH);

		/* Wa_1409178092:icl */
		wa_write_masked_or(wal,
				   GEN11_SCRATCH2,
				   GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
				   0);
	}

	if (IS_GEN_RANGE(i915, 9, 11)) {
		/* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
		wa_masked_en(wal,
			     GEN7_FF_SLICE_CS_CHICKEN1,
			     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
	}

	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
		/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
		wa_write_or(wal,
			    GEN8_GARBCNTL,
			    GEN9_GAPS_TSV_CREDIT_DISABLE);
	}

	if (IS_BROXTON(i915)) {
		/* WaDisablePooledEuLoadBalancingFix:bxt */
		wa_masked_en(wal,
			     FF_SLICE_CS_CHICKEN2,
			     GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
	}

	if (IS_GEN(i915, 9)) {
		/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
		wa_masked_en(wal,
			     GEN9_CSFE_CHICKEN1_RCS,
			     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);

		/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
		wa_write_or(wal,
			    BDW_SCRATCH1,
			    GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

		/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
		if (IS_GEN9_LP(i915))
			wa_write_masked_or(wal,
					   GEN8_L3SQCREG1,
					   L3_PRIO_CREDITS_MASK,
					   L3_GENERAL_PRIO_CREDITS(62) |
					   L3_HIGH_PRIO_CREDITS(2));

		/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
		wa_write_or(wal,
			    GEN8_L3SQCREG4,
			    GEN8_LQSC_FLUSH_COHERENT_LINES);
	}
}

static void
xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/* WaKBLVECSSemaphoreWaitPoll:kbl */
	if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
		wa_write(wal,
			 RING_SEMA_WAIT_POLL(engine->mmio_base),
			 1);
	}
}

static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
		return;

	if (engine->class == RENDER_CLASS)
		rcs_engine_wa_init(engine, wal);
	else
		xcs_engine_wa_init(engine, wal);
}

void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->wa_list;

	if (INTEL_GEN(engine->i915) < 8)
		return;

	wa_init_start(wal, "engine", engine->name);
	engine_init_workarounds(engine, wal);
	wa_init_finish(wal);
}

void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
{
	wa_list_apply(engine->uncore, &engine->wa_list);
}

static struct i915_vma *
create_scratch(struct i915_address_space *vm, int count)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned int size;
	int err;

	size = round_up(count * sizeof(u32), PAGE_SIZE);
	obj = i915_gem_object_create_internal(vm->i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err)
		goto err_obj;

	return vma;

err_obj:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static bool mcr_range(struct drm_i915_private *i915, u32 offset)
{
	/*
	 * Registers in this range are affected by the MCR selector
	 * which only controls CPU initiated MMIO. Routing does not
	 * work for CS access so we cannot verify them on this path.
	 */
	if (INTEL_GEN(i915) >= 8 && (offset >= 0xb000 && offset <= 0xb4ff))
		return true;

	return false;
}

static int
wa_list_srm(struct i915_request *rq,
	    const struct i915_wa_list *wal,
	    struct i915_vma *vma)
{
	struct drm_i915_private *i915 = rq->i915;
	unsigned int i, count = 0;
	const struct i915_wa *wa;
	u32 srm, *cs;

	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
	if (INTEL_GEN(i915) >= 8)
		srm++;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
			count++;
	}

	cs = intel_ring_begin(rq, 4 * count);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		u32 offset = i915_mmio_reg_offset(wa->reg);

		if (mcr_range(i915, offset))
			continue;

		*cs++ = srm;
		*cs++ = offset;
		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
		*cs++ = 0;
	}
	intel_ring_advance(rq, cs);

	return 0;
}

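/*
 * engine_wa_list_verify() checks the list from the GPU's point of view: it
 * allocates a scratch buffer, submits a request that uses wa_list_srm() to
 * store every workaround register (outside the MCR range) into that buffer,
 * waits for the request and then runs wa_verify() on each captured value.
 */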
static int engine_wa_list_verify(struct intel_context *ce,
				 const struct i915_wa_list * const wal,
				 const char *from)
{
	const struct i915_wa *wa;
	struct i915_request *rq;
	struct i915_vma *vma;
	unsigned int i;
	u32 *results;
	int err;

	if (!wal->count)
		return 0;

	vma = create_scratch(&ce->engine->gt->ggtt->vm, wal->count);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	intel_engine_pm_get(ce->engine);
	rq = intel_context_create_request(ce);
	intel_engine_pm_put(ce->engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_vma;
	}

	err = wa_list_srm(rq, wal, vma);
	if (err)
		goto err_vma;

	i915_request_get(rq);
	i915_request_add(rq);
	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
	if (IS_ERR(results)) {
		err = PTR_ERR(results);
		goto err_rq;
	}

	err = 0;
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg)))
			continue;

		if (!wa_verify(wa, results[i], wal->name, from))
			err = -ENXIO;
	}

	i915_gem_object_unpin_map(vma->obj);

err_rq:
	i915_request_put(rq);
err_vma:
	i915_vma_unpin(vma);
	i915_vma_put(vma);
	return err;
}

int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
				    const char *from)
{
	return engine_wa_list_verify(engine->kernel_context,
				     &engine->wa_list,
				     from);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_workarounds.c"
#endif