/*	$NetBSD: intel_reset.c,v 1.6 2021/12/19 12:32:15 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2008-2018 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: intel_reset.c,v 1.6 2021/12/19 12:32:15 riastradh Exp $");

#include <linux/sched/mm.h>
#include <linux/stop_machine.h>

#include "display/intel_display_types.h"
#include "display/intel_overlay.h"

#include "gem/i915_gem_context.h"

#include "i915_drv.h"
#include "i915_gpu_error.h"
#include "i915_irq.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_reset.h"

#include "uc/intel_guc.h"
#include "uc/intel_guc_submission.h"

#include <linux/nbsd-namespace.h>

#define RESET_MAX_RETRIES 3

/* XXX How to handle concurrent GGTT updates using tiling registers? */
#define RESET_UNDER_STOP_MACHINE 0

static void rmw_set_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
{
	intel_uncore_rmw_fw(uncore, reg, 0, set);
}

static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
{
	intel_uncore_rmw_fw(uncore, reg, clr, 0);
}

static void engine_skip_context(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct intel_context *hung_ctx = rq->context;

	if (!i915_request_is_active(rq))
		return;

	lockdep_assert_held(&engine->active.lock);
	list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
		if (rq->context == hung_ctx)
			i915_request_skip(rq, -EIO);
}

static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
{
	struct drm_i915_file_private *file_priv = ctx->file_priv;
	unsigned long prev_hang;
	unsigned int score;

	if (IS_ERR_OR_NULL(file_priv))
		return;

	score = 0;
	if (banned)
		score = I915_CLIENT_SCORE_CONTEXT_BAN;

	prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
	if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
		score += I915_CLIENT_SCORE_HANG_FAST;

	if (score) {
		atomic_add(score, &file_priv->ban_score);

		DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
				 ctx->name, score,
				 atomic_read(&file_priv->ban_score));
	}
}

static bool mark_guilty(struct i915_request *rq)
{
	struct i915_gem_context *ctx;
	unsigned long prev_hang;
	bool banned;
	int i;

	rcu_read_lock();
	ctx = rcu_dereference(rq->context->gem_context);
	if (ctx && !kref_get_unless_zero(&ctx->ref))
		ctx = NULL;
	rcu_read_unlock();
	if (!ctx)
		return false;

	if (i915_gem_context_is_closed(ctx)) {
		intel_context_set_banned(rq->context);
		banned = true;
		goto out;
	}

	atomic_inc(&ctx->guilty_count);

	/* Cool contexts are too cool to be banned! (Used for reset testing.) */
	if (!i915_gem_context_is_bannable(ctx)) {
		banned = false;
		goto out;
	}

	dev_notice(ctx->i915->drm.dev,
		   "%s context reset due to GPU hang\n",
		   ctx->name);

	/* Record the timestamp for the last N hangs */
	prev_hang = ctx->hang_timestamp[0];
	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp) - 1; i++)
		ctx->hang_timestamp[i] = ctx->hang_timestamp[i + 1];
	ctx->hang_timestamp[i] = jiffies;

	/* If we have hung N+1 times in rapid succession, we ban the context! */
	banned = !i915_gem_context_is_recoverable(ctx);
	if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES))
		banned = true;
	if (banned) {
		DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n",
				 ctx->name, atomic_read(&ctx->guilty_count));
		intel_context_set_banned(rq->context);
	}

	client_mark_guilty(ctx, banned);

out:
	i915_gem_context_put(ctx);
	return banned;
}

static void mark_innocent(struct i915_request *rq)
{
	struct i915_gem_context *ctx;

	rcu_read_lock();
	ctx = rcu_dereference(rq->context->gem_context);
	if (ctx)
		atomic_inc(&ctx->active_count);
	rcu_read_unlock();
}

void __i915_request_reset(struct i915_request *rq, bool guilty)
{
	RQ_TRACE(rq, "guilty? %s\n", yesno(guilty));

	GEM_BUG_ON(i915_request_completed(rq));

	rcu_read_lock(); /* protect the GEM context */
	if (guilty) {
		i915_request_skip(rq, -EIO);
		if (mark_guilty(rq))
			engine_skip_context(rq);
	} else {
		dma_fence_set_error(&rq->fence, -EAGAIN);
		mark_innocent(rq);
	}
	rcu_read_unlock();
}

static bool i915_in_reset(struct pci_dev *pdev)
{
	u8 gdrst;

	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
	return gdrst & GRDOM_RESET_STATUS;
}

static int i915_do_reset(struct intel_gt *gt,
			 intel_engine_mask_t engine_mask,
			 unsigned int retry)
{
	struct pci_dev *pdev = gt->i915->drm.pdev;
	int err;

	/* Assert reset for at least 20 usec, and wait for acknowledgement. */
	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
	udelay(50);
	err = wait_for_atomic(i915_in_reset(pdev), 50);

	/* Clear the reset request. */
	pci_write_config_byte(pdev, I915_GDRST, 0);
	udelay(50);
	if (!err)
		err = wait_for_atomic(!i915_in_reset(pdev), 50);

	return err;
}

static bool g4x_reset_complete(struct pci_dev *pdev)
{
	u8 gdrst;

	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
	return (gdrst & GRDOM_RESET_ENABLE) == 0;
}

static int g33_do_reset(struct intel_gt *gt,
			intel_engine_mask_t engine_mask,
			unsigned int retry)
{
	struct pci_dev *pdev = gt->i915->drm.pdev;

	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
	return wait_for_atomic(g4x_reset_complete(pdev), 50);
}

static int g4x_do_reset(struct intel_gt *gt,
			intel_engine_mask_t engine_mask,
			unsigned int retry)
{
	struct pci_dev *pdev = gt->i915->drm.pdev;
	struct intel_uncore *uncore = gt->uncore;
	int ret;

	/* WaVcpClkGateDisableForMediaReset:ctg,elk */
	rmw_set_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE);
	intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D);

	pci_write_config_byte(pdev, I915_GDRST,
			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
	ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
	if (ret) {
		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
		goto out;
	}

	pci_write_config_byte(pdev, I915_GDRST,
			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
	ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
	if (ret) {
		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
		goto out;
	}

out:
	pci_write_config_byte(pdev, I915_GDRST, 0);

	rmw_clear_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE);
	intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D);

	return ret;
}

static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
			unsigned int retry)
{
	struct intel_uncore *uncore = gt->uncore;
	int ret;

	intel_uncore_write_fw(uncore, ILK_GDSR,
			      ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
	ret = __intel_wait_for_register_fw(uncore, ILK_GDSR,
					   ILK_GRDOM_RESET_ENABLE, 0,
					   5000, 0,
					   NULL);
	if (ret) {
		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
		goto out;
	}

	intel_uncore_write_fw(uncore, ILK_GDSR,
			      ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
	ret = __intel_wait_for_register_fw(uncore, ILK_GDSR,
					   ILK_GRDOM_RESET_ENABLE, 0,
					   5000, 0,
					   NULL);
	if (ret) {
		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
		goto out;
	}

out:
	intel_uncore_write_fw(uncore, ILK_GDSR, 0);
	intel_uncore_posting_read_fw(uncore, ILK_GDSR);
	return ret;
}

/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */
static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
{
	struct intel_uncore *uncore = gt->uncore;
	int err;

	/*
	 * GEN6_GDRST is not in the gt power well, no need to check
	 * for fifo space for the write or forcewake the chip for
	 * the read
	 */
	intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);

	/* Wait for the device to ack the reset requests */
	err = __intel_wait_for_register_fw(uncore,
					   GEN6_GDRST, hw_domain_mask, 0,
					   500, 0,
					   NULL);
	if (err)
		DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
				 hw_domain_mask);

	return err;
}

static int gen6_reset_engines(struct intel_gt *gt,
			      intel_engine_mask_t engine_mask,
			      unsigned int retry)
{
	static const u32 hw_engine_mask[] = {
		[RCS0]  = GEN6_GRDOM_RENDER,
		[BCS0]  = GEN6_GRDOM_BLT,
		[VCS0]  = GEN6_GRDOM_MEDIA,
		[VCS1]  = GEN8_GRDOM_MEDIA2,
		[VECS0] = GEN6_GRDOM_VECS,
	};
	struct intel_engine_cs *engine;
	u32 hw_mask;

	if (engine_mask == ALL_ENGINES) {
		hw_mask = GEN6_GRDOM_FULL;
	} else {
		intel_engine_mask_t tmp;

		hw_mask = 0;
		for_each_engine_masked(engine, gt, engine_mask, tmp) {
			GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
			hw_mask |= hw_engine_mask[engine->id];
		}
	}

	return gen6_hw_domain_reset(gt, hw_mask);
}

static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask)
{
	struct intel_uncore *uncore = engine->uncore;
	u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
	i915_reg_t sfc_forced_lock, sfc_forced_lock_ack;
	u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit;
	i915_reg_t sfc_usage;
	u32 sfc_usage_bit;
	u32 sfc_reset_bit;
	int ret;

	switch (engine->class) {
	case VIDEO_DECODE_CLASS:
		if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
			return 0;

		sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
		sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;

		sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine);
		sfc_forced_lock_ack_bit = GEN11_VCS_SFC_LOCK_ACK_BIT;

		sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine);
		sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT;
		sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance);
		break;

	case VIDEO_ENHANCEMENT_CLASS:
		sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
		sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;

		sfc_forced_lock_ack = GEN11_VECS_SFC_LOCK_ACK(engine);
		sfc_forced_lock_ack_bit = GEN11_VECS_SFC_LOCK_ACK_BIT;

		sfc_usage = GEN11_VECS_SFC_USAGE(engine);
		sfc_usage_bit = GEN11_VECS_SFC_USAGE_BIT;
		sfc_reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance);
		break;

	default:
		return 0;
	}

	/*
	 * If the engine is using a SFC, tell the engine that a software reset
	 * is going to happen. The engine will then try to force lock the SFC.
	 * If SFC ends up being locked to the engine we want to reset, we have
	 * to reset it as well (we will unlock it once the reset sequence is
	 * completed).
	 */
	if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit))
		return 0;

	rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);

	ret = __intel_wait_for_register_fw(uncore,
					   sfc_forced_lock_ack,
					   sfc_forced_lock_ack_bit,
					   sfc_forced_lock_ack_bit,
					   1000, 0, NULL);

	/* Was the SFC released while we were trying to lock it? */
	if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit))
		return 0;

	if (ret) {
		DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
		return ret;
	}

	*hw_mask |= sfc_reset_bit;
	return 0;
}

static void gen11_unlock_sfc(struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
	i915_reg_t sfc_forced_lock;
	u32 sfc_forced_lock_bit;

	switch (engine->class) {
	case VIDEO_DECODE_CLASS:
		if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
			return;

		sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
		sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
		break;

	case VIDEO_ENHANCEMENT_CLASS:
		sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
		sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
		break;

	default:
		return;
	}

	rmw_clear_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);
}

static int gen11_reset_engines(struct intel_gt *gt,
			       intel_engine_mask_t engine_mask,
			       unsigned int retry)
{
	static const u32 hw_engine_mask[] = {
		[RCS0]  = GEN11_GRDOM_RENDER,
		[BCS0]  = GEN11_GRDOM_BLT,
		[VCS0]  = GEN11_GRDOM_MEDIA,
		[VCS1]  = GEN11_GRDOM_MEDIA2,
		[VCS2]  = GEN11_GRDOM_MEDIA3,
		[VCS3]  = GEN11_GRDOM_MEDIA4,
		[VECS0] = GEN11_GRDOM_VECS,
		[VECS1] = GEN11_GRDOM_VECS2,
	};
	struct intel_engine_cs *engine;
	intel_engine_mask_t tmp;
	u32 hw_mask;
	int ret;

	if (engine_mask == ALL_ENGINES) {
		hw_mask = GEN11_GRDOM_FULL;
	} else {
		hw_mask = 0;
		for_each_engine_masked(engine, gt, engine_mask, tmp) {
			GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
			hw_mask |= hw_engine_mask[engine->id];
			ret = gen11_lock_sfc(engine, &hw_mask);
			if (ret)
				goto sfc_unlock;
		}
	}

	ret = gen6_hw_domain_reset(gt, hw_mask);

sfc_unlock:
	/*
	 * We unlock the SFC based on the lock status and not the result of
	 * gen11_lock_sfc to make sure that we clean properly if something
	 * wrong happened during the lock (e.g. lock acquired after timeout
	 * expiration).
	 */
	if (engine_mask != ALL_ENGINES)
		for_each_engine_masked(engine, gt, engine_mask, tmp)
			gen11_unlock_sfc(engine);

	return ret;
}

static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	const i915_reg_t reg = RING_RESET_CTL(engine->mmio_base);
	u32 request, mask, ack;
	int ret;

	ack = intel_uncore_read_fw(uncore, reg);
	if (ack & RESET_CTL_CAT_ERROR) {
		/*
		 * For catastrophic errors, ready-for-reset sequence
		 * needs to be bypassed: HAS#396813
		 */
		request = RESET_CTL_CAT_ERROR;
		mask = RESET_CTL_CAT_ERROR;

		/* Catastrophic errors need to be cleared by HW */
		ack = 0;
	} else if (!(ack & RESET_CTL_READY_TO_RESET)) {
		request = RESET_CTL_REQUEST_RESET;
		mask = RESET_CTL_READY_TO_RESET;
		ack = RESET_CTL_READY_TO_RESET;
	} else {
		return 0;
	}

	intel_uncore_write_fw(uncore, reg, _MASKED_BIT_ENABLE(request));
	ret = __intel_wait_for_register_fw(uncore, reg, mask, ack,
					   700, 0, NULL);
	if (ret)
		DRM_ERROR("%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n",
			  engine->name, request,
			  intel_uncore_read_fw(uncore, reg));

	return ret;
}

static void gen8_engine_reset_cancel(struct intel_engine_cs *engine)
{
	intel_uncore_write_fw(engine->uncore,
			      RING_RESET_CTL(engine->mmio_base),
			      _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET));
}

static int gen8_reset_engines(struct intel_gt *gt,
			      intel_engine_mask_t engine_mask,
			      unsigned int retry)
{
	struct intel_engine_cs *engine;
	const bool reset_non_ready = retry >= 1;
	intel_engine_mask_t tmp;
	int ret;

	for_each_engine_masked(engine, gt, engine_mask, tmp) {
		ret = gen8_engine_reset_prepare(engine);
		if (ret && !reset_non_ready)
			goto skip_reset;

		/*
		 * If this is not the first failed attempt to prepare,
		 * we decide to proceed anyway.
		 *
		 * By doing so we risk context corruption and with
		 * some gens (kbl), possible system hang if reset
		 * happens during active bb execution.
		 *
		 * We rather take context corruption instead of
		 * failed reset with a wedged driver/gpu. And
		 * active bb execution case should be covered by
		 * stop_engines() we have before the reset.
		 */
	}

	if (INTEL_GEN(gt->i915) >= 11)
		ret = gen11_reset_engines(gt, engine_mask, retry);
	else
		ret = gen6_reset_engines(gt, engine_mask, retry);

skip_reset:
	for_each_engine_masked(engine, gt, engine_mask, tmp)
		gen8_engine_reset_cancel(engine);

	return ret;
}

static int mock_reset(struct intel_gt *gt,
		      intel_engine_mask_t mask,
		      unsigned int retry)
{
	return 0;
}

typedef int (*reset_func)(struct intel_gt *,
			  intel_engine_mask_t engine_mask,
			  unsigned int retry);

static reset_func intel_get_gpu_reset(const struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (is_mock_gt(gt))
		return mock_reset;
	else if (INTEL_GEN(i915) >= 8)
		return gen8_reset_engines;
	else if (INTEL_GEN(i915) >= 6)
		return gen6_reset_engines;
	else if (INTEL_GEN(i915) >= 5)
		return ilk_do_reset;
	else if (IS_G4X(i915))
		return g4x_do_reset;
	else if (IS_G33(i915) || IS_PINEVIEW(i915))
		return g33_do_reset;
	else if (INTEL_GEN(i915) >= 3)
		return i915_do_reset;
	else
		return NULL;
}

int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
	const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1;
	reset_func reset;
	int ret = -ETIMEDOUT;
	int retry;

	reset = intel_get_gpu_reset(gt);
	if (!reset)
		return -ENODEV;

	/*
	 * If the power well sleeps during the reset, the reset
	 * request may be dropped and never completes (causing -EIO).
	 */
	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
	for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
		GT_TRACE(gt, "engine_mask=%x\n", engine_mask);
		preempt_disable();
		ret = reset(gt, engine_mask, retry);
		preempt_enable();
	}
	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

	return ret;
}

bool intel_has_gpu_reset(const struct intel_gt *gt)
{
	if (!i915_modparams.reset)
		return NULL;

	return intel_get_gpu_reset(gt);
}

bool intel_has_reset_engine(const struct intel_gt *gt)
{
	if (i915_modparams.reset < 2)
		return false;

	return INTEL_INFO(gt->i915)->has_reset_engine;
}

int intel_reset_guc(struct intel_gt *gt)
{
	u32 guc_domain =
		INTEL_GEN(gt->i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC;
	int ret;

	GEM_BUG_ON(!HAS_GT_UC(gt->i915));

	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
	ret = gen6_hw_domain_reset(gt, guc_domain);
	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

	return ret;
}

/*
 * Ensure irq handler finishes, and not run again.
 * Also return the active request so that we only search for it once.
 */
static void reset_prepare_engine(struct intel_engine_cs *engine)
{
	/*
	 * During the reset sequence, we must prevent the engine from
	 * entering RC6. As the context state is undefined until we restart
	 * the engine, if it does enter RC6 during the reset, the state
	 * written to the powercontext is undefined and so we may lose
	 * GPU state upon resume, i.e. fail to restart after a reset.
	 */
	intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
	if (engine->reset.prepare)
		engine->reset.prepare(engine);
}

static void revoke_mmaps(struct intel_gt *gt)
{
	int i;

	for (i = 0; i < gt->ggtt->num_fences; i++) {
		struct drm_vma_offset_node *node;
		struct i915_vma *vma;
		u64 vma_offset;

		vma = READ_ONCE(gt->ggtt->fence_regs[i].vma);
		if (!vma)
			continue;

		if (!i915_vma_has_userfault(vma))
			continue;

		GEM_BUG_ON(vma->fence != &gt->ggtt->fence_regs[i]);

		if (!vma->mmo)
			continue;

		node = &vma->mmo->vma_node;
		vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;

#ifdef __NetBSD__
		__USE(vma_offset);
		__USE(node);
		paddr_t pa = gt->i915->ggtt.gmadr.start + vma->node.start;
		vsize_t npgs = vma->size >> PAGE_SHIFT;
		while (npgs --> 0)
			pmap_pv_protect(pa + (npgs << PAGE_SHIFT),
			    VM_PROT_NONE);
#else
		unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping,
				    drm_vma_node_offset_addr(node) + vma_offset,
				    vma->size,
				    1);
#endif
	}
}

static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	intel_engine_mask_t awake = 0;
	enum intel_engine_id id;

	for_each_engine(engine, gt, id) {
		if (intel_engine_pm_get_if_awake(engine))
			awake |= engine->mask;
		reset_prepare_engine(engine);
	}

	intel_uc_reset_prepare(&gt->uc);

	return awake;
}

static void gt_revoke(struct intel_gt *gt)
{
	revoke_mmaps(gt);
}

static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err;

	/*
	 * Everything depends on having the GTT running, so we need to start
	 * there.
	 */
	err = i915_ggtt_enable_hw(gt->i915);
	if (err)
		return err;

	for_each_engine(engine, gt, id)
		__intel_engine_reset(engine, stalled_mask & engine->mask);

	i915_gem_restore_fences(gt->ggtt);

	return err;
}

static void reset_finish_engine(struct intel_engine_cs *engine)
{
	if (engine->reset.finish)
		engine->reset.finish(engine);
	intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);

	intel_engine_signal_breadcrumbs(engine);
}

static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, gt, id) {
		reset_finish_engine(engine);
		if (awake & engine->mask)
			intel_engine_pm_put(engine);
	}
}

static void nop_submit_request(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	RQ_TRACE(request, "-EIO\n");
	dma_fence_set_error(&request->fence, -EIO);

	spin_lock_irqsave(&engine->active.lock, flags);
	__i915_request_submit(request);
	i915_request_mark_complete(request);
	spin_unlock_irqrestore(&engine->active.lock, flags);

	intel_engine_signal_breadcrumbs(engine);
}

static void __intel_gt_set_wedged(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	intel_engine_mask_t awake;
	enum intel_engine_id id;

	if (test_bit(I915_WEDGED, &gt->reset.flags))
		return;

	if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) {
		struct drm_printer p = drm_debug_printer(__func__);

		for_each_engine(engine, gt, id)
			intel_engine_dump(engine, &p, "%s\n", engine->name);
	}

	GT_TRACE(gt, "start\n");

	/*
	 * First, stop submission to hw, but do not yet complete requests by
	 * rolling the global seqno forward (since this would complete requests
	 * for which we haven't set the fence error to EIO yet).
	 */
	awake = reset_prepare(gt);

	/* Even if the GPU reset fails, it should still stop the engines */
	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
		__intel_gt_reset(gt, ALL_ENGINES);

	for_each_engine(engine, gt, id)
		engine->submit_request = nop_submit_request;

	/*
	 * Make sure no request can slip through without getting completed by
	 * either this call here to intel_engine_write_global_seqno, or the one
	 * in nop_submit_request.
	 */
	synchronize_rcu_expedited();
	set_bit(I915_WEDGED, &gt->reset.flags);

	/* Mark all executing requests as skipped */
	for_each_engine(engine, gt, id)
		if (engine->reset.cancel)
			engine->reset.cancel(engine);

	reset_finish(gt, awake);

	GT_TRACE(gt, "end\n");
}

void intel_gt_set_wedged(struct intel_gt *gt)
{
	intel_wakeref_t wakeref;

	mutex_lock(&gt->reset.mutex);
	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		__intel_gt_set_wedged(gt);
	mutex_unlock(&gt->reset.mutex);
}

static bool __intel_gt_unset_wedged(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;
	struct intel_timeline *tl;
	bool ok;

	if (!test_bit(I915_WEDGED, &gt->reset.flags))
		return true;

	/* Never fully initialised, recovery impossible */
	if (test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags))
		return false;

	GT_TRACE(gt, "start\n");

	/*
	 * Before unwedging, make sure that all pending operations
	 * are flushed and errored out - we may have requests waiting upon
	 * third party fences. We marked all inflight requests as EIO, and
	 * every execbuf since returned EIO, for consistency we want all
	 * the currently pending requests to also be marked as EIO, which
	 * is done inside our nop_submit_request - and so we must wait.
	 *
	 * No more can be submitted until we reset the wedged bit.
	 */
	spin_lock(&timelines->lock);
	list_for_each_entry(tl, &timelines->active_list, link) {
		struct dma_fence *fence;

		fence = i915_active_fence_get(&tl->last_request);
		if (!fence)
			continue;

		spin_unlock(&timelines->lock);

		/*
		 * All internal dependencies (i915_requests) will have
		 * been flushed by the set-wedge, but we may be stuck waiting
		 * for external fences. These should all be capped to 10s
		 * (I915_FENCE_TIMEOUT) so this wait should not be unbounded
		 * in the worst case.
		 */
		dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT);
		dma_fence_put(fence);

		/* Restart iteration after dropping lock */
		spin_lock(&timelines->lock);
		tl = list_entry(&timelines->active_list, typeof(*tl), link);
	}
	spin_unlock(&timelines->lock);

	/* We must reset pending GPU events before restoring our submission */
	ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
		ok = __intel_gt_reset(gt, ALL_ENGINES) == 0;
	if (!ok) {
		/*
		 * Warn CI about the unrecoverable wedged condition.
		 * Time for a reboot.
		 */
		add_taint_for_CI(TAINT_WARN);
		return false;
	}

	/*
	 * Undo nop_submit_request. We prevent all new i915 requests from
	 * being queued (by disallowing execbuf whilst wedged) so having
	 * waited for all active requests above, we know the system is idle
	 * and do not have to worry about a thread being inside
	 * engine->submit_request() as we swap over. So unlike installing
	 * the nop_submit_request on reset, we can do this from normal
	 * context and do not require stop_machine().
	 */
	intel_engines_reset_default_submission(gt);

	GT_TRACE(gt, "end\n");

	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
	clear_bit(I915_WEDGED, &gt->reset.flags);

	return true;
}

bool intel_gt_unset_wedged(struct intel_gt *gt)
{
	bool result;

	mutex_lock(&gt->reset.mutex);
	result = __intel_gt_unset_wedged(gt);
	mutex_unlock(&gt->reset.mutex);

	return result;
}

static int do_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
{
	int err, i;

	gt_revoke(gt);

	err = __intel_gt_reset(gt, ALL_ENGINES);
	for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
		msleep(10 * (i + 1));
		err = __intel_gt_reset(gt, ALL_ENGINES);
	}
	if (err)
		return err;

	return gt_reset(gt, stalled_mask);
}

static int resume(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int ret;

	for_each_engine(engine, gt, id) {
		ret = engine->resume(engine);
		if (ret)
			return ret;
	}

	return 0;
}

/**
 * intel_gt_reset - reset chip after a hang
 * @gt: #intel_gt to reset
 * @stalled_mask: mask of the stalled engines with the guilty requests
 * @reason: user error message for why we are resetting
 *
 * Reset the chip.  Useful if a hang is detected. Marks the device as wedged
 * on failure.
 *
 * Procedure is fairly simple:
 *   - reset the chip using the reset reg
 *   - re-init context state
 *   - re-init hardware status page
 *   - re-init ring buffer
 *   - re-init interrupt state
 *   - re-init display
 */
void intel_gt_reset(struct intel_gt *gt,
		    intel_engine_mask_t stalled_mask,
		    const char *reason)
{
	intel_engine_mask_t awake;
	int ret;

	GT_TRACE(gt, "flags=%lx\n", gt->reset.flags);

	might_sleep();
	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
	mutex_lock(&gt->reset.mutex);

	/* Clear any previous failed attempts at recovery. Time to try again. */
	if (!__intel_gt_unset_wedged(gt))
		goto unlock;

	if (reason)
		dev_notice(gt->i915->drm.dev,
			   "Resetting chip for %s\n", reason);
	atomic_inc(&gt->i915->gpu_error.reset_count);

	awake = reset_prepare(gt);

	if (!intel_has_gpu_reset(gt)) {
		if (i915_modparams.reset)
			dev_err(gt->i915->drm.dev, "GPU reset not supported\n");
		else
			DRM_DEBUG_DRIVER("GPU reset disabled\n");
		goto error;
	}

	if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
		intel_runtime_pm_disable_interrupts(gt->i915);

	if (do_reset(gt, stalled_mask)) {
		dev_err(gt->i915->drm.dev, "Failed to reset chip\n");
		goto taint;
	}

	if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
		intel_runtime_pm_enable_interrupts(gt->i915);

	intel_overlay_reset(gt->i915);

	/*
	 * Next we need to restore the context, but we don't use those
	 * yet either...
	 *
	 * Ring buffer needs to be re-initialized in the KMS case, or if X
	 * was running at the time of the reset (i.e. we weren't VT
	 * switched away).
	 */
	ret = intel_gt_init_hw(gt);
	if (ret) {
		DRM_ERROR("Failed to initialise HW following reset (%d)\n",
			  ret);
		goto taint;
	}

	ret = resume(gt);
	if (ret)
		goto taint;

finish:
	reset_finish(gt, awake);
unlock:
	mutex_unlock(&gt->reset.mutex);
	return;

taint:
	/*
	 * History tells us that if we cannot reset the GPU now, we
	 * never will. This then impacts everything that is run
	 * subsequently. On failing the reset, we mark the driver
	 * as wedged, preventing further execution on the GPU.
	 * We also want to go one step further and add a taint to the
	 * kernel so that any subsequent faults can be traced back to
	 * this failure. This is important for CI, where if the
	 * GPU/driver fails we would like to reboot and restart testing
	 * rather than continue on into oblivion. For everyone else,
	 * the system should still plod along, but they have been warned!
	 */
	add_taint_for_CI(TAINT_WARN);
error:
	__intel_gt_set_wedged(gt);
	goto finish;
}

static inline int intel_gt_reset_engine(struct intel_engine_cs *engine)
{
	return __intel_gt_reset(engine->gt, engine->mask);
}

/**
 * intel_engine_reset - reset GPU engine to recover from a hang
 * @engine: engine to reset
 * @msg: reason for GPU reset; or NULL for no dev_notice()
 *
 * Reset a specific GPU engine. Useful if a hang is detected.
 * Returns zero on successful reset or otherwise an error code.
 *
 * Procedure is:
 *  - identifies the request that caused the hang and it is dropped
 *  - reset engine (which will force the engine to idle)
 *  - re-init/configure engine
 */
int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
{
	struct intel_gt *gt = engine->gt;
	bool uses_guc = intel_engine_in_guc_submission_mode(engine);
	int ret;

	ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags);
	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags));

	if (!intel_engine_pm_get_if_awake(engine))
		return 0;

	reset_prepare_engine(engine);

	if (msg)
		dev_notice(engine->i915->drm.dev,
			   "Resetting %s for %s\n", engine->name, msg);
	atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);

	if (!uses_guc)
		ret = intel_gt_reset_engine(engine);
	else
		ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
	if (ret) {
		/* If we fail here, we expect to fallback to a global reset */
		DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
				 uses_guc ? "GuC " : "",
				 engine->name, ret);
		goto out;
	}

	/*
	 * The request that caused the hang is stuck on elsp, we know the
	 * active request and can drop it, adjust head to skip the offending
	 * request to resume executing remaining requests in the queue.
	 */
	__intel_engine_reset(engine, true);

	/*
	 * The engine and its registers (and workarounds in case of render)
	 * have been reset to their default values. Follow the init_ring
	 * process to program RING_MODE, HWSP and re-enable submission.
	 */
	ret = engine->resume(engine);

out:
	intel_engine_cancel_stop_cs(engine);
	reset_finish_engine(engine);
	intel_engine_pm_put_async(engine);
	return ret;
}

static void intel_gt_reset_global(struct intel_gt *gt,
				  u32 engine_mask,
				  const char *reason)
{
#ifndef __NetBSD__		/* XXX kobject uevent...? */
	struct kobject *kobj = &gt->i915->drm.primary->kdev->kobj;
	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
#endif
	struct intel_wedge_me w;

#ifndef __NetBSD__
	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
#endif

	DRM_DEBUG_DRIVER("resetting chip\n");
#ifndef __NetBSD__
	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
#endif

	/* Use a watchdog to ensure that our reset completes */
	intel_wedge_on_timeout(&w, gt, 5 * HZ) {
		intel_prepare_reset(gt->i915);

		/* Flush everyone using a resource about to be clobbered */
		synchronize_srcu_expedited(&gt->reset.backoff_srcu);

		intel_gt_reset(gt, engine_mask, reason);

		intel_finish_reset(gt->i915);
	}

#ifndef __NetBSD__		/* XXX kobj uevent...? */
	if (!test_bit(I915_WEDGED, &gt->reset.flags))
		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
#endif
}

/**
 * intel_gt_handle_error - handle a gpu error
 * @gt: the intel_gt
 * @engine_mask: mask representing engines that are hung
 * @flags: control flags
 * @fmt: Error message format string
 *
 * Do some basic checking of register state at error time and
 * dump it to the syslog.  Also call i915_capture_error_state() to make
 * sure we get a record and make it available in debugfs. Fire a uevent
 * so userspace knows something bad happened (should trigger collection
 * of a ring dump etc.).
 */
void intel_gt_handle_error(struct intel_gt *gt,
			   intel_engine_mask_t engine_mask,
			   unsigned long flags,
			   const char *fmt, ...)
{
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	intel_engine_mask_t tmp;
	char error_msg[80];
	char *msg = NULL;

	if (fmt) {
		va_list args;

		va_start(args, fmt);
		vscnprintf(error_msg, sizeof(error_msg), fmt, args);
		va_end(args);

		msg = error_msg;
	}

	/*
	 * In most cases it's guaranteed that we get here with an RPM
	 * reference held, for example because there is a pending GPU
	 * request that won't finish until the reset is done. This
	 * isn't the case at least when we get here by doing a
	 * simulated reset via debugfs, so get an RPM reference.
	 */
	wakeref = intel_runtime_pm_get(gt->uncore->rpm);

	engine_mask &= INTEL_INFO(gt->i915)->engine_mask;

	if (flags & I915_ERROR_CAPTURE) {
		i915_capture_error_state(gt->i915);
		intel_gt_clear_error_registers(gt, engine_mask);
	}

	/*
	 * Try engine reset when available. We fall back to full reset if
	 * single reset fails.
	 */
	if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
		for_each_engine_masked(engine, gt, engine_mask, tmp) {
			BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
			if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
					     &gt->reset.flags))
				continue;

			if (intel_engine_reset(engine, msg) == 0)
				engine_mask &= ~engine->mask;

			clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
					      &gt->reset.flags);
		}
	}

	if (!engine_mask)
		goto out;

	/* Full reset needs the mutex, stop any other user trying to do so. */
	if (test_and_set_bit(I915_RESET_BACKOFF, &gt->reset.flags)) {
		int ret;
		spin_lock(&gt->reset.lock);
		DRM_SPIN_WAIT_NOINTR_UNTIL(ret,
		    &gt->reset.queue,
		    &gt->reset.lock,
		    !test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
		spin_unlock(&gt->reset.lock);
		goto out;	/* piggy-back on the other reset */
	}

	/* Make sure i915_reset_trylock() sees the I915_RESET_BACKOFF */
	synchronize_rcu_expedited();

	/* Prevent any other reset-engine attempt. */
	for_each_engine(engine, gt, tmp) {
		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
					&gt->reset.flags))
			wait_on_bit(&gt->reset.flags,
				    I915_RESET_ENGINE + engine->id,
				    TASK_UNINTERRUPTIBLE);
	}

	intel_gt_reset_global(gt, engine_mask, msg);

	for_each_engine(engine, gt, tmp)
		clear_bit_unlock(I915_RESET_ENGINE + engine->id,
				 &gt->reset.flags);
	clear_bit_unlock(I915_RESET_BACKOFF, &gt->reset.flags);
	smp_mb__after_atomic();
	spin_lock(&gt->reset.lock);
	DRM_SPIN_WAKEUP_ALL(&gt->reset.queue, &gt->reset.lock);
	spin_unlock(&gt->reset.lock);

out:
	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
}

int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
{
	might_lock(&gt->reset.backoff_srcu);
	might_sleep();

	rcu_read_lock();
	while (test_bit(I915_RESET_BACKOFF, &gt->reset.flags)) {
		rcu_read_unlock();

		int ret;
		spin_lock(&gt->reset.lock);
		DRM_SPIN_WAIT_UNTIL(ret, &gt->reset.queue, &gt->reset.lock,
		    !test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
		spin_unlock(&gt->reset.lock);
		if (ret)
			return -EINTR;

		rcu_read_lock();
	}
	*srcu = srcu_read_lock(&gt->reset.backoff_srcu);
	rcu_read_unlock();

	return 0;
}

void intel_gt_reset_unlock(struct intel_gt *gt, int tag)
__releases(&gt->reset.backoff_srcu)
{
	srcu_read_unlock(&gt->reset.backoff_srcu, tag);
}

int intel_gt_terminally_wedged(struct intel_gt *gt)
{
	might_sleep();

	if (!intel_gt_is_wedged(gt))
		return 0;

	if (intel_gt_has_init_error(gt))
		return -EIO;

	/* Reset still in progress? Maybe we will recover? */
	int ret;
	spin_lock(&gt->reset.lock);
	DRM_SPIN_WAIT_UNTIL(ret, &gt->reset.queue, &gt->reset.lock,
	    !test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
	spin_unlock(&gt->reset.lock);
	if (ret)
		return -EINTR;

	return intel_gt_is_wedged(gt) ? -EIO : 0;
}

void intel_gt_set_wedged_on_init(struct intel_gt *gt)
{
	BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES >
		     I915_WEDGED_ON_INIT);
	intel_gt_set_wedged(gt);
	set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
}

void intel_gt_init_reset(struct intel_gt *gt)
{
	spin_lock_init(&gt->reset.lock);
	DRM_INIT_WAITQUEUE(&gt->reset.queue, "i915rst");
	mutex_init(&gt->reset.mutex);
	init_srcu_struct(&gt->reset.backoff_srcu);

	/* no GPU until we are ready! */
	__set_bit(I915_WEDGED, &gt->reset.flags);
}

void intel_gt_fini_reset(struct intel_gt *gt)
{
	cleanup_srcu_struct(&gt->reset.backoff_srcu);
	DRM_DESTROY_WAITQUEUE(&gt->reset.queue);
	mutex_destroy(&gt->reset.mutex);
	spin_lock_destroy(&gt->reset.lock);
}

static void intel_wedge_me(struct work_struct *work)
{
	struct intel_wedge_me *w = container_of(work, typeof(*w), work.work);

	dev_err(w->gt->i915->drm.dev,
		"%s timed out, cancelling all in-flight rendering.\n",
		w->name);
	intel_gt_set_wedged(w->gt);
}

void __intel_init_wedge(struct intel_wedge_me *w,
			struct intel_gt *gt,
			long timeout,
			const char *name)
{
	w->gt = gt;
	w->name = name;

	INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me);
	schedule_delayed_work(&w->work, timeout);
}

void __intel_fini_wedge(struct intel_wedge_me *w)
{
	cancel_delayed_work_sync(&w->work);
	destroy_delayed_work_on_stack(&w->work);
	w->gt = NULL;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_reset.c"
#include "selftest_hangcheck.c"
#endif