/*	$NetBSD: intel_reset.c,v 1.6 2021/12/19 12:32:15 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2008-2018 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: intel_reset.c,v 1.6 2021/12/19 12:32:15 riastradh Exp $");

#include <linux/sched/mm.h>
#include <linux/stop_machine.h>

#include "display/intel_display_types.h"
#include "display/intel_overlay.h"

#include "gem/i915_gem_context.h"

#include "i915_drv.h"
#include "i915_gpu_error.h"
#include "i915_irq.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_reset.h"

#include "uc/intel_guc.h"
#include "uc/intel_guc_submission.h"

#include <linux/nbsd-namespace.h>

#define RESET_MAX_RETRIES 3

/* XXX How to handle concurrent GGTT updates using tiling registers? */
#define RESET_UNDER_STOP_MACHINE 0

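/*
 * Read-modify-write helpers for registers accessed with forcewake
 * already held (_fw variants): set or clear the given bits, leaving
 * the rest of the register untouched.
 */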
static void rmw_set_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
{
	intel_uncore_rmw_fw(uncore, reg, 0, set);
}

static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
{
	intel_uncore_rmw_fw(uncore, reg, clr, 0);
}

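/*
 * Skip (-EIO) every request from the hung context that follows @rq on
 * the engine's active list, so the context does not keep executing
 * stale work after the hang.
 */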
static void engine_skip_context(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct intel_context *hung_ctx = rq->context;

	if (!i915_request_is_active(rq))
		return;

	lockdep_assert_held(&engine->active.lock);
	list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
		if (rq->context == hung_ctx)
			i915_request_skip(rq, -EIO);
}

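/*
 * Charge the client (DRM file) that owns a guilty context: a banned
 * context and rapid repeat hangs both add to the client's ban score.
 */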
static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
{
	struct drm_i915_file_private *file_priv = ctx->file_priv;
	unsigned long prev_hang;
	unsigned int score;

	if (IS_ERR_OR_NULL(file_priv))
		return;

	score = 0;
	if (banned)
		score = I915_CLIENT_SCORE_CONTEXT_BAN;

	prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
	if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
		score += I915_CLIENT_SCORE_HANG_FAST;

	if (score) {
		atomic_add(score, &file_priv->ban_score);

		DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
				 ctx->name, score,
				 atomic_read(&file_priv->ban_score));
	}
}

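/*
 * Decide whether the context behind a hung request should be banned.
 * A closed or unrecoverable context, or one that hangs repeatedly in
 * quick succession, is banned; the verdict is also charged to the
 * owning client.  Returns true if the context was banned.
 */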
static bool mark_guilty(struct i915_request *rq)
{
	struct i915_gem_context *ctx;
	unsigned long prev_hang;
	bool banned;
	int i;

	rcu_read_lock();
	ctx = rcu_dereference(rq->context->gem_context);
	if (ctx && !kref_get_unless_zero(&ctx->ref))
		ctx = NULL;
	rcu_read_unlock();
	if (!ctx)
		return false;

	if (i915_gem_context_is_closed(ctx)) {
		intel_context_set_banned(rq->context);
		banned = true;
		goto out;
	}

	atomic_inc(&ctx->guilty_count);

	/* Cool contexts are too cool to be banned! (Used for reset testing.) */
	if (!i915_gem_context_is_bannable(ctx)) {
		banned = false;
		goto out;
	}

	dev_notice(ctx->i915->drm.dev,
		   "%s context reset due to GPU hang\n",
		   ctx->name);

	/* Record the timestamp for the last N hangs */
	prev_hang = ctx->hang_timestamp[0];
	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp) - 1; i++)
		ctx->hang_timestamp[i] = ctx->hang_timestamp[i + 1];
	ctx->hang_timestamp[i] = jiffies;

	/* If we have hung N+1 times in rapid succession, we ban the context! */
	banned = !i915_gem_context_is_recoverable(ctx);
	if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES))
		banned = true;
	if (banned) {
		DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n",
				 ctx->name, atomic_read(&ctx->guilty_count));
		intel_context_set_banned(rq->context);
	}

	client_mark_guilty(ctx, banned);

out:
	i915_gem_context_put(ctx);
	return banned;
}

static void mark_innocent(struct i915_request *rq)
{
	struct i915_gem_context *ctx;

	rcu_read_lock();
	ctx = rcu_dereference(rq->context->gem_context);
	if (ctx)
		atomic_inc(&ctx->active_count);
	rcu_read_unlock();
}

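/*
 * Mark a request that was caught up in a reset: a guilty request is
 * skipped with -EIO (and may ban its context), while an innocent one
 * fails with -EAGAIN so it can be resubmitted.
 */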
void __i915_request_reset(struct i915_request *rq, bool guilty)
{
	RQ_TRACE(rq, "guilty? %s\n", yesno(guilty));

	GEM_BUG_ON(i915_request_completed(rq));

	rcu_read_lock(); /* protect the GEM context */
	if (guilty) {
		i915_request_skip(rq, -EIO);
		if (mark_guilty(rq))
			engine_skip_context(rq);
	} else {
		dma_fence_set_error(&rq->fence, -EAGAIN);
		mark_innocent(rq);
	}
	rcu_read_unlock();
}

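/* The GDRST byte in PCI config space reports reset status on these chips. */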
static bool i915_in_reset(struct pci_dev *pdev)
{
	u8 gdrst;

	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
	return gdrst & GRDOM_RESET_STATUS;
}

static int i915_do_reset(struct intel_gt *gt,
			 intel_engine_mask_t engine_mask,
			 unsigned int retry)
{
	struct pci_dev *pdev = gt->i915->drm.pdev;
	int err;

	/* Assert reset for at least 20 usec, and wait for acknowledgement. */
	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
	udelay(50);
	err = wait_for_atomic(i915_in_reset(pdev), 50);

	/* Clear the reset request. */
	pci_write_config_byte(pdev, I915_GDRST, 0);
	udelay(50);
	if (!err)
		err = wait_for_atomic(!i915_in_reset(pdev), 50);

	return err;
}

static bool g4x_reset_complete(struct pci_dev *pdev)
{
	u8 gdrst;

	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
	return (gdrst & GRDOM_RESET_ENABLE) == 0;
}

static int g33_do_reset(struct intel_gt *gt,
			intel_engine_mask_t engine_mask,
			unsigned int retry)
{
	struct pci_dev *pdev = gt->i915->drm.pdev;

	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
	return wait_for_atomic(g4x_reset_complete(pdev), 50);
}

static int g4x_do_reset(struct intel_gt *gt,
			intel_engine_mask_t engine_mask,
			unsigned int retry)
{
	struct pci_dev *pdev = gt->i915->drm.pdev;
	struct intel_uncore *uncore = gt->uncore;
	int ret;

	/* WaVcpClkGateDisableForMediaReset:ctg,elk */
	rmw_set_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE);
	intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D);

	pci_write_config_byte(pdev, I915_GDRST,
			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
	ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
	if (ret) {
		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
		goto out;
	}

	pci_write_config_byte(pdev, I915_GDRST,
			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
	ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
	if (ret) {
		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
		goto out;
	}

out:
	pci_write_config_byte(pdev, I915_GDRST, 0);

	rmw_clear_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE);
	intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D);

	return ret;
}

static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
			unsigned int retry)
{
	struct intel_uncore *uncore = gt->uncore;
	int ret;

	intel_uncore_write_fw(uncore, ILK_GDSR,
			      ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
	ret = __intel_wait_for_register_fw(uncore, ILK_GDSR,
					   ILK_GRDOM_RESET_ENABLE, 0,
					   5000, 0,
					   NULL);
	if (ret) {
		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
		goto out;
	}

	intel_uncore_write_fw(uncore, ILK_GDSR,
			      ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
	ret = __intel_wait_for_register_fw(uncore, ILK_GDSR,
					   ILK_GRDOM_RESET_ENABLE, 0,
					   5000, 0,
					   NULL);
	if (ret) {
		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
		goto out;
	}

out:
	intel_uncore_write_fw(uncore, ILK_GDSR, 0);
	intel_uncore_posting_read_fw(uncore, ILK_GDSR);
	return ret;
}

/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */
static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
{
	struct intel_uncore *uncore = gt->uncore;
	int err;

	/*
	 * GEN6_GDRST is not in the gt power well, no need to check
	 * for fifo space for the write or forcewake the chip for
	 * the read
	 */
	intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);

	/* Wait for the device to ack the reset requests */
	err = __intel_wait_for_register_fw(uncore,
					   GEN6_GDRST, hw_domain_mask, 0,
					   500, 0,
					   NULL);
	if (err)
		DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
				 hw_domain_mask);

	return err;
}

static int gen6_reset_engines(struct intel_gt *gt,
			      intel_engine_mask_t engine_mask,
			      unsigned int retry)
{
	static const u32 hw_engine_mask[] = {
		[RCS0]  = GEN6_GRDOM_RENDER,
		[BCS0]  = GEN6_GRDOM_BLT,
		[VCS0]  = GEN6_GRDOM_MEDIA,
		[VCS1]  = GEN8_GRDOM_MEDIA2,
		[VECS0] = GEN6_GRDOM_VECS,
	};
	struct intel_engine_cs *engine;
	u32 hw_mask;

	if (engine_mask == ALL_ENGINES) {
		hw_mask = GEN6_GRDOM_FULL;
	} else {
		intel_engine_mask_t tmp;

		hw_mask = 0;
		for_each_engine_masked(engine, gt, engine_mask, tmp) {
			GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
			hw_mask |= hw_engine_mask[engine->id];
		}
	}

	return gen6_hw_domain_reset(gt, hw_mask);
}

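/*
 * On gen11 the video decode/enhancement engines can share an SFC
 * (scaler and format converter) unit.  If the engine being reset has
 * an SFC in use, force a lock on it and add its reset domain to
 * *hw_mask so it is reset together with the engine.
 */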
static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask)
{
	struct intel_uncore *uncore = engine->uncore;
	u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
	i915_reg_t sfc_forced_lock, sfc_forced_lock_ack;
	u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit;
	i915_reg_t sfc_usage;
	u32 sfc_usage_bit;
	u32 sfc_reset_bit;
	int ret;

	switch (engine->class) {
	case VIDEO_DECODE_CLASS:
		if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
			return 0;

		sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
		sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;

		sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine);
		sfc_forced_lock_ack_bit = GEN11_VCS_SFC_LOCK_ACK_BIT;

		sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine);
		sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT;
		sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance);
		break;

	case VIDEO_ENHANCEMENT_CLASS:
		sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
		sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;

		sfc_forced_lock_ack = GEN11_VECS_SFC_LOCK_ACK(engine);
		sfc_forced_lock_ack_bit = GEN11_VECS_SFC_LOCK_ACK_BIT;

		sfc_usage = GEN11_VECS_SFC_USAGE(engine);
		sfc_usage_bit = GEN11_VECS_SFC_USAGE_BIT;
		sfc_reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance);
		break;

	default:
		return 0;
	}

	/*
	 * If the engine is using a SFC, tell the engine that a software reset
	 * is going to happen. The engine will then try to force lock the SFC.
	 * If SFC ends up being locked to the engine we want to reset, we have
	 * to reset it as well (we will unlock it once the reset sequence is
	 * completed).
	 */
	if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit))
		return 0;

	rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);

	ret = __intel_wait_for_register_fw(uncore,
					   sfc_forced_lock_ack,
					   sfc_forced_lock_ack_bit,
					   sfc_forced_lock_ack_bit,
					   1000, 0, NULL);

	/* Was the SFC released while we were trying to lock it? */
	if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit))
		return 0;

	if (ret) {
		DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
		return ret;
	}

	*hw_mask |= sfc_reset_bit;
	return 0;
}

static void gen11_unlock_sfc(struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
	i915_reg_t sfc_forced_lock;
	u32 sfc_forced_lock_bit;

	switch (engine->class) {
	case VIDEO_DECODE_CLASS:
		if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
			return;

		sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
		sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
		break;

	case VIDEO_ENHANCEMENT_CLASS:
		sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
		sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
		break;

	default:
		return;
	}

	rmw_clear_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);
}

static int gen11_reset_engines(struct intel_gt *gt,
			       intel_engine_mask_t engine_mask,
			       unsigned int retry)
{
	static const u32 hw_engine_mask[] = {
		[RCS0]  = GEN11_GRDOM_RENDER,
		[BCS0]  = GEN11_GRDOM_BLT,
		[VCS0]  = GEN11_GRDOM_MEDIA,
		[VCS1]  = GEN11_GRDOM_MEDIA2,
		[VCS2]  = GEN11_GRDOM_MEDIA3,
		[VCS3]  = GEN11_GRDOM_MEDIA4,
		[VECS0] = GEN11_GRDOM_VECS,
		[VECS1] = GEN11_GRDOM_VECS2,
	};
	struct intel_engine_cs *engine;
	intel_engine_mask_t tmp;
	u32 hw_mask;
	int ret;

	if (engine_mask == ALL_ENGINES) {
		hw_mask = GEN11_GRDOM_FULL;
	} else {
		hw_mask = 0;
		for_each_engine_masked(engine, gt, engine_mask, tmp) {
			GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
			hw_mask |= hw_engine_mask[engine->id];
			ret = gen11_lock_sfc(engine, &hw_mask);
			if (ret)
				goto sfc_unlock;
		}
	}

	ret = gen6_hw_domain_reset(gt, hw_mask);

sfc_unlock:
	/*
	 * We unlock the SFC based on the lock status and not on the result
	 * of gen11_lock_sfc, to make sure we clean up properly if something
	 * went wrong during locking (e.g. the lock was acquired after the
	 * timeout expired).
	 */
	if (engine_mask != ALL_ENGINES)
		for_each_engine_masked(engine, gt, engine_mask, tmp)
			gen11_unlock_sfc(engine);

	return ret;
}

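/*
 * Ask the engine to quiesce via RING_RESET_CTL and wait for it to
 * report ready-for-reset.  Catastrophic errors bypass the handshake
 * entirely (HAS#396813).
 */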
static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	const i915_reg_t reg = RING_RESET_CTL(engine->mmio_base);
	u32 request, mask, ack;
	int ret;

	ack = intel_uncore_read_fw(uncore, reg);
	if (ack & RESET_CTL_CAT_ERROR) {
		/*
		 * For catastrophic errors, ready-for-reset sequence
		 * needs to be bypassed: HAS#396813
		 */
		request = RESET_CTL_CAT_ERROR;
		mask = RESET_CTL_CAT_ERROR;

		/* Catastrophic errors need to be cleared by HW */
		ack = 0;
	} else if (!(ack & RESET_CTL_READY_TO_RESET)) {
		request = RESET_CTL_REQUEST_RESET;
		mask = RESET_CTL_READY_TO_RESET;
		ack = RESET_CTL_READY_TO_RESET;
	} else {
		return 0;
	}

	intel_uncore_write_fw(uncore, reg, _MASKED_BIT_ENABLE(request));
	ret = __intel_wait_for_register_fw(uncore, reg, mask, ack,
					   700, 0, NULL);
	if (ret)
		DRM_ERROR("%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n",
			  engine->name, request,
			  intel_uncore_read_fw(uncore, reg));

	return ret;
}

static void gen8_engine_reset_cancel(struct intel_engine_cs *engine)
{
	intel_uncore_write_fw(engine->uncore,
			      RING_RESET_CTL(engine->mmio_base),
			      _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET));
}

static int gen8_reset_engines(struct intel_gt *gt,
			      intel_engine_mask_t engine_mask,
			      unsigned int retry)
{
	struct intel_engine_cs *engine;
	const bool reset_non_ready = retry >= 1;
	intel_engine_mask_t tmp;
	int ret;

	for_each_engine_masked(engine, gt, engine_mask, tmp) {
		ret = gen8_engine_reset_prepare(engine);
		if (ret && !reset_non_ready)
			goto skip_reset;

		/*
		 * If this is not the first failed attempt to prepare,
		 * we proceed anyway.
		 *
		 * By doing so we risk context corruption and, on
		 * some gens (kbl), a possible system hang if the
		 * reset happens during active bb execution.
		 *
		 * We would rather take context corruption than a
		 * failed reset with a wedged driver/gpu, and the
		 * active bb execution case should be covered by the
		 * stop_engines() we do before the reset.
		 */
	}

	if (INTEL_GEN(gt->i915) >= 11)
		ret = gen11_reset_engines(gt, engine_mask, retry);
	else
		ret = gen6_reset_engines(gt, engine_mask, retry);

skip_reset:
	for_each_engine_masked(engine, gt, engine_mask, tmp)
		gen8_engine_reset_cancel(engine);

	return ret;
}

static int mock_reset(struct intel_gt *gt,
		      intel_engine_mask_t mask,
		      unsigned int retry)
{
	return 0;
}

typedef int (*reset_func)(struct intel_gt *,
			  intel_engine_mask_t engine_mask,
			  unsigned int retry);

static reset_func intel_get_gpu_reset(const struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (is_mock_gt(gt))
		return mock_reset;
	else if (INTEL_GEN(i915) >= 8)
		return gen8_reset_engines;
	else if (INTEL_GEN(i915) >= 6)
		return gen6_reset_engines;
	else if (INTEL_GEN(i915) >= 5)
		return ilk_do_reset;
	else if (IS_G4X(i915))
		return g4x_do_reset;
	else if (IS_G33(i915) || IS_PINEVIEW(i915))
		return g33_do_reset;
	else if (INTEL_GEN(i915) >= 3)
		return i915_do_reset;
	else
		return NULL;
}

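/*
 * Illustrative usage: a full-chip reset retries up to RESET_MAX_RETRIES,
 *
 *	err = __intel_gt_reset(gt, ALL_ENGINES);
 *
 * while a single-engine reset passes just that engine's mask and is
 * attempted only once.
 */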
int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
	const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1;
	reset_func reset;
	int ret = -ETIMEDOUT;
	int retry;

	reset = intel_get_gpu_reset(gt);
	if (!reset)
		return -ENODEV;

	/*
	 * If the power well sleeps during the reset, the reset
	 * request may be dropped and never completes (causing -EIO).
	 */
	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
	for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
		GT_TRACE(gt, "engine_mask=%x\n", engine_mask);
		preempt_disable();
		ret = reset(gt, engine_mask, retry);
		preempt_enable();
	}
	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

	return ret;
}

bool intel_has_gpu_reset(const struct intel_gt *gt)
{
	if (!i915_modparams.reset)
		return false;

	return intel_get_gpu_reset(gt);
}

bool intel_has_reset_engine(const struct intel_gt *gt)
{
	if (i915_modparams.reset < 2)
		return false;

	return INTEL_INFO(gt->i915)->has_reset_engine;
}

int intel_reset_guc(struct intel_gt *gt)
{
	u32 guc_domain =
		INTEL_GEN(gt->i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC;
	int ret;

	GEM_BUG_ON(!HAS_GT_UC(gt->i915));

	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
	ret = gen6_hw_domain_reset(gt, guc_domain);
	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

	return ret;
}

/*
 * Ensure the irq handler finishes, and is not run again.
 */
static void reset_prepare_engine(struct intel_engine_cs *engine)
{
	/*
	 * During the reset sequence, we must prevent the engine from
	 * entering RC6. As the context state is undefined until we restart
	 * the engine, if it does enter RC6 during the reset, the state
	 * written to the powercontext is undefined and so we may lose
	 * GPU state upon resume, i.e. fail to restart after a reset.
	 */
	intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
	if (engine->reset.prepare)
		engine->reset.prepare(engine);
}

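/*
 * Revoke any userspace mmaps of fence-backed GGTT ranges so that no
 * user access can race with the reset; faults will be re-taken (and
 * the fences restored) afterwards.  On NetBSD this removes the
 * physical mappings directly via pmap.
 */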
static void revoke_mmaps(struct intel_gt *gt)
{
	int i;

	for (i = 0; i < gt->ggtt->num_fences; i++) {
		struct drm_vma_offset_node *node;
		struct i915_vma *vma;
		u64 vma_offset;

		vma = READ_ONCE(gt->ggtt->fence_regs[i].vma);
		if (!vma)
			continue;

		if (!i915_vma_has_userfault(vma))
			continue;

		GEM_BUG_ON(vma->fence != &gt->ggtt->fence_regs[i]);

		if (!vma->mmo)
			continue;

		node = &vma->mmo->vma_node;
		vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;

#ifdef __NetBSD__
		__USE(vma_offset);
		__USE(node);
		paddr_t pa = gt->i915->ggtt.gmadr.start + vma->node.start;
		vsize_t npgs = vma->size >> PAGE_SHIFT;
		while (npgs --> 0)
			pmap_pv_protect(pa + (npgs << PAGE_SHIFT),
			    VM_PROT_NONE);
#else
		unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping,
				    drm_vma_node_offset_addr(node) + vma_offset,
				    vma->size,
				    1);
#endif
	}
}

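/*
 * Wake up (and hold awake) every engine that is already active, and
 * run its reset.prepare hook; returns the mask of engines that were
 * awake so reset_finish() can drop the extra references.
 */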
static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	intel_engine_mask_t awake = 0;
	enum intel_engine_id id;

	for_each_engine(engine, gt, id) {
		if (intel_engine_pm_get_if_awake(engine))
			awake |= engine->mask;
		reset_prepare_engine(engine);
	}

	intel_uc_reset_prepare(&gt->uc);

	return awake;
}

static void gt_revoke(struct intel_gt *gt)
{
	revoke_mmaps(gt);
}

static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err;

	/*
	 * Everything depends on having the GTT running, so we need to start
	 * there.
	 */
	err = i915_ggtt_enable_hw(gt->i915);
	if (err)
		return err;

	for_each_engine(engine, gt, id)
		__intel_engine_reset(engine, stalled_mask & engine->mask);

	i915_gem_restore_fences(gt->ggtt);

	return err;
}

static void reset_finish_engine(struct intel_engine_cs *engine)
{
	if (engine->reset.finish)
		engine->reset.finish(engine);
	intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);

	intel_engine_signal_breadcrumbs(engine);
}

static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, gt, id) {
		reset_finish_engine(engine);
		if (awake & engine->mask)
			intel_engine_pm_put(engine);
	}
}

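/*
 * Once the device is wedged, every request is "submitted" through this
 * stub: it is marked complete immediately with fence error -EIO, so
 * waiters are released rather than left hanging.
 */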
static void nop_submit_request(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	RQ_TRACE(request, "-EIO\n");
	dma_fence_set_error(&request->fence, -EIO);

	spin_lock_irqsave(&engine->active.lock, flags);
	__i915_request_submit(request);
	i915_request_mark_complete(request);
	spin_unlock_irqrestore(&engine->active.lock, flags);

	intel_engine_signal_breadcrumbs(engine);
}

static void __intel_gt_set_wedged(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	intel_engine_mask_t awake;
	enum intel_engine_id id;

	if (test_bit(I915_WEDGED, &gt->reset.flags))
		return;

	if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) {
		struct drm_printer p = drm_debug_printer(__func__);

		for_each_engine(engine, gt, id)
			intel_engine_dump(engine, &p, "%s\n", engine->name);
	}

	GT_TRACE(gt, "start\n");

	/*
	 * First, stop submission to hw, but do not yet complete requests by
	 * rolling the global seqno forward (since this would complete requests
	 * for which we haven't set the fence error to EIO yet).
	 */
	awake = reset_prepare(gt);

	/* Even if the GPU reset fails, it should still stop the engines */
	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
		__intel_gt_reset(gt, ALL_ENGINES);

	for_each_engine(engine, gt, id)
		engine->submit_request = nop_submit_request;

	/*
	 * Make sure no request can slip through without getting completed by
	 * either this call here to intel_engine_write_global_seqno, or the one
	 * in nop_submit_request.
	 */
	synchronize_rcu_expedited();
	set_bit(I915_WEDGED, &gt->reset.flags);

	/* Mark all executing requests as skipped */
	for_each_engine(engine, gt, id)
		if (engine->reset.cancel)
			engine->reset.cancel(engine);

	reset_finish(gt, awake);

	GT_TRACE(gt, "end\n");
}

void intel_gt_set_wedged(struct intel_gt *gt)
{
	intel_wakeref_t wakeref;

	mutex_lock(&gt->reset.mutex);
	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		__intel_gt_set_wedged(gt);
	mutex_unlock(&gt->reset.mutex);
}

static bool __intel_gt_unset_wedged(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;
	struct intel_timeline *tl;
	bool ok;

	if (!test_bit(I915_WEDGED, &gt->reset.flags))
		return true;

	/* Never fully initialised, recovery impossible */
	if (test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags))
		return false;

	GT_TRACE(gt, "start\n");

	/*
	 * Before unwedging, make sure that all pending operations
	 * are flushed and errored out - we may have requests waiting upon
	 * third party fences. We marked all inflight requests as -EIO, and
	 * every execbuf since then has returned -EIO; for consistency we
	 * want all the currently pending requests to be marked as -EIO as
	 * well, which is done inside our nop_submit_request - and so we
	 * must wait.
	 *
	 * No more can be submitted until we reset the wedged bit.
	 */
	spin_lock(&timelines->lock);
	list_for_each_entry(tl, &timelines->active_list, link) {
		struct dma_fence *fence;

		fence = i915_active_fence_get(&tl->last_request);
		if (!fence)
			continue;

		spin_unlock(&timelines->lock);

		/*
		 * All internal dependencies (i915_requests) will have
		 * been flushed by the set-wedge, but we may be stuck waiting
		 * for external fences. These should all be capped to 10s
		 * (I915_FENCE_TIMEOUT) so this wait should not be unbounded
		 * in the worst case.
		 */
		dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT);
		dma_fence_put(fence);

		/* Restart iteration after dropping the lock */
		spin_lock(&timelines->lock);
		tl = list_entry(&timelines->active_list, typeof(*tl), link);
	}
	spin_unlock(&timelines->lock);

	/* We must reset pending GPU events before restoring our submission */
	ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
		ok = __intel_gt_reset(gt, ALL_ENGINES) == 0;
	if (!ok) {
		/*
		 * Warn CI about the unrecoverable wedged condition.
		 * Time for a reboot.
		 */
		add_taint_for_CI(TAINT_WARN);
		return false;
	}

	/*
	 * Undo nop_submit_request. We prevent all new i915 requests from
	 * being queued (by disallowing execbuf whilst wedged) so having
	 * waited for all active requests above, we know the system is idle
	 * and do not have to worry about a thread being inside
	 * engine->submit_request() as we swap over. So unlike installing
	 * the nop_submit_request on reset, we can do this from normal
	 * context and do not require stop_machine().
	 */
	intel_engines_reset_default_submission(gt);

	GT_TRACE(gt, "end\n");

	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
	clear_bit(I915_WEDGED, &gt->reset.flags);

	return true;
}

bool intel_gt_unset_wedged(struct intel_gt *gt)
{
	bool result;

	mutex_lock(&gt->reset.mutex);
	result = __intel_gt_unset_wedged(gt);
	mutex_unlock(&gt->reset.mutex);

	return result;
}

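/*
 * Perform the chip reset itself, retrying with a back-off of
 * 10ms, 20ms, ... up to RESET_MAX_RETRIES attempts before giving up,
 * then re-enable the GTT and reset each engine's state.
 */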
static int do_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
{
	int err, i;

	gt_revoke(gt);

	err = __intel_gt_reset(gt, ALL_ENGINES);
	for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
		msleep(10 * (i + 1));
		err = __intel_gt_reset(gt, ALL_ENGINES);
	}
	if (err)
		return err;

	return gt_reset(gt, stalled_mask);
}

static int resume(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int ret;

	for_each_engine(engine, gt, id) {
		ret = engine->resume(engine);
		if (ret)
			return ret;
	}

	return 0;
}

/**
 * intel_gt_reset - reset chip after a hang
 * @gt: #intel_gt to reset
 * @stalled_mask: mask of the stalled engines with the guilty requests
 * @reason: user error message for why we are resetting
 *
 * Reset the chip.  Useful if a hang is detected. Marks the device as wedged
 * on failure.
 *
 * Procedure is fairly simple:
 *   - reset the chip using the reset reg
 *   - re-init context state
 *   - re-init hardware status page
 *   - re-init ring buffer
 *   - re-init interrupt state
 *   - re-init display
 */
void intel_gt_reset(struct intel_gt *gt,
		    intel_engine_mask_t stalled_mask,
		    const char *reason)
{
	intel_engine_mask_t awake;
	int ret;

	GT_TRACE(gt, "flags=%lx\n", gt->reset.flags);

	might_sleep();
	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
	mutex_lock(&gt->reset.mutex);

	/* Clear any previous failed attempts at recovery. Time to try again. */
	if (!__intel_gt_unset_wedged(gt))
		goto unlock;

	if (reason)
		dev_notice(gt->i915->drm.dev,
			   "Resetting chip for %s\n", reason);
	atomic_inc(&gt->i915->gpu_error.reset_count);

	awake = reset_prepare(gt);

	if (!intel_has_gpu_reset(gt)) {
		if (i915_modparams.reset)
			dev_err(gt->i915->drm.dev, "GPU reset not supported\n");
		else
			DRM_DEBUG_DRIVER("GPU reset disabled\n");
		goto error;
	}

	if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
		intel_runtime_pm_disable_interrupts(gt->i915);

	if (do_reset(gt, stalled_mask)) {
		dev_err(gt->i915->drm.dev, "Failed to reset chip\n");
		goto taint;
	}

	if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
		intel_runtime_pm_enable_interrupts(gt->i915);

	intel_overlay_reset(gt->i915);

	/*
	 * Next we need to restore the context, but we don't use those
	 * yet either...
	 *
	 * Ring buffer needs to be re-initialized in the KMS case, or if X
	 * was running at the time of the reset (i.e. we weren't VT
	 * switched away).
	 */
	ret = intel_gt_init_hw(gt);
	if (ret) {
		DRM_ERROR("Failed to initialise HW following reset (%d)\n",
			  ret);
		goto taint;
	}

	ret = resume(gt);
	if (ret)
		goto taint;

finish:
	reset_finish(gt, awake);
unlock:
	mutex_unlock(&gt->reset.mutex);
	return;

taint:
	/*
	 * History tells us that if we cannot reset the GPU now, we
	 * never will. This then impacts everything that is run
	 * subsequently. On failing the reset, we mark the driver
	 * as wedged, preventing further execution on the GPU.
	 * We also want to go one step further and add a taint to the
	 * kernel so that any subsequent faults can be traced back to
	 * this failure. This is important for CI, where if the
	 * GPU/driver fails we would like to reboot and restart testing
	 * rather than continue on into oblivion. For everyone else,
	 * the system should still plod along, but they have been warned!
	 */
	add_taint_for_CI(TAINT_WARN);
error:
	__intel_gt_set_wedged(gt);
	goto finish;
}

static inline int intel_gt_reset_engine(struct intel_engine_cs *engine)
{
	return __intel_gt_reset(engine->gt, engine->mask);
}

/**
 * intel_engine_reset - reset GPU engine to recover from a hang
 * @engine: engine to reset
 * @msg: reason for GPU reset; or NULL for no dev_notice()
 *
 * Reset a specific GPU engine. Useful if a hang is detected.
 * Returns zero on successful reset or otherwise an error code.
 *
 * Procedure is:
 *  - identify the request that caused the hang and drop it
 *  - reset engine (which will force the engine to idle)
 *  - re-init/configure engine
 */
int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
{
	struct intel_gt *gt = engine->gt;
	bool uses_guc = intel_engine_in_guc_submission_mode(engine);
	int ret;

	ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags);
	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags));

	if (!intel_engine_pm_get_if_awake(engine))
		return 0;

	reset_prepare_engine(engine);

	if (msg)
		dev_notice(engine->i915->drm.dev,
			   "Resetting %s for %s\n", engine->name, msg);
	atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);

	if (!uses_guc)
		ret = intel_gt_reset_engine(engine);
	else
		ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
	if (ret) {
		/* If we fail here, we expect to fall back to a global reset */
   1146  1.1  riastrad 		DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
   1147  1.1  riastrad 				 uses_guc ? "GuC " : "",
   1148  1.1  riastrad 				 engine->name, ret);
   1149  1.1  riastrad 		goto out;
   1150  1.1  riastrad 	}

	/*
	 * The request that caused the hang is stuck on the ELSP: we know
	 * the active request and can drop it, then adjust the ring head to
	 * skip the offending request so that the remaining requests in the
	 * queue resume executing.
	 */
	__intel_engine_reset(engine, true);

	/*
	 * The engine and its registers (and workarounds in case of render)
	 * have been reset to their default values. Follow the init_ring
	 * process to program RING_MODE, HWSP and re-enable submission.
	 */
	ret = engine->resume(engine);

out:
	intel_engine_cancel_stop_cs(engine);
	reset_finish_engine(engine);
	intel_engine_pm_put_async(engine);
	return ret;
}
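
/*
 * Illustrative sketch (not part of the driver): callers of
 * intel_engine_reset() must own the corresponding I915_RESET_ENGINE bit,
 * as asserted by the GEM_BUG_ON above.  A minimal caller, modelled on
 * intel_gt_handle_error() below, would look something like:
 *
 *	if (!test_and_set_bit(I915_RESET_ENGINE + engine->id,
 *			      &gt->reset.flags)) {
 *		intel_engine_reset(engine, "example");
 *		clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
 *				      &gt->reset.flags);
 *	}
 */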

static void intel_gt_reset_global(struct intel_gt *gt,
				  u32 engine_mask,
				  const char *reason)
{
#ifndef __NetBSD__		/* XXX kobject uevent...?  */
	struct kobject *kobj = &gt->i915->drm.primary->kdev->kobj;
	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
#endif
	struct intel_wedge_me w;

#ifndef __NetBSD__
	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
#endif

	DRM_DEBUG_DRIVER("resetting chip\n");
#ifndef __NetBSD__
	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
#endif

	/* Use a watchdog to ensure that our reset completes */
	intel_wedge_on_timeout(&w, gt, 5 * HZ) {
		intel_prepare_reset(gt->i915);

		/* Flush everyone using a resource about to be clobbered */
		synchronize_srcu_expedited(&gt->reset.backoff_srcu);

		intel_gt_reset(gt, engine_mask, reason);

		intel_finish_reset(gt->i915);
	}

#ifndef __NetBSD__		/* XXX kobject uevent...?  */
	if (!test_bit(I915_WEDGED, &gt->reset.flags))
		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
#endif
}

/**
 * intel_gt_handle_error - handle a GPU error
 * @gt: the intel_gt
 * @engine_mask: mask representing engines that are hung
 * @flags: control flags
 * @fmt: Error message format string
 *
 * Do some basic checking of register state at error time and
 * dump it to the syslog.  Also call i915_capture_error_state() to make
 * sure we get a record and make it available in debugfs.  Fire a uevent
 * so userspace knows something bad happened (should trigger collection
 * of a ring dump etc.).
 */
void intel_gt_handle_error(struct intel_gt *gt,
			   intel_engine_mask_t engine_mask,
			   unsigned long flags,
			   const char *fmt, ...)
{
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	intel_engine_mask_t tmp;
	char error_msg[80];
	char *msg = NULL;

	if (fmt) {
		va_list args;

		va_start(args, fmt);
		vscnprintf(error_msg, sizeof(error_msg), fmt, args);
		va_end(args);

		msg = error_msg;
	}

	/*
	 * In most cases it's guaranteed that we get here with an RPM
	 * reference held, for example because there is a pending GPU
	 * request that won't finish until the reset is done. This
	 * isn't the case at least when we get here by doing a
	 * simulated reset via debugfs, so get an RPM reference.
	 */
	wakeref = intel_runtime_pm_get(gt->uncore->rpm);

	engine_mask &= INTEL_INFO(gt->i915)->engine_mask;

	if (flags & I915_ERROR_CAPTURE) {
		i915_capture_error_state(gt->i915);
		intel_gt_clear_error_registers(gt, engine_mask);
	}

	/*
	 * Try engine reset when available. We fall back to a full GPU
	 * reset if the per-engine reset fails.
	 */
	if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
		for_each_engine_masked(engine, gt, engine_mask, tmp) {
			BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
			if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
					     &gt->reset.flags))
				continue;

			if (intel_engine_reset(engine, msg) == 0)
				engine_mask &= ~engine->mask;

			clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
					      &gt->reset.flags);
		}
	}

	if (!engine_mask)
		goto out;

	/* Full reset needs the mutex; stop any other user trying to do so. */
	if (test_and_set_bit(I915_RESET_BACKOFF, &gt->reset.flags)) {
		int ret;

		spin_lock(&gt->reset.lock);
		DRM_SPIN_WAIT_NOINTR_UNTIL(ret,
		    &gt->reset.queue,
		    &gt->reset.lock,
		    !test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
		spin_unlock(&gt->reset.lock);
		goto out; /* piggy-back on the other reset */
	}

	/* Make sure i915_reset_trylock() sees the I915_RESET_BACKOFF */
	synchronize_rcu_expedited();

	/* Prevent any other reset-engine attempt. */
	for_each_engine(engine, gt, tmp) {
		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
					&gt->reset.flags))
			wait_on_bit(&gt->reset.flags,
				    I915_RESET_ENGINE + engine->id,
				    TASK_UNINTERRUPTIBLE);
	}

	intel_gt_reset_global(gt, engine_mask, msg);

	for_each_engine(engine, gt, tmp)
		clear_bit_unlock(I915_RESET_ENGINE + engine->id,
				 &gt->reset.flags);
	clear_bit_unlock(I915_RESET_BACKOFF, &gt->reset.flags);
	smp_mb__after_atomic();
	spin_lock(&gt->reset.lock);
	DRM_SPIN_WAKEUP_ALL(&gt->reset.queue, &gt->reset.lock);
	spin_unlock(&gt->reset.lock);

out:
	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
}
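
/*
 * Illustrative sketch (not part of the driver): a hang detector would
 * typically report a stuck engine with something like
 *
 *	intel_gt_handle_error(engine->gt, engine->mask, I915_ERROR_CAPTURE,
 *			      "example hang on %s", engine->name);
 *
 * which captures the error state and then tries a per-engine reset before
 * escalating to a full GT reset as above.
 */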

int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
{
	int ret;

	might_lock(&gt->reset.backoff_srcu);
	might_sleep();

	rcu_read_lock();
	while (test_bit(I915_RESET_BACKOFF, &gt->reset.flags)) {
		rcu_read_unlock();

		spin_lock(&gt->reset.lock);
		DRM_SPIN_WAIT_UNTIL(ret, &gt->reset.queue, &gt->reset.lock,
		    !test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
		spin_unlock(&gt->reset.lock);
		if (ret)
			return -EINTR;

		rcu_read_lock();
	}
	*srcu = srcu_read_lock(&gt->reset.backoff_srcu);
	rcu_read_unlock();

	return 0;
}

void intel_gt_reset_unlock(struct intel_gt *gt, int tag)
__releases(&gt->reset.backoff_srcu)
{
	srcu_read_unlock(&gt->reset.backoff_srcu, tag);
}
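
/*
 * Illustrative sketch (not part of the driver): paths that must not run
 * concurrently with a reset bracket their work with the SRCU lock, e.g.
 *
 *	int srcu, err;
 *
 *	err = intel_gt_reset_trylock(gt, &srcu);
 *	if (err)
 *		return err;
 *	... touch hardware that a reset would clobber ...
 *	intel_gt_reset_unlock(gt, srcu);
 */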

int intel_gt_terminally_wedged(struct intel_gt *gt)
{
	int ret;

	might_sleep();

	if (!intel_gt_is_wedged(gt))
		return 0;

	if (intel_gt_has_init_error(gt))
		return -EIO;

	/* Reset still in progress? Maybe we will recover? */
	spin_lock(&gt->reset.lock);
	DRM_SPIN_WAIT_UNTIL(ret, &gt->reset.queue, &gt->reset.lock,
	    !test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
	spin_unlock(&gt->reset.lock);
	if (ret)
		return -EINTR;

	return intel_gt_is_wedged(gt) ? -EIO : 0;
}
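
/*
 * Illustrative sketch (not part of the driver): submission paths can use
 * intel_gt_terminally_wedged() to fail fast rather than queue work on a
 * dead GPU, e.g.
 *
 *	err = intel_gt_terminally_wedged(gt);
 *	if (err)
 *		return err;
 */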

void intel_gt_set_wedged_on_init(struct intel_gt *gt)
{
	BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES >
		     I915_WEDGED_ON_INIT);
	intel_gt_set_wedged(gt);
	set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
}

void intel_gt_init_reset(struct intel_gt *gt)
{
	spin_lock_init(&gt->reset.lock);
	DRM_INIT_WAITQUEUE(&gt->reset.queue, "i915rst");
	mutex_init(&gt->reset.mutex);
	init_srcu_struct(&gt->reset.backoff_srcu);

	/* no GPU until we are ready! */
	__set_bit(I915_WEDGED, &gt->reset.flags);
}

void intel_gt_fini_reset(struct intel_gt *gt)
{
	cleanup_srcu_struct(&gt->reset.backoff_srcu);
	DRM_DESTROY_WAITQUEUE(&gt->reset.queue);
	mutex_destroy(&gt->reset.mutex);
	spin_lock_destroy(&gt->reset.lock);
}

static void intel_wedge_me(struct work_struct *work)
{
	struct intel_wedge_me *w = container_of(work, typeof(*w), work.work);

	dev_err(w->gt->i915->drm.dev,
		"%s timed out, cancelling all in-flight rendering.\n",
		w->name);
	intel_gt_set_wedged(w->gt);
}

void __intel_init_wedge(struct intel_wedge_me *w,
			struct intel_gt *gt,
			long timeout,
			const char *name)
{
	w->gt = gt;
	w->name = name;

	INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me);
	schedule_delayed_work(&w->work, timeout);
}

void __intel_fini_wedge(struct intel_wedge_me *w)
{
	cancel_delayed_work_sync(&w->work);
	destroy_delayed_work_on_stack(&w->work);
	w->gt = NULL;
}
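
/*
 * Illustrative sketch (not part of the driver): the init/fini pair above
 * backs the intel_wedge_on_timeout() macro used in intel_gt_reset_global(),
 * which wedges the GT if the enclosed block does not complete in time:
 *
 *	struct intel_wedge_me w;
 *
 *	intel_wedge_on_timeout(&w, gt, 5 * HZ) {
 *		... reset work that must not stall forever ...
 *	}
 */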

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_reset.c"
#include "selftest_hangcheck.c"
#endif