      1 /*	$NetBSD: i915_gpu_error.c,v 1.13 2021/12/19 12:25:46 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2008 Intel Corporation
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the next
     14  * paragraph) shall be included in all copies or substantial portions of the
     15  * Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     23  * IN THE SOFTWARE.
     24  *
     25  * Authors:
      26  *    Eric Anholt <eric@anholt.net>
      27  *    Keith Packard <keithp@keithp.com>
      28  *    Mika Kuoppala <mika.kuoppala@intel.com>
     29  *
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: i915_gpu_error.c,v 1.13 2021/12/19 12:25:46 riastradh Exp $");
     34 
     35 #include <sys/param.h>
     36 #include <linux/ascii85.h>
     37 #include <linux/nmi.h>
     38 #include <linux/pagevec.h>
     39 #include <linux/scatterlist.h>
     40 #include <linux/utsname.h>
     41 #include <linux/zlib.h>
     42 
     43 #include <drm/drm_print.h>
     44 
     45 #include "display/intel_atomic.h"
     46 #include "display/intel_overlay.h"
     47 
     48 #include "gem/i915_gem_context.h"
     49 #include "gem/i915_gem_lmem.h"
     50 #include "gt/intel_gt_pm.h"
     51 
     52 #include "i915_drv.h"
     53 #include "i915_gpu_error.h"
     54 #include "i915_memcpy.h"
     55 #include "i915_scatterlist.h"
     56 #include "intel_csr.h"
     57 
     58 #define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
     59 #define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN)
     60 
     61 static void __sg_set_buf(struct scatterlist *sg,
     62 			 void *addr, unsigned int len, loff_t it)
     63 {
     64 	sg->page_link = (unsigned long)virt_to_page(addr);
     65 	sg->offset = offset_in_page(addr);
     66 	sg->length = len;
     67 	sg->dma_address = it;
     68 }
     69 
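/*
 * Ensure the error buffer has room for at least len more bytes (plus a
 * terminating NUL).  When the current kmalloc'd chunk fills up, it is
 * recorded in the scatterlist chain and a fresh chunk is allocated,
 * growing the chain one page of scatterlist entries at a time.
 */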
     70 static bool __i915_error_grow(struct drm_i915_error_state_buf *e, size_t len)
     71 {
     72 	if (!len)
     73 		return false;
     74 
     75 	if (e->bytes + len + 1 <= e->size)
     76 		return true;
     77 
     78 	if (e->bytes) {
     79 		__sg_set_buf(e->cur++, e->buf, e->bytes, e->iter);
     80 		e->iter += e->bytes;
     81 		e->buf = NULL;
     82 		e->bytes = 0;
     83 	}
     84 
     85 	if (e->cur == e->end) {
     86 		struct scatterlist *sgl;
     87 
     88 		sgl = (typeof(sgl))__get_free_page(ALLOW_FAIL);
     89 		if (!sgl) {
     90 			e->err = -ENOMEM;
     91 			return false;
     92 		}
     93 
     94 		if (e->cur) {
     95 			e->cur->offset = 0;
     96 			e->cur->length = 0;
     97 			e->cur->page_link =
     98 				(unsigned long)sgl | SG_CHAIN;
     99 		} else {
    100 			e->sgl = sgl;
    101 		}
    102 
    103 		e->cur = sgl;
    104 		e->end = sgl + SG_MAX_SINGLE_ALLOC - 1;
    105 	}
    106 
    107 	e->size = ALIGN(len + 1, SZ_64K);
    108 	e->buf = kmalloc(e->size, ALLOW_FAIL);
    109 	if (!e->buf) {
    110 		e->size = PAGE_ALIGN(len + 1);
    111 		e->buf = kmalloc(e->size, GFP_KERNEL);
    112 	}
    113 	if (!e->buf) {
    114 		e->err = -ENOMEM;
    115 		return false;
    116 	}
    117 
    118 	return true;
    119 }
    120 
    121 __printf(2, 0)
    122 static void i915_error_vprintf(struct drm_i915_error_state_buf *e,
    123 			       const char *fmt, va_list args)
    124 {
    125 	va_list ap;
    126 	int len;
    127 
    128 	if (e->err)
    129 		return;
    130 
    131 	va_copy(ap, args);
    132 	len = vsnprintf(NULL, 0, fmt, ap);
    133 	va_end(ap);
    134 	if (len <= 0) {
    135 		e->err = len;
    136 		return;
    137 	}
    138 
    139 	if (!__i915_error_grow(e, len))
    140 		return;
    141 
    142 	GEM_BUG_ON(e->bytes >= e->size);
    143 	len = vscnprintf(e->buf + e->bytes, e->size - e->bytes, fmt, args);
    144 	if (len < 0) {
    145 		e->err = len;
    146 		return;
    147 	}
    148 	e->bytes += len;
    149 }
    150 
    151 static void i915_error_puts(struct drm_i915_error_state_buf *e, const char *str)
    152 {
    153 	unsigned len;
    154 
    155 	if (e->err || !str)
    156 		return;
    157 
    158 	len = strlen(str);
    159 	if (!__i915_error_grow(e, len))
    160 		return;
    161 
    162 	GEM_BUG_ON(e->bytes + len > e->size);
    163 	memcpy(e->buf + e->bytes, str, len);
    164 	e->bytes += len;
    165 }
    166 
    167 #define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
    168 #define err_puts(e, s) i915_error_puts(e, s)
    169 
    170 static void __i915_printfn_error(struct drm_printer *p, struct va_format *vaf)
    171 {
    172 	i915_error_vprintf(p->arg, vaf->fmt, *vaf->va);
    173 }
    174 
    175 static inline struct drm_printer
    176 i915_error_printer(struct drm_i915_error_state_buf *e)
    177 {
    178 	struct drm_printer p = {
    179 		.printfn = __i915_printfn_error,
    180 		.arg = e,
    181 	};
    182 	return p;
    183 }
    184 
    185 /* single threaded page allocator with a reserved stash for emergencies */
    186 static void pool_fini(struct pagevec *pv)
    187 {
    188 	pagevec_release(pv);
    189 }
    190 
    191 static int pool_refill(struct pagevec *pv, gfp_t gfp)
    192 {
    193 	while (pagevec_space(pv)) {
    194 		struct page *p;
    195 
    196 		p = alloc_page(gfp);
    197 		if (!p)
    198 			return -ENOMEM;
    199 
    200 		pagevec_add(pv, p);
    201 	}
    202 
    203 	return 0;
    204 }
    205 
    206 static int pool_init(struct pagevec *pv, gfp_t gfp)
    207 {
    208 	int err;
    209 
    210 	pagevec_init(pv);
    211 
    212 	err = pool_refill(pv, gfp);
    213 	if (err)
    214 		pool_fini(pv);
    215 
    216 	return err;
    217 }
    218 
    219 static void *pool_alloc(struct pagevec *pv, gfp_t gfp)
    220 {
    221 	struct page *p;
    222 
    223 	p = alloc_page(gfp);
    224 	if (!p && pagevec_count(pv))
    225 		p = pv->pages[--pv->nr];
    226 
    227 	return p ? page_address(p) : NULL;
    228 }
    229 
    230 static void pool_free(struct pagevec *pv, void *addr)
    231 {
    232 	struct page *p = virt_to_page(addr);
    233 
    234 	if (pagevec_space(pv))
    235 		pagevec_add(pv, p);
    236 	else
    237 		__free_page(p);
    238 }
    239 
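/*
 * Two back ends for capturing object pages into the coredump: a zlib
 * deflate path when CONFIG_DRM_I915_COMPRESS_ERROR is enabled, and a
 * plain page-copy path otherwise.  Both draw pages from the pool above.
 */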
    240 #ifdef CONFIG_DRM_I915_COMPRESS_ERROR
    241 
    242 struct i915_vma_compress {
    243 	struct pagevec pool;
    244 	struct z_stream_s zstream;
    245 	void *tmp;
    246 };
    247 
    248 static bool compress_init(struct i915_vma_compress *c)
    249 {
    250 	struct z_stream_s *zstream = &c->zstream;
    251 
    252 	if (pool_init(&c->pool, ALLOW_FAIL))
    253 		return false;
    254 
    255 	zstream->workspace =
    256 		kmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
    257 			ALLOW_FAIL);
    258 	if (!zstream->workspace) {
    259 		pool_fini(&c->pool);
    260 		return false;
    261 	}
    262 
    263 	c->tmp = NULL;
    264 	if (i915_has_memcpy_from_wc())
    265 		c->tmp = pool_alloc(&c->pool, ALLOW_FAIL);
    266 
    267 	return true;
    268 }
    269 
    270 static bool compress_start(struct i915_vma_compress *c)
    271 {
    272 	struct z_stream_s *zstream = &c->zstream;
    273 	void *workspace = zstream->workspace;
    274 
    275 	memset(zstream, 0, sizeof(*zstream));
    276 	zstream->workspace = workspace;
    277 
    278 	return zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) == Z_OK;
    279 }
    280 
    281 static void *compress_next_page(struct i915_vma_compress *c,
    282 				struct i915_vma_coredump *dst)
    283 {
    284 	void *page;
    285 
    286 	if (dst->page_count >= dst->num_pages)
    287 		return ERR_PTR(-ENOSPC);
    288 
    289 	page = pool_alloc(&c->pool, ALLOW_FAIL);
    290 	if (!page)
    291 		return ERR_PTR(-ENOMEM);
    292 
    293 	return dst->pages[dst->page_count++] = page;
    294 }
    295 
    296 static int compress_page(struct i915_vma_compress *c,
    297 			 void *src,
    298 			 struct i915_vma_coredump *dst,
    299 			 bool wc)
    300 {
    301 	struct z_stream_s *zstream = &c->zstream;
    302 
    303 	zstream->next_in = src;
    304 	if (wc && c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
    305 		zstream->next_in = c->tmp;
    306 	zstream->avail_in = PAGE_SIZE;
    307 
    308 	do {
    309 		if (zstream->avail_out == 0) {
    310 			zstream->next_out = compress_next_page(c, dst);
    311 			if (IS_ERR(zstream->next_out))
    312 				return PTR_ERR(zstream->next_out);
    313 
    314 			zstream->avail_out = PAGE_SIZE;
    315 		}
    316 
    317 		if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK)
    318 			return -EIO;
    319 	} while (zstream->avail_in);
    320 
    321 	/* Fallback to uncompressed if we increase size? */
    322 	if (0 && zstream->total_out > zstream->total_in)
    323 		return -E2BIG;
    324 
    325 	return 0;
    326 }
    327 
    328 static int compress_flush(struct i915_vma_compress *c,
    329 			  struct i915_vma_coredump *dst)
    330 {
    331 	struct z_stream_s *zstream = &c->zstream;
    332 
    333 	do {
    334 		switch (zlib_deflate(zstream, Z_FINISH)) {
    335 		case Z_OK: /* more space requested */
    336 			zstream->next_out = compress_next_page(c, dst);
    337 			if (IS_ERR(zstream->next_out))
    338 				return PTR_ERR(zstream->next_out);
    339 
    340 			zstream->avail_out = PAGE_SIZE;
    341 			break;
    342 
    343 		case Z_STREAM_END:
    344 			goto end;
    345 
    346 		default: /* any error */
    347 			return -EIO;
    348 		}
    349 	} while (1);
    350 
    351 end:
    352 	memset(zstream->next_out, 0, zstream->avail_out);
    353 	dst->unused = zstream->avail_out;
    354 	return 0;
    355 }
    356 
    357 static void compress_finish(struct i915_vma_compress *c)
    358 {
    359 	zlib_deflateEnd(&c->zstream);
    360 }
    361 
    362 static void compress_fini(struct i915_vma_compress *c)
    363 {
    364 	kfree(c->zstream.workspace);
    365 	if (c->tmp)
    366 		pool_free(&c->pool, c->tmp);
    367 	pool_fini(&c->pool);
    368 }
    369 
    370 static void err_compression_marker(struct drm_i915_error_state_buf *m)
    371 {
    372 	err_puts(m, ":");
    373 }
    374 
    375 #else
    376 
    377 struct i915_vma_compress {
    378 	struct pagevec pool;
    379 };
    380 
    381 static bool compress_init(struct i915_vma_compress *c)
    382 {
    383 	return pool_init(&c->pool, ALLOW_FAIL) == 0;
    384 }
    385 
    386 static bool compress_start(struct i915_vma_compress *c)
    387 {
    388 	return true;
    389 }
    390 
    391 static int compress_page(struct i915_vma_compress *c,
    392 			 void *src,
    393 			 struct i915_vma_coredump *dst,
    394 			 bool wc)
    395 {
    396 	void *ptr;
    397 
    398 	ptr = pool_alloc(&c->pool, ALLOW_FAIL);
    399 	if (!ptr)
    400 		return -ENOMEM;
    401 
    402 	if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE)))
    403 		memcpy(ptr, src, PAGE_SIZE);
    404 	dst->pages[dst->page_count++] = ptr;
    405 
    406 	return 0;
    407 }
    408 
    409 static int compress_flush(struct i915_vma_compress *c,
    410 			  struct i915_vma_coredump *dst)
    411 {
    412 	return 0;
    413 }
    414 
    415 static void compress_finish(struct i915_vma_compress *c)
    416 {
    417 }
    418 
    419 static void compress_fini(struct i915_vma_compress *c)
    420 {
    421 	pool_fini(&c->pool);
    422 }
    423 
    424 static void err_compression_marker(struct drm_i915_error_state_buf *m)
    425 {
    426 	err_puts(m, "~");
    427 }
    428 
    429 #endif
    430 
    431 static void error_print_instdone(struct drm_i915_error_state_buf *m,
    432 				 const struct intel_engine_coredump *ee)
    433 {
    434 	const struct sseu_dev_info *sseu = &RUNTIME_INFO(m->i915)->sseu;
    435 	int slice;
    436 	int subslice;
    437 
    438 	err_printf(m, "  INSTDONE: 0x%08x\n",
    439 		   ee->instdone.instdone);
    440 
    441 	if (ee->engine->class != RENDER_CLASS || INTEL_GEN(m->i915) <= 3)
    442 		return;
    443 
    444 	err_printf(m, "  SC_INSTDONE: 0x%08x\n",
    445 		   ee->instdone.slice_common);
    446 
    447 	if (INTEL_GEN(m->i915) <= 6)
    448 		return;
    449 
    450 	for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice)
    451 		err_printf(m, "  SAMPLER_INSTDONE[%d][%d]: 0x%08x\n",
    452 			   slice, subslice,
    453 			   ee->instdone.sampler[slice][subslice]);
    454 
    455 	for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice)
    456 		err_printf(m, "  ROW_INSTDONE[%d][%d]: 0x%08x\n",
    457 			   slice, subslice,
    458 			   ee->instdone.row[slice][subslice]);
    459 }
    460 
    461 static void error_print_request(struct drm_i915_error_state_buf *m,
    462 				const char *prefix,
    463 				const struct i915_request_coredump *erq)
    464 {
    465 	if (!erq->seqno)
    466 		return;
    467 
    468 	err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, start %08x, head %08x, tail %08x\n",
    469 		   prefix, erq->pid, erq->context, erq->seqno,
    470 		   test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
    471 			    &erq->flags) ? "!" : "",
    472 		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
    473 			    &erq->flags) ? "+" : "",
    474 		   erq->sched_attr.priority,
    475 		   erq->start, erq->head, erq->tail);
    476 }
    477 
    478 static void error_print_context(struct drm_i915_error_state_buf *m,
    479 				const char *header,
    480 				const struct i915_gem_context_coredump *ctx)
    481 {
    482 	err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n",
    483 		   header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
    484 		   ctx->guilty, ctx->active);
    485 }
    486 
    487 static struct i915_vma_coredump *
    488 __find_vma(struct i915_vma_coredump *vma, const char *name)
    489 {
    490 	while (vma) {
    491 		if (strcmp(vma->name, name) == 0)
    492 			return vma;
    493 		vma = vma->next;
    494 	}
    495 
    496 	return NULL;
    497 }
    498 
    499 static struct i915_vma_coredump *
    500 find_batch(const struct intel_engine_coredump *ee)
    501 {
    502 	return __find_vma(ee->vma, "batch");
    503 }
    504 
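/*
 * Print the saved state for one engine: ring registers, the batch buffer
 * bounds (if captured), PPGTT info, the ELSP ports and the active context.
 */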
    505 static void error_print_engine(struct drm_i915_error_state_buf *m,
    506 			       const struct intel_engine_coredump *ee)
    507 {
    508 	struct i915_vma_coredump *batch;
    509 	int n;
    510 
    511 	err_printf(m, "%s command stream:\n", ee->engine->name);
    512 	err_printf(m, "  CCID:  0x%08x\n", ee->ccid);
    513 	err_printf(m, "  START: 0x%08x\n", ee->start);
    514 	err_printf(m, "  HEAD:  0x%08x [0x%08x]\n", ee->head, ee->rq_head);
    515 	err_printf(m, "  TAIL:  0x%08x [0x%08x, 0x%08x]\n",
    516 		   ee->tail, ee->rq_post, ee->rq_tail);
    517 	err_printf(m, "  CTL:   0x%08x\n", ee->ctl);
    518 	err_printf(m, "  MODE:  0x%08x\n", ee->mode);
    519 	err_printf(m, "  HWS:   0x%08x\n", ee->hws);
    520 	err_printf(m, "  ACTHD: 0x%08x %08x\n",
    521 		   (u32)(ee->acthd>>32), (u32)ee->acthd);
    522 	err_printf(m, "  IPEIR: 0x%08x\n", ee->ipeir);
    523 	err_printf(m, "  IPEHR: 0x%08x\n", ee->ipehr);
    524 
    525 	error_print_instdone(m, ee);
    526 
    527 	batch = find_batch(ee);
    528 	if (batch) {
    529 		u64 start = batch->gtt_offset;
    530 		u64 end = start + batch->gtt_size;
    531 
    532 		err_printf(m, "  batch: [0x%08x_%08x, 0x%08x_%08x]\n",
    533 			   upper_32_bits(start), lower_32_bits(start),
    534 			   upper_32_bits(end), lower_32_bits(end));
    535 	}
    536 	if (INTEL_GEN(m->i915) >= 4) {
    537 		err_printf(m, "  BBADDR: 0x%08x_%08x\n",
    538 			   (u32)(ee->bbaddr>>32), (u32)ee->bbaddr);
    539 		err_printf(m, "  BB_STATE: 0x%08x\n", ee->bbstate);
    540 		err_printf(m, "  INSTPS: 0x%08x\n", ee->instps);
    541 	}
    542 	err_printf(m, "  INSTPM: 0x%08x\n", ee->instpm);
    543 	err_printf(m, "  FADDR: 0x%08x %08x\n", upper_32_bits(ee->faddr),
    544 		   lower_32_bits(ee->faddr));
    545 	if (INTEL_GEN(m->i915) >= 6) {
    546 		err_printf(m, "  RC PSMI: 0x%08x\n", ee->rc_psmi);
    547 		err_printf(m, "  FAULT_REG: 0x%08x\n", ee->fault_reg);
    548 	}
    549 	if (HAS_PPGTT(m->i915)) {
    550 		err_printf(m, "  GFX_MODE: 0x%08x\n", ee->vm_info.gfx_mode);
    551 
    552 		if (INTEL_GEN(m->i915) >= 8) {
    553 			int i;
    554 			for (i = 0; i < 4; i++)
    555 				err_printf(m, "  PDP%d: 0x%016"PRIx64"\n",
    556 					   i, ee->vm_info.pdp[i]);
    557 		} else {
    558 			err_printf(m, "  PP_DIR_BASE: 0x%08x\n",
    559 				   ee->vm_info.pp_dir_base);
    560 		}
    561 	}
    562 	err_printf(m, "  engine reset count: %u\n", ee->reset_count);
    563 
    564 	for (n = 0; n < ee->num_ports; n++) {
    565 		err_printf(m, "  ELSP[%d]:", n);
    566 		error_print_request(m, " ", &ee->execlist[n]);
    567 	}
    568 
    569 	error_print_context(m, "  Active context: ", &ee->context);
    570 }
    571 
    572 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
    573 {
    574 	va_list args;
    575 
    576 	va_start(args, f);
    577 	i915_error_vprintf(e, f, args);
    578 	va_end(args);
    579 }
    580 
    581 static void print_error_vma(struct drm_i915_error_state_buf *m,
    582 			    const struct intel_engine_cs *engine,
    583 			    const struct i915_vma_coredump *vma)
    584 {
    585 	char out[ASCII85_BUFSZ];
    586 	int page;
    587 
    588 	if (!vma)
    589 		return;
    590 
    591 	err_printf(m, "%s --- %s = 0x%08x %08x\n",
    592 		   engine ? engine->name : "global", vma->name,
    593 		   upper_32_bits(vma->gtt_offset),
    594 		   lower_32_bits(vma->gtt_offset));
    595 
    596 	if (vma->gtt_page_sizes > I915_GTT_PAGE_SIZE_4K)
    597 		err_printf(m, "gtt_page_sizes = 0x%08x\n", vma->gtt_page_sizes);
    598 
    599 	err_compression_marker(m);
    600 	for (page = 0; page < vma->page_count; page++) {
    601 		int i, len;
    602 
    603 		len = PAGE_SIZE;
    604 		if (page == vma->page_count - 1)
    605 			len -= vma->unused;
    606 		len = ascii85_encode_len(len);
    607 
    608 		for (i = 0; i < len; i++)
    609 			err_puts(m, ascii85_encode(vma->pages[page][i], out));
    610 	}
    611 	err_puts(m, "\n");
    612 }
    613 
    614 static void err_print_capabilities(struct drm_i915_error_state_buf *m,
    615 				   const struct intel_device_info *info,
    616 				   const struct intel_runtime_info *runtime,
    617 				   const struct intel_driver_caps *caps)
    618 {
    619 	struct drm_printer p = i915_error_printer(m);
    620 
    621 	intel_device_info_print_static(info, &p);
    622 	intel_device_info_print_runtime(runtime, &p);
    623 	intel_device_info_print_topology(&runtime->sseu, &p);
    624 	intel_driver_caps_print(caps, &p);
    625 }
    626 
    627 static void err_print_params(struct drm_i915_error_state_buf *m,
    628 			     const struct i915_params *params)
    629 {
    630 	struct drm_printer p = i915_error_printer(m);
    631 
    632 	i915_params_dump(params, &p);
    633 }
    634 
    635 static void err_print_pciid(struct drm_i915_error_state_buf *m,
    636 			    struct drm_i915_private *i915)
    637 {
    638 	struct pci_dev *pdev = i915->drm.pdev;
    639 
    640 	err_printf(m, "PCI ID: 0x%04x\n", pdev->device);
    641 	err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision);
    642 	err_printf(m, "PCI Subsystem: %04x:%04x\n",
    643 		   pdev->subsystem_vendor,
    644 		   pdev->subsystem_device);
    645 }
    646 
    647 static void err_print_uc(struct drm_i915_error_state_buf *m,
    648 			 const struct intel_uc_coredump *error_uc)
    649 {
    650 	struct drm_printer p = i915_error_printer(m);
    651 
    652 	intel_uc_fw_dump(&error_uc->guc_fw, &p);
    653 	intel_uc_fw_dump(&error_uc->huc_fw, &p);
    654 	print_error_vma(m, NULL, error_uc->guc_log);
    655 }
    656 
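/*
 * Free the chained scatterlist built by err_print_to_sgl: each entry's
 * backing buffer is kfree'd, then the page holding the entries themselves
 * is released before following the chain pointer.
 */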
    657 static void err_free_sgl(struct scatterlist *sgl)
    658 {
    659 	while (sgl) {
    660 		struct scatterlist *sg;
    661 
    662 		for (sg = sgl; !sg_is_chain(sg); sg++) {
    663 			kfree(sg_virt(sg));
    664 			if (sg_is_last(sg))
    665 				break;
    666 		}
    667 
    668 		sg = sg_is_last(sg) ? NULL : sg_chain_ptr(sg);
    669 		free_page((unsigned long)sgl);
    670 		sgl = sg;
    671 	}
    672 }
    673 
    674 static void err_print_gt(struct drm_i915_error_state_buf *m,
    675 			 struct intel_gt_coredump *gt)
    676 {
    677 	const struct intel_engine_coredump *ee;
    678 	int i;
    679 
    680 	err_printf(m, "GT awake: %s\n", yesno(gt->awake));
    681 	err_printf(m, "EIR: 0x%08x\n", gt->eir);
    682 	err_printf(m, "IER: 0x%08x\n", gt->ier);
    683 	for (i = 0; i < gt->ngtier; i++)
    684 		err_printf(m, "GTIER[%d]: 0x%08x\n", i, gt->gtier[i]);
    685 	err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er);
    686 	err_printf(m, "FORCEWAKE: 0x%08x\n", gt->forcewake);
    687 	err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr);
    688 
    689 	for (i = 0; i < gt->nfence; i++)
    690 		err_printf(m, "  fence[%d] = %08llx\n", i, gt->fence[i]);
    691 
    692 	if (IS_GEN_RANGE(m->i915, 6, 11)) {
    693 		err_printf(m, "ERROR: 0x%08x\n", gt->error);
    694 		err_printf(m, "DONE_REG: 0x%08x\n", gt->done_reg);
    695 	}
    696 
    702 	if (INTEL_GEN(m->i915) >= 8)
    703 		err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n",
    704 			   gt->fault_data1, gt->fault_data0);
    705 
    706 	if (IS_GEN(m->i915, 7))
    707 		err_printf(m, "ERR_INT: 0x%08x\n", gt->err_int);
    708 
    709 	if (IS_GEN_RANGE(m->i915, 8, 11))
    710 		err_printf(m, "GTT_CACHE_EN: 0x%08x\n", gt->gtt_cache);
    711 
    712 	if (IS_GEN(m->i915, 12))
    713 		err_printf(m, "AUX_ERR_DBG: 0x%08x\n", gt->aux_err);
    714 
    715 	if (INTEL_GEN(m->i915) >= 12) {
    716 		int i;
    717 
    718 		for (i = 0; i < GEN12_SFC_DONE_MAX; i++)
    719 			err_printf(m, "  SFC_DONE[%d]: 0x%08x\n", i,
    720 				   gt->sfc_done[i]);
    721 
    722 		err_printf(m, "  GAM_DONE: 0x%08x\n", gt->gam_done);
    723 	}
    724 
    725 	for (ee = gt->engine; ee; ee = ee->next) {
    726 		const struct i915_vma_coredump *vma;
    727 
    728 		error_print_engine(m, ee);
    729 		for (vma = ee->vma; vma; vma = vma->next)
    730 			print_error_vma(m, ee->engine, vma);
    731 	}
    732 
    733 	if (gt->uc)
    734 		err_print_uc(m, gt->uc);
    735 }
    736 
    737 static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
    738 			       struct i915_gpu_coredump *error)
    739 {
    740 	const struct intel_engine_coredump *ee;
    741 	struct timespec64 ts;
    742 
    743 	if (*error->error_msg)
    744 		err_printf(m, "%s\n", error->error_msg);
    745 	err_printf(m, "Kernel: %s %s\n",
    746 		   init_utsname()->release,
    747 		   init_utsname()->machine);
    748 	err_printf(m, "Driver: %s\n", DRIVER_DATE);
    749 	ts = ktime_to_timespec64(error->time);
    750 	err_printf(m, "Time: %lld s %ld us\n",
    751 		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
    752 	ts = ktime_to_timespec64(error->boottime);
    753 	err_printf(m, "Boottime: %lld s %ld us\n",
    754 		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
    755 	ts = ktime_to_timespec64(error->uptime);
    756 	err_printf(m, "Uptime: %lld s %ld us\n",
    757 		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
    758 	err_printf(m, "Capture: %lu jiffies; %d ms ago\n",
    759 		   error->capture, jiffies_to_msecs(jiffies - error->capture));
    760 
    761 	for (ee = error->gt ? error->gt->engine : NULL; ee; ee = ee->next)
    762 		err_printf(m, "Active process (on ring %s): %s [%d]\n",
    763 			   ee->engine->name,
    764 			   ee->context.comm,
    765 			   ee->context.pid);
    766 
    767 	err_printf(m, "Reset count: %u\n", error->reset_count);
    768 	err_printf(m, "Suspend count: %u\n", error->suspend_count);
    769 	err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform));
    770 	err_printf(m, "Subplatform: 0x%x\n",
    771 		   intel_subplatform(&error->runtime_info,
    772 				     error->device_info.platform));
    773 	err_print_pciid(m, m->i915);
    774 
    775 	err_printf(m, "IOMMU enabled?: %d\n", error->iommu);
    776 
    777 	if (HAS_CSR(m->i915)) {
    778 		struct intel_csr *csr = &m->i915->csr;
    779 
    780 		err_printf(m, "DMC loaded: %s\n",
    781 			   yesno(csr->dmc_payload != NULL));
    782 		err_printf(m, "DMC fw version: %d.%d\n",
    783 			   CSR_VERSION_MAJOR(csr->version),
    784 			   CSR_VERSION_MINOR(csr->version));
    785 	}
    786 
    787 	err_printf(m, "RPM wakelock: %s\n", yesno(error->wakelock));
    788 	err_printf(m, "PM suspended: %s\n", yesno(error->suspended));
    789 
    790 	if (error->gt)
    791 		err_print_gt(m, error->gt);
    792 
    793 	if (error->overlay)
    794 		intel_overlay_print_error_state(m, error->overlay);
    795 
    796 	if (error->display)
    797 		intel_display_print_error_state(m, error->display);
    798 
    799 	err_print_capabilities(m, &error->device_info, &error->runtime_info,
    800 			       &error->driver_caps);
    801 	err_print_params(m, &error->params);
    802 }
    803 
    804 static int err_print_to_sgl(struct i915_gpu_coredump *error)
    805 {
    806 	struct drm_i915_error_state_buf m;
    807 
    808 	if (IS_ERR(error))
    809 		return PTR_ERR(error);
    810 
    811 	if (READ_ONCE(error->sgl))
    812 		return 0;
    813 
    814 	memset(&m, 0, sizeof(m));
    815 	m.i915 = error->i915;
    816 
    817 	__err_print_to_sgl(&m, error);
    818 
    819 	if (m.buf) {
    820 		__sg_set_buf(m.cur++, m.buf, m.bytes, m.iter);
    821 		m.bytes = 0;
    822 		m.buf = NULL;
    823 	}
    824 	if (m.cur) {
    825 		GEM_BUG_ON(m.end < m.cur);
    826 		sg_mark_end(m.cur - 1);
    827 	}
    828 	GEM_BUG_ON(m.sgl && !m.cur);
    829 
    830 	if (m.err) {
    831 		err_free_sgl(m.sgl);
    832 		return m.err;
    833 	}
    834 
    835 	if (cmpxchg(&error->sgl, NULL, m.sgl))
    836 		err_free_sgl(m.sgl);
    837 
    838 	return 0;
    839 }
    840 
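/*
 * Copy up to rem bytes of the rendered error state into buf, starting at
 * byte offset off.  The text is rendered into the scatterlist lazily on
 * first use; error->fit caches the last position to speed up sequential
 * reads.
 */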
    841 ssize_t i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
    842 					 char *buf, loff_t off, size_t rem)
    843 {
    844 	struct scatterlist *sg;
    845 	size_t count;
    846 	loff_t pos;
    847 	int err;
    848 
    849 	if (!error || !rem)
    850 		return 0;
    851 
    852 	err = err_print_to_sgl(error);
    853 	if (err)
    854 		return err;
    855 
    856 	sg = READ_ONCE(error->fit);
    857 	if (!sg || off < sg->dma_address)
    858 		sg = error->sgl;
    859 	if (!sg)
    860 		return 0;
    861 
    862 	pos = sg->dma_address;
    863 	count = 0;
    864 	do {
    865 		size_t len, start;
    866 
    867 		if (sg_is_chain(sg)) {
    868 			sg = sg_chain_ptr(sg);
    869 			GEM_BUG_ON(sg_is_chain(sg));
    870 		}
    871 
    872 		len = sg->length;
    873 		if (pos + len <= off) {
    874 			pos += len;
    875 			continue;
    876 		}
    877 
    878 		start = sg->offset;
    879 		if (pos < off) {
    880 			GEM_BUG_ON(off - pos > len);
    881 			len -= off - pos;
    882 			start += off - pos;
    883 			pos = off;
    884 		}
    885 
    886 		len = min(len, rem);
    887 		GEM_BUG_ON(!len || len > sg->length);
    888 
    889 		memcpy(buf, page_address(sg_page(sg)) + start, len);
    890 
    891 		count += len;
    892 		pos += len;
    893 
    894 		buf += len;
    895 		rem -= len;
    896 		if (!rem) {
    897 			WRITE_ONCE(error->fit, sg);
    898 			break;
    899 		}
    900 	} while (!sg_is_last(sg++));
    901 
    902 	return count;
    903 }
    904 
    905 static void i915_vma_coredump_free(struct i915_vma_coredump *vma)
    906 {
    907 	while (vma) {
    908 		struct i915_vma_coredump *next = vma->next;
    909 		int page;
    910 
    911 		for (page = 0; page < vma->page_count; page++)
    912 			free_page((unsigned long)vma->pages[page]);
    913 
    914 		kfree(vma);
    915 		vma = next;
    916 	}
    917 }
    918 
    919 static void cleanup_params(struct i915_gpu_coredump *error)
    920 {
    921 	i915_params_free(&error->params);
    922 }
    923 
    924 static void cleanup_uc(struct intel_uc_coredump *uc)
    925 {
    926 	kfree(uc->guc_fw.path);
    927 	kfree(uc->huc_fw.path);
    928 	i915_vma_coredump_free(uc->guc_log);
    929 
    930 	kfree(uc);
    931 }
    932 
    933 static void cleanup_gt(struct intel_gt_coredump *gt)
    934 {
    935 	while (gt->engine) {
    936 		struct intel_engine_coredump *ee = gt->engine;
    937 
    938 		gt->engine = ee->next;
    939 
    940 		i915_vma_coredump_free(ee->vma);
    941 		kfree(ee);
    942 	}
    943 
    944 	if (gt->uc)
    945 		cleanup_uc(gt->uc);
    946 
    947 	kfree(gt);
    948 }
    949 
    950 void __i915_gpu_coredump_free(struct kref *error_ref)
    951 {
    952 	struct i915_gpu_coredump *error =
    953 		container_of(error_ref, typeof(*error), ref);
    954 
    955 	while (error->gt) {
    956 		struct intel_gt_coredump *gt = error->gt;
    957 
    958 		error->gt = gt->next;
    959 		cleanup_gt(gt);
    960 	}
    961 
    962 	kfree(error->overlay);
    963 	kfree(error->display);
    964 
    965 	cleanup_params(error);
    966 
    967 	err_free_sgl(error->sgl);
    968 	kfree(error);
    969 }
    970 
    971 #ifdef __NetBSD__
    972 #  define	__aperture_iomem
    973 #  define	__iomem __aperture_iomem
    974 #endif
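/*
 * Snapshot the contents of a vma into freshly allocated pages.  Pages are
 * read back one at a time through the reserved error-capture GTT slot,
 * through the region iomap for local memory, or via kmap for everything
 * else, and each one is fed through the compressor above.
 */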
    975 static struct i915_vma_coredump *
    976 i915_vma_coredump_create(const struct intel_gt *gt,
    977 			 const struct i915_vma *vma,
    978 			 const char *name,
    979 			 struct i915_vma_compress *compress)
    980 {
    981 	struct i915_ggtt *ggtt = gt->ggtt;
    982 	const u64 slot = ggtt->error_capture.start;
    983 	struct i915_vma_coredump *dst;
    984 	unsigned long num_pages;
    985 	struct sgt_iter iter;
    986 	int ret;
    987 
    988 	might_sleep();
    989 
    990 	if (!vma || !vma->pages || !compress)
    991 		return NULL;
    992 
    993 	num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
    994 	num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */
    995 	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), ALLOW_FAIL);
    996 	if (!dst)
    997 		return NULL;
    998 
    999 	if (!compress_start(compress)) {
   1000 		kfree(dst);
   1001 		return NULL;
   1002 	}
   1003 
   1004 	strcpy(dst->name, name);
   1005 	dst->next = NULL;
   1006 
   1007 	dst->gtt_offset = vma->node.start;
   1008 	dst->gtt_size = vma->node.size;
   1009 	dst->gtt_page_sizes = vma->page_sizes.gtt;
   1010 	dst->num_pages = num_pages;
   1011 	dst->page_count = 0;
   1012 	dst->unused = 0;
   1013 
   1014 	ret = -EINVAL;
   1015 	if (drm_mm_node_allocated(&ggtt->error_capture)) {
   1016 		void __iomem *s;
   1017 		dma_addr_t dma;
   1018 
   1019 		for_each_sgt_daddr(dma, iter, vma->pages) {
   1020 			ggtt->vm.insert_page(&ggtt->vm, dma, slot,
   1021 					     I915_CACHE_NONE, 0);
   1022 			mb();
   1023 
   1024 			s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE);
   1025 			ret = compress_page(compress,
   1026 					    (void  __force *)s, dst,
   1027 					    true);
   1028 #ifdef __NetBSD__
    1029 			io_mapping_unmap_atomic(&ggtt->iomap, s);
   1030 #else
   1031 			io_mapping_unmap(s);
   1032 #endif
   1033 			if (ret)
   1034 				break;
   1035 		}
   1036 	} else if (i915_gem_object_is_lmem(vma->obj)) {
   1037 		struct intel_memory_region *mem = vma->obj->mm.region;
   1038 		dma_addr_t dma;
   1039 
   1040 		for_each_sgt_daddr(dma, iter, vma->pages) {
   1041 			void __iomem *s;
   1042 
   1043 			s = io_mapping_map_wc(&mem->iomap, dma, PAGE_SIZE);
   1044 			ret = compress_page(compress,
   1045 					    (void __force *)s, dst,
   1046 					    true);
   1047 #ifdef __NetBSD__
    1048 			io_mapping_unmap_atomic(&mem->iomap, s);
   1049 #else
   1050 			io_mapping_unmap(s);
   1051 #endif
   1052 			if (ret)
   1053 				break;
   1054 		}
   1055 	} else {
   1056 		struct page *page;
   1057 
   1058 		for_each_sgt_page(page, iter, vma->pages) {
   1059 			void *s;
   1060 
   1061 			drm_clflush_pages(&page, 1);
   1062 
   1063 			s = kmap(page);
   1064 			ret = compress_page(compress, s, dst, false);
   1065 			kunmap(page);
   1066 
   1067 			drm_clflush_pages(&page, 1);
   1068 
   1069 			if (ret)
   1070 				break;
   1071 		}
   1072 	}
   1073 
   1074 	if (ret || compress_flush(compress, dst)) {
   1075 		while (dst->page_count--)
   1076 			pool_free(&compress->pool, dst->pages[dst->page_count]);
   1077 		kfree(dst);
   1078 		dst = NULL;
   1079 	}
   1080 	compress_finish(compress);
   1081 
   1082 	return dst;
   1083 }
   1084 #ifdef __NetBSD__
   1085 #  undef	__iomem
   1086 #  undef	__aperture_iomem
   1087 #endif
   1088 
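/* Record the fence registers appropriate to the hardware generation. */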
   1089 static void gt_record_fences(struct intel_gt_coredump *gt)
   1090 {
   1091 	struct i915_ggtt *ggtt = gt->_gt->ggtt;
   1092 	struct intel_uncore *uncore = gt->_gt->uncore;
   1093 	int i;
   1094 
   1095 	if (INTEL_GEN(uncore->i915) >= 6) {
   1096 		for (i = 0; i < ggtt->num_fences; i++)
   1097 			gt->fence[i] =
   1098 				intel_uncore_read64(uncore,
   1099 						    FENCE_REG_GEN6_LO(i));
   1100 	} else if (INTEL_GEN(uncore->i915) >= 4) {
   1101 		for (i = 0; i < ggtt->num_fences; i++)
   1102 			gt->fence[i] =
   1103 				intel_uncore_read64(uncore,
   1104 						    FENCE_REG_965_LO(i));
   1105 	} else {
   1106 		for (i = 0; i < ggtt->num_fences; i++)
   1107 			gt->fence[i] =
   1108 				intel_uncore_read(uncore, FENCE_REG(i));
   1109 	}
   1110 	gt->nfence = i;
   1111 }
   1112 
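/*
 * Read back the per-engine MMIO state (ring registers, fault registers,
 * instdone, HWS page address and PPGTT pointers) into the coredump.
 */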
   1113 static void engine_record_registers(struct intel_engine_coredump *ee)
   1114 {
   1115 	const struct intel_engine_cs *engine = ee->engine;
   1116 	struct drm_i915_private *i915 = engine->i915;
   1117 
   1118 	if (INTEL_GEN(i915) >= 6) {
   1119 		ee->rc_psmi = ENGINE_READ(engine, RING_PSMI_CTL);
   1120 
   1121 		if (INTEL_GEN(i915) >= 12)
   1122 			ee->fault_reg = intel_uncore_read(engine->uncore,
   1123 							  GEN12_RING_FAULT_REG);
   1124 		else if (INTEL_GEN(i915) >= 8)
   1125 			ee->fault_reg = intel_uncore_read(engine->uncore,
   1126 							  GEN8_RING_FAULT_REG);
   1127 		else
   1128 			ee->fault_reg = GEN6_RING_FAULT_REG_READ(engine);
   1129 	}
   1130 
   1131 	if (INTEL_GEN(i915) >= 4) {
   1132 		ee->faddr = ENGINE_READ(engine, RING_DMA_FADD);
   1133 		ee->ipeir = ENGINE_READ(engine, RING_IPEIR);
   1134 		ee->ipehr = ENGINE_READ(engine, RING_IPEHR);
   1135 		ee->instps = ENGINE_READ(engine, RING_INSTPS);
   1136 		ee->bbaddr = ENGINE_READ(engine, RING_BBADDR);
   1137 		ee->ccid = ENGINE_READ(engine, CCID);
   1138 		if (INTEL_GEN(i915) >= 8) {
   1139 			ee->faddr |= (u64)ENGINE_READ(engine, RING_DMA_FADD_UDW) << 32;
   1140 			ee->bbaddr |= (u64)ENGINE_READ(engine, RING_BBADDR_UDW) << 32;
   1141 		}
   1142 		ee->bbstate = ENGINE_READ(engine, RING_BBSTATE);
   1143 	} else {
   1144 		ee->faddr = ENGINE_READ(engine, DMA_FADD_I8XX);
   1145 		ee->ipeir = ENGINE_READ(engine, IPEIR);
   1146 		ee->ipehr = ENGINE_READ(engine, IPEHR);
   1147 	}
   1148 
   1149 	intel_engine_get_instdone(engine, &ee->instdone);
   1150 
   1151 	ee->instpm = ENGINE_READ(engine, RING_INSTPM);
   1152 	ee->acthd = intel_engine_get_active_head(engine);
   1153 	ee->start = ENGINE_READ(engine, RING_START);
   1154 	ee->head = ENGINE_READ(engine, RING_HEAD);
   1155 	ee->tail = ENGINE_READ(engine, RING_TAIL);
   1156 	ee->ctl = ENGINE_READ(engine, RING_CTL);
   1157 	if (INTEL_GEN(i915) > 2)
   1158 		ee->mode = ENGINE_READ(engine, RING_MI_MODE);
   1159 
   1160 	if (!HWS_NEEDS_PHYSICAL(i915)) {
   1161 		i915_reg_t mmio;
   1162 
   1163 		if (IS_GEN(i915, 7)) {
   1164 			switch (engine->id) {
   1165 			default:
   1166 				MISSING_CASE(engine->id);
   1167 				/* fall through */
   1168 			case RCS0:
   1169 				mmio = RENDER_HWS_PGA_GEN7;
   1170 				break;
   1171 			case BCS0:
   1172 				mmio = BLT_HWS_PGA_GEN7;
   1173 				break;
   1174 			case VCS0:
   1175 				mmio = BSD_HWS_PGA_GEN7;
   1176 				break;
   1177 			case VECS0:
   1178 				mmio = VEBOX_HWS_PGA_GEN7;
   1179 				break;
   1180 			}
   1181 		} else if (IS_GEN(engine->i915, 6)) {
   1182 			mmio = RING_HWS_PGA_GEN6(engine->mmio_base);
   1183 		} else {
   1184 			/* XXX: gen8 returns to sanity */
   1185 			mmio = RING_HWS_PGA(engine->mmio_base);
   1186 		}
   1187 
   1188 		ee->hws = intel_uncore_read(engine->uncore, mmio);
   1189 	}
   1190 
   1191 	ee->reset_count = i915_reset_engine_count(&i915->gpu_error, engine);
   1192 
   1193 	if (HAS_PPGTT(i915)) {
   1194 		int i;
   1195 
   1196 		ee->vm_info.gfx_mode = ENGINE_READ(engine, RING_MODE_GEN7);
   1197 
   1198 		if (IS_GEN(i915, 6)) {
   1199 			ee->vm_info.pp_dir_base =
   1200 				ENGINE_READ(engine, RING_PP_DIR_BASE_READ);
   1201 		} else if (IS_GEN(i915, 7)) {
   1202 			ee->vm_info.pp_dir_base =
   1203 				ENGINE_READ(engine, RING_PP_DIR_BASE);
   1204 		} else if (INTEL_GEN(i915) >= 8) {
   1205 			u32 base = engine->mmio_base;
   1206 
   1207 			for (i = 0; i < 4; i++) {
   1208 				ee->vm_info.pdp[i] =
   1209 					intel_uncore_read(engine->uncore,
   1210 							  GEN8_RING_PDP_UDW(base, i));
   1211 				ee->vm_info.pdp[i] <<= 32;
   1212 				ee->vm_info.pdp[i] |=
   1213 					intel_uncore_read(engine->uncore,
   1214 							  GEN8_RING_PDP_LDW(base, i));
   1215 			}
   1216 		}
   1217 	}
   1218 }
   1219 
   1220 static void record_request(const struct i915_request *request,
   1221 			   struct i915_request_coredump *erq)
   1222 {
   1223 	const struct i915_gem_context *ctx;
   1224 
   1225 	erq->flags = request->fence.flags;
   1226 	erq->context = request->fence.context;
   1227 	erq->seqno = request->fence.seqno;
   1228 	erq->sched_attr = request->sched.attr;
   1229 	erq->start = i915_ggtt_offset(request->ring->vma);
   1230 	erq->head = request->head;
   1231 	erq->tail = request->tail;
   1232 
   1233 	erq->pid = 0;
   1234 	rcu_read_lock();
   1235 	ctx = rcu_dereference(request->context->gem_context);
   1236 	if (ctx)
   1237 		erq->pid = pid_nr(ctx->pid);
   1238 	rcu_read_unlock();
   1239 }
   1240 
   1241 static void engine_record_execlists(struct intel_engine_coredump *ee)
   1242 {
   1243 	const struct intel_engine_execlists * const el = &ee->engine->execlists;
   1244 	struct i915_request * const *port = el->active;
   1245 	unsigned int n = 0;
   1246 
   1247 	while (*port)
   1248 		record_request(*port++, &ee->execlist[n++]);
   1249 
   1250 	ee->num_ports = n;
   1251 }
   1252 
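/*
 * Record the GEM context behind a request: submitting process, scheduling
 * attributes and guilty/active counts.  Returns true if error capture is
 * suppressed for this context, i.e. the hang should be treated as
 * simulated.
 */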
   1253 static bool record_context(struct i915_gem_context_coredump *e,
   1254 			   const struct i915_request *rq)
   1255 {
   1256 	struct i915_gem_context *ctx;
   1257 	struct task_struct *task;
   1258 	bool capture;
   1259 
   1260 	rcu_read_lock();
   1261 	ctx = rcu_dereference(rq->context->gem_context);
   1262 	if (ctx && !kref_get_unless_zero(&ctx->ref))
   1263 		ctx = NULL;
   1264 	rcu_read_unlock();
   1265 	if (!ctx)
   1266 		return false;
   1267 
   1268 	rcu_read_lock();
   1269 	task = pid_task(ctx->pid, PIDTYPE_PID);
   1270 	if (task) {
   1271 		strcpy(e->comm, task->comm);
   1272 		e->pid = task->pid;
   1273 	}
   1274 	rcu_read_unlock();
   1275 
   1276 	e->sched_attr = ctx->sched;
   1277 	e->guilty = atomic_read(&ctx->guilty_count);
   1278 	e->active = atomic_read(&ctx->active_count);
   1279 
   1280 	capture = i915_gem_context_no_error_capture(ctx);
   1281 
   1282 	i915_gem_context_put(ctx);
   1283 	return capture;
   1284 }
   1285 
   1286 struct intel_engine_capture_vma {
   1287 	struct intel_engine_capture_vma *next;
   1288 	struct i915_vma *vma;
   1289 	char name[16];
   1290 };
   1291 
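/*
 * Add a vma to the capture list, taking a reference and an active-tracker
 * hold so its pages cannot be released before they are copied.  Returns
 * the (possibly unchanged) list head; failures are silently skipped.
 */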
   1292 static struct intel_engine_capture_vma *
   1293 capture_vma(struct intel_engine_capture_vma *next,
   1294 	    struct i915_vma *vma,
   1295 	    const char *name,
   1296 	    gfp_t gfp)
   1297 {
   1298 	struct intel_engine_capture_vma *c;
   1299 
   1300 	if (!vma)
   1301 		return next;
   1302 
   1303 	c = kmalloc(sizeof(*c), gfp);
   1304 	if (!c)
   1305 		return next;
   1306 
   1307 	if (!i915_active_acquire_if_busy(&vma->active)) {
   1308 		kfree(c);
   1309 		return next;
   1310 	}
   1311 
   1312 	strcpy(c->name, name);
   1313 	c->vma = i915_vma_get(vma);
   1314 
   1315 	c->next = next;
   1316 	return c;
   1317 }
   1318 
   1319 static struct intel_engine_capture_vma *
   1320 capture_user(struct intel_engine_capture_vma *capture,
   1321 	     const struct i915_request *rq,
   1322 	     gfp_t gfp)
   1323 {
   1324 	struct i915_capture_list *c;
   1325 
   1326 	for (c = rq->capture_list; c; c = c->next)
   1327 		capture = capture_vma(capture, c->vma, "user", gfp);
   1328 
   1329 	return capture;
   1330 }
   1331 
   1332 static struct i915_vma_coredump *
   1333 capture_object(const struct intel_gt *gt,
   1334 	       struct drm_i915_gem_object *obj,
   1335 	       const char *name,
   1336 	       struct i915_vma_compress *compress)
   1337 {
   1338 	if (obj && i915_gem_object_has_pages(obj)) {
   1339 		struct i915_vma fake = {
   1340 			.node = { .start = U64_MAX, .size = obj->base.size },
   1341 			.size = obj->base.size,
   1342 			.pages = obj->mm.pages,
   1343 			.obj = obj,
   1344 		};
   1345 
   1346 		return i915_vma_coredump_create(gt, &fake, name, compress);
   1347 	} else {
   1348 		return NULL;
   1349 	}
   1350 }
   1351 
   1352 static void add_vma(struct intel_engine_coredump *ee,
   1353 		    struct i915_vma_coredump *vma)
   1354 {
   1355 	if (vma) {
   1356 		vma->next = ee->vma;
   1357 		ee->vma = vma;
   1358 	}
   1359 }
   1360 
   1361 struct intel_engine_coredump *
   1362 intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
   1363 {
   1364 	struct intel_engine_coredump *ee;
   1365 
   1366 	ee = kzalloc(sizeof(*ee), gfp);
   1367 	if (!ee)
   1368 		return NULL;
   1369 
   1370 	ee->engine = engine;
   1371 
   1372 	engine_record_registers(ee);
   1373 	engine_record_execlists(ee);
   1374 
   1375 	return ee;
   1376 }
   1377 
   1378 struct intel_engine_capture_vma *
   1379 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
   1380 				  struct i915_request *rq,
   1381 				  gfp_t gfp)
   1382 {
   1383 	struct intel_engine_capture_vma *vma = NULL;
   1384 
   1385 	ee->simulated |= record_context(&ee->context, rq);
   1386 	if (ee->simulated)
   1387 		return NULL;
   1388 
   1389 	/*
   1390 	 * We need to copy these to an anonymous buffer
   1391 	 * as the simplest method to avoid being overwritten
   1392 	 * by userspace.
   1393 	 */
   1394 	vma = capture_vma(vma, rq->batch, "batch", gfp);
   1395 	vma = capture_user(vma, rq, gfp);
   1396 	vma = capture_vma(vma, rq->ring->vma, "ring", gfp);
   1397 	vma = capture_vma(vma, rq->context->state, "HW context", gfp);
   1398 
   1399 	ee->rq_head = rq->head;
   1400 	ee->rq_post = rq->postfix;
   1401 	ee->rq_tail = rq->tail;
   1402 
   1403 	return vma;
   1404 }
   1405 
   1406 void
   1407 intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
   1408 			      struct intel_engine_capture_vma *capture,
   1409 			      struct i915_vma_compress *compress)
   1410 {
   1411 	const struct intel_engine_cs *engine = ee->engine;
   1412 
   1413 	while (capture) {
   1414 		struct intel_engine_capture_vma *this = capture;
   1415 		struct i915_vma *vma = this->vma;
   1416 
   1417 		add_vma(ee,
   1418 			i915_vma_coredump_create(engine->gt,
   1419 						 vma, this->name,
   1420 						 compress));
   1421 
   1422 		i915_active_release(&vma->active);
   1423 		i915_vma_put(vma);
   1424 
   1425 		capture = this->next;
   1426 		kfree(this);
   1427 	}
   1428 
   1429 	add_vma(ee,
   1430 		i915_vma_coredump_create(engine->gt,
   1431 					 engine->status_page.vma,
   1432 					 "HW Status",
   1433 					 compress));
   1434 
   1435 	add_vma(ee,
   1436 		i915_vma_coredump_create(engine->gt,
   1437 					 engine->wa_ctx.vma,
   1438 					 "WA context",
   1439 					 compress));
   1440 
   1441 	add_vma(ee,
   1442 		capture_object(engine->gt,
   1443 			       engine->default_state,
   1444 			       "NULL context",
   1445 			       compress));
   1446 }
   1447 
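/*
 * Capture one engine: allocate the coredump, snapshot the currently
 * executing request under the engine lock, then copy out the associated
 * vma contents once the lock has been dropped.
 */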
   1448 static struct intel_engine_coredump *
   1449 capture_engine(struct intel_engine_cs *engine,
   1450 	       struct i915_vma_compress *compress)
   1451 {
   1452 	struct intel_engine_capture_vma *capture = NULL;
   1453 	struct intel_engine_coredump *ee;
   1454 	struct i915_request *rq;
   1455 	unsigned long flags;
   1456 
   1457 	ee = intel_engine_coredump_alloc(engine, GFP_KERNEL);
   1458 	if (!ee)
   1459 		return NULL;
   1460 
   1461 	spin_lock_irqsave(&engine->active.lock, flags);
   1462 	rq = intel_engine_find_active_request(engine);
   1463 	if (rq)
   1464 		capture = intel_engine_coredump_add_request(ee, rq,
   1465 							    ATOMIC_MAYFAIL);
   1466 	spin_unlock_irqrestore(&engine->active.lock, flags);
   1467 	if (!capture) {
   1468 		kfree(ee);
   1469 		return NULL;
   1470 	}
   1471 
   1472 	intel_engine_coredump_add_vma(ee, capture, compress);
   1473 
   1474 	return ee;
   1475 }
   1476 
   1477 static void
   1478 gt_record_engines(struct intel_gt_coredump *gt,
   1479 		  struct i915_vma_compress *compress)
   1480 {
   1481 	struct intel_engine_cs *engine;
   1482 	enum intel_engine_id id;
   1483 
   1484 	for_each_engine(engine, gt->_gt, id) {
   1485 		struct intel_engine_coredump *ee;
   1486 
   1487 		/* Refill our page pool before entering atomic section */
   1488 		pool_refill(&compress->pool, ALLOW_FAIL);
   1489 
   1490 		ee = capture_engine(engine, compress);
   1491 		if (!ee)
   1492 			continue;
   1493 
   1494 		gt->simulated |= ee->simulated;
   1495 		if (ee->simulated) {
   1496 			kfree(ee);
   1497 			continue;
   1498 		}
   1499 
   1500 		ee->next = gt->engine;
   1501 		gt->engine = ee;
   1502 	}
   1503 }
   1504 
   1505 static struct intel_uc_coredump *
   1506 gt_record_uc(struct intel_gt_coredump *gt,
   1507 	     struct i915_vma_compress *compress)
   1508 {
   1509 	const struct intel_uc *uc = &gt->_gt->uc;
   1510 	struct intel_uc_coredump *error_uc;
   1511 
   1512 	error_uc = kzalloc(sizeof(*error_uc), ALLOW_FAIL);
   1513 	if (!error_uc)
   1514 		return NULL;
   1515 
   1516 	memcpy(&error_uc->guc_fw, &uc->guc.fw, sizeof(uc->guc.fw));
   1517 	memcpy(&error_uc->huc_fw, &uc->huc.fw, sizeof(uc->huc.fw));
   1518 
   1519 	/* Non-default firmware paths will be specified by the modparam.
    1520 	 * As modparams are generally accessible from userspace, make
    1521 	 * explicit copies of the firmware paths.
   1522 	 */
   1523 	error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL);
   1524 	error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL);
   1525 	error_uc->guc_log =
   1526 		i915_vma_coredump_create(gt->_gt,
   1527 					 uc->guc.log.vma, "GuC log buffer",
   1528 					 compress);
   1529 
   1530 	return error_uc;
   1531 }
   1532 
   1533 static void gt_capture_prepare(struct intel_gt_coredump *gt)
   1534 {
   1535 	struct i915_ggtt *ggtt = gt->_gt->ggtt;
   1536 
   1537 	mutex_lock(&ggtt->error_mutex);
   1538 }
   1539 
   1540 static void gt_capture_finish(struct intel_gt_coredump *gt)
   1541 {
   1542 	struct i915_ggtt *ggtt = gt->_gt->ggtt;
   1543 
   1544 	if (drm_mm_node_allocated(&ggtt->error_capture))
   1545 		ggtt->vm.clear_range(&ggtt->vm,
   1546 				     ggtt->error_capture.start,
   1547 				     PAGE_SIZE);
   1548 
   1549 	mutex_unlock(&ggtt->error_mutex);
   1550 }
   1551 
   1552 /* Capture all registers which don't fit into another category. */
   1553 static void gt_record_regs(struct intel_gt_coredump *gt)
   1554 {
   1555 	struct intel_uncore *uncore = gt->_gt->uncore;
   1556 	struct drm_i915_private *i915 = uncore->i915;
   1557 	int i;
   1558 
   1559 	/*
   1560 	 * General organization
   1561 	 * 1. Registers specific to a single generation
   1562 	 * 2. Registers which belong to multiple generations
   1563 	 * 3. Feature specific registers.
   1564 	 * 4. Everything else
   1565 	 * Please try to follow the order.
   1566 	 */
   1567 
   1568 	/* 1: Registers specific to a single generation */
   1569 	if (IS_VALLEYVIEW(i915)) {
   1570 		gt->gtier[0] = intel_uncore_read(uncore, GTIER);
   1571 		gt->ier = intel_uncore_read(uncore, VLV_IER);
   1572 		gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_VLV);
   1573 	}
   1574 
   1575 	if (IS_GEN(i915, 7))
   1576 		gt->err_int = intel_uncore_read(uncore, GEN7_ERR_INT);
   1577 
   1578 	if (INTEL_GEN(i915) >= 12) {
   1579 		gt->fault_data0 = intel_uncore_read(uncore,
   1580 						    GEN12_FAULT_TLB_DATA0);
   1581 		gt->fault_data1 = intel_uncore_read(uncore,
   1582 						    GEN12_FAULT_TLB_DATA1);
   1583 	} else if (INTEL_GEN(i915) >= 8) {
   1584 		gt->fault_data0 = intel_uncore_read(uncore,
   1585 						    GEN8_FAULT_TLB_DATA0);
   1586 		gt->fault_data1 = intel_uncore_read(uncore,
   1587 						    GEN8_FAULT_TLB_DATA1);
   1588 	}
   1589 
   1590 	if (IS_GEN(i915, 6)) {
   1591 		gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE);
   1592 		gt->gab_ctl = intel_uncore_read(uncore, GAB_CTL);
   1593 		gt->gfx_mode = intel_uncore_read(uncore, GFX_MODE);
   1594 	}
   1595 
   1596 	/* 2: Registers which belong to multiple generations */
   1597 	if (INTEL_GEN(i915) >= 7)
   1598 		gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_MT);
   1599 
   1600 	if (INTEL_GEN(i915) >= 6) {
   1601 		gt->derrmr = intel_uncore_read(uncore, DERRMR);
   1602 		if (INTEL_GEN(i915) < 12) {
   1603 			gt->error = intel_uncore_read(uncore, ERROR_GEN6);
   1604 			gt->done_reg = intel_uncore_read(uncore, DONE_REG);
   1605 		}
   1606 	}
   1607 
   1608 	/* 3: Feature specific registers */
   1609 	if (IS_GEN_RANGE(i915, 6, 7)) {
   1610 		gt->gam_ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
   1611 		gt->gac_eco = intel_uncore_read(uncore, GAC_ECO_BITS);
   1612 	}
   1613 
   1614 	if (IS_GEN_RANGE(i915, 8, 11))
   1615 		gt->gtt_cache = intel_uncore_read(uncore, HSW_GTT_CACHE_EN);
   1616 
   1617 	if (IS_GEN(i915, 12))
   1618 		gt->aux_err = intel_uncore_read(uncore, GEN12_AUX_ERR_DBG);
   1619 
   1620 	if (INTEL_GEN(i915) >= 12) {
   1621 		for (i = 0; i < GEN12_SFC_DONE_MAX; i++) {
   1622 			gt->sfc_done[i] =
   1623 				intel_uncore_read(uncore, GEN12_SFC_DONE(i));
   1624 		}
   1625 
   1626 		gt->gam_done = intel_uncore_read(uncore, GEN12_GAM_DONE);
   1627 	}
   1628 
   1629 	/* 4: Everything else */
   1630 	if (INTEL_GEN(i915) >= 11) {
   1631 		gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
   1632 		gt->gtier[0] =
   1633 			intel_uncore_read(uncore,
   1634 					  GEN11_RENDER_COPY_INTR_ENABLE);
   1635 		gt->gtier[1] =
   1636 			intel_uncore_read(uncore, GEN11_VCS_VECS_INTR_ENABLE);
   1637 		gt->gtier[2] =
   1638 			intel_uncore_read(uncore, GEN11_GUC_SG_INTR_ENABLE);
   1639 		gt->gtier[3] =
   1640 			intel_uncore_read(uncore,
   1641 					  GEN11_GPM_WGBOXPERF_INTR_ENABLE);
   1642 		gt->gtier[4] =
   1643 			intel_uncore_read(uncore,
   1644 					  GEN11_CRYPTO_RSVD_INTR_ENABLE);
   1645 		gt->gtier[5] =
   1646 			intel_uncore_read(uncore,
   1647 					  GEN11_GUNIT_CSME_INTR_ENABLE);
   1648 		gt->ngtier = 6;
   1649 	} else if (INTEL_GEN(i915) >= 8) {
   1650 		gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
   1651 		for (i = 0; i < 4; i++)
   1652 			gt->gtier[i] =
   1653 				intel_uncore_read(uncore, GEN8_GT_IER(i));
   1654 		gt->ngtier = 4;
   1655 	} else if (HAS_PCH_SPLIT(i915)) {
   1656 		gt->ier = intel_uncore_read(uncore, DEIER);
   1657 		gt->gtier[0] = intel_uncore_read(uncore, GTIER);
   1658 		gt->ngtier = 1;
   1659 	} else if (IS_GEN(i915, 2)) {
   1660 		gt->ier = intel_uncore_read16(uncore, GEN2_IER);
   1661 	} else if (!IS_VALLEYVIEW(i915)) {
   1662 		gt->ier = intel_uncore_read(uncore, GEN2_IER);
   1663 	}
   1664 	gt->eir = intel_uncore_read(uncore, EIR);
   1665 	gt->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER);
   1666 }
   1667 
   1668 /*
    1669  * Generate a semi-unique error code. The code is not meant to have meaning; its
    1670  * only purpose is to try to prevent false duplicated bug reports by
   1671  * grossly estimating a GPU error state.
   1672  *
   1673  * TODO Ideally, hashing the batchbuffer would be a very nice way to determine
   1674  * the hang if we could strip the GTT offset information from it.
   1675  *
   1676  * It's only a small step better than a random number in its current form.
   1677  */
   1678 static u32 generate_ecode(const struct intel_engine_coredump *ee)
   1679 {
   1680 	/*
   1681 	 * IPEHR would be an ideal way to detect errors, as it's the gross
    1682 	 * measure of "the command that hung". However, it has some very common
    1683 	 * synchronization commands which almost always appear in cases that are
    1684 	 * strictly a client bug. Use instdone to differentiate some of those.
   1685 	 */
   1686 	return ee ? ee->ipehr ^ ee->instdone.instdone : 0;
   1687 }
   1688 
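/*
 * Compose the one-line summary logged to the console: the generation, a
 * mask of hung engines, the ecode and, if known, the first process
 * involved.
 */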
   1689 static const char *error_msg(struct i915_gpu_coredump *error)
   1690 {
   1691 	struct intel_engine_coredump *first = NULL;
   1692 	struct intel_gt_coredump *gt;
   1693 	intel_engine_mask_t engines;
   1694 	int len;
   1695 
   1696 	engines = 0;
   1697 	for (gt = error->gt; gt; gt = gt->next) {
   1698 		struct intel_engine_coredump *cs;
   1699 
   1700 		if (gt->engine && !first)
   1701 			first = gt->engine;
   1702 
   1703 		for (cs = gt->engine; cs; cs = cs->next)
   1704 			engines |= cs->engine->mask;
   1705 	}
   1706 
   1707 	len = scnprintf(error->error_msg, sizeof(error->error_msg),
   1708 			"GPU HANG: ecode %d:%x:%08x",
   1709 			INTEL_GEN(error->i915), engines,
   1710 			generate_ecode(first));
   1711 	if (first && first->context.pid) {
   1712 		/* Just show the first executing process, more is confusing */
   1713 		len += scnprintf(error->error_msg + len,
   1714 				 sizeof(error->error_msg) - len,
   1715 				 ", in %s [%d]",
   1716 				 first->context.comm, first->context.pid);
   1717 	}
   1718 
   1719 	return error->error_msg;
   1720 }
   1721 
   1722 static void capture_gen(struct i915_gpu_coredump *error)
   1723 {
   1724 	struct drm_i915_private *i915 = error->i915;
   1725 
   1726 	error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count);
   1727 	error->suspended = i915->runtime_pm.suspended;
   1728 
   1729 	error->iommu = -1;
   1730 #ifdef CONFIG_INTEL_IOMMU
   1731 	error->iommu = intel_iommu_gfx_mapped;
   1732 #endif
   1733 	error->reset_count = i915_reset_count(&i915->gpu_error);
   1734 	error->suspend_count = i915->suspend_count;
   1735 
   1736 	i915_params_copy(&error->params, &i915_modparams);
   1737 	memcpy(&error->device_info,
   1738 	       INTEL_INFO(i915),
   1739 	       sizeof(error->device_info));
   1740 	memcpy(&error->runtime_info,
   1741 	       RUNTIME_INFO(i915),
   1742 	       sizeof(error->runtime_info));
   1743 	error->driver_caps = i915->caps;
   1744 }
   1745 
   1746 struct i915_gpu_coredump *
   1747 i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
   1748 {
   1749 	struct i915_gpu_coredump *error;
   1750 
   1751 	if (!i915_modparams.error_capture)
   1752 		return NULL;
   1753 
   1754 	error = kzalloc(sizeof(*error), gfp);
   1755 	if (!error)
   1756 		return NULL;
   1757 
   1758 	kref_init(&error->ref);
   1759 	error->i915 = i915;
   1760 
   1761 	error->time = ktime_get_real();
   1762 	error->boottime = ktime_get_boottime();
   1763 	error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time);
   1764 	error->capture = jiffies;
   1765 
   1766 	capture_gen(error);
   1767 
   1768 	return error;
   1769 }
   1770 
   1771 #define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))
   1772 
   1773 struct intel_gt_coredump *
   1774 intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
   1775 {
   1776 	struct intel_gt_coredump *gc;
   1777 
   1778 	gc = kzalloc(sizeof(*gc), gfp);
   1779 	if (!gc)
   1780 		return NULL;
   1781 
   1782 	gc->_gt = gt;
   1783 	gc->awake = intel_gt_pm_is_awake(gt);
   1784 
   1785 	gt_record_regs(gc);
   1786 	gt_record_fences(gc);
   1787 
   1788 	return gc;
   1789 }
   1790 
   1791 struct i915_vma_compress *
   1792 i915_vma_capture_prepare(struct intel_gt_coredump *gt)
   1793 {
   1794 	struct i915_vma_compress *compress;
   1795 
   1796 	compress = kmalloc(sizeof(*compress), ALLOW_FAIL);
   1797 	if (!compress)
   1798 		return NULL;
   1799 
   1800 	if (!compress_init(compress)) {
   1801 		kfree(compress);
   1802 		return NULL;
   1803 	}
   1804 
   1805 	gt_capture_prepare(gt);
   1806 
   1807 	return compress;
   1808 }
   1809 
   1810 void i915_vma_capture_finish(struct intel_gt_coredump *gt,
   1811 			     struct i915_vma_compress *compress)
   1812 {
   1813 	if (!compress)
   1814 		return;
   1815 
   1816 	gt_capture_finish(gt);
   1817 
   1818 	compress_fini(compress);
   1819 	kfree(compress);
   1820 }
   1821 
   1822 struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915)
   1823 {
   1824 	struct i915_gpu_coredump *error;
   1825 
   1826 	/* Check if GPU capture has been disabled */
   1827 	error = READ_ONCE(i915->gpu_error.first_error);
   1828 	if (IS_ERR(error))
   1829 		return error;
   1830 
   1831 	error = i915_gpu_coredump_alloc(i915, ALLOW_FAIL);
   1832 	if (!error)
   1833 		return ERR_PTR(-ENOMEM);
   1834 
   1835 	error->gt = intel_gt_coredump_alloc(&i915->gt, ALLOW_FAIL);
   1836 	if (error->gt) {
   1837 		struct i915_vma_compress *compress;
   1838 
   1839 		compress = i915_vma_capture_prepare(error->gt);
   1840 		if (!compress) {
   1841 			kfree(error->gt);
   1842 			kfree(error);
   1843 			return ERR_PTR(-ENOMEM);
   1844 		}
   1845 
   1846 		gt_record_engines(error->gt, compress);
   1847 
   1848 		if (INTEL_INFO(i915)->has_gt_uc)
   1849 			error->gt->uc = gt_record_uc(error->gt, compress);
   1850 
   1851 		i915_vma_capture_finish(error->gt, compress);
   1852 
   1853 		error->simulated |= error->gt->simulated;
   1854 	}
   1855 
   1856 	error->overlay = intel_overlay_capture_error_state(i915);
   1857 	error->display = intel_display_capture_error_state(i915);
   1858 
   1859 	return error;
   1860 }
   1861 
   1862 void i915_error_state_store(struct i915_gpu_coredump *error)
   1863 {
   1864 	struct drm_i915_private *i915;
   1865 	static bool warned;
   1866 
   1867 	if (IS_ERR_OR_NULL(error))
   1868 		return;
   1869 
   1870 	i915 = error->i915;
   1871 	dev_info(i915->drm.dev, "%s\n", error_msg(error));
   1872 
   1873 	if (error->simulated ||
   1874 	    cmpxchg(&i915->gpu_error.first_error, NULL, error))
   1875 		return;
   1876 
   1877 	i915_gpu_coredump_get(error);
   1878 
   1879 	if (!xchg(&warned, true) &&
   1880 	    ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) {
   1881 #ifdef __NetBSD__
   1882 		pr_info("Please file a bug at https://gnats.NetBSD.org/"
   1883 		    " providing the dmesg log by booting with debug/verbose"
   1884 		    " as in `boot -vx'.\n");
   1885 #else
   1886 		pr_info("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n");
   1887 		pr_info("Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/intel/issues/new.\n");
   1888 		pr_info("Please see https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs for details.\n");
   1889 		pr_info("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n");
   1890 		pr_info("The GPU crash dump is required to analyze GPU hangs, so please always attach it.\n");
   1891 		pr_info("GPU crash dump saved to /sys/class/drm/card%d/error\n",
   1892 			i915->drm.primary->index);
   1893 #endif
   1894 	}
   1895 }
   1896 
   1897 /**
   1898  * i915_capture_error_state - capture an error record for later analysis
   1899  * @i915: i915 device
   1900  *
   1901  * Should be called when an error is detected (either a hang or an error
   1902  * interrupt) to capture error state from the time of the error.  Fills
   1903  * out a structure which becomes available in debugfs for user level tools
   1904  * to pick up.
   1905  */
   1906 void i915_capture_error_state(struct drm_i915_private *i915)
   1907 {
   1908 	struct i915_gpu_coredump *error;
   1909 
   1910 	error = i915_gpu_coredump(i915);
   1911 	if (IS_ERR(error)) {
   1912 		cmpxchg(&i915->gpu_error.first_error, NULL, error);
   1913 		return;
   1914 	}
   1915 
   1916 	i915_error_state_store(error);
   1917 	i915_gpu_coredump_put(error);
   1918 }
   1919 
   1920 struct i915_gpu_coredump *
   1921 i915_first_error_state(struct drm_i915_private *i915)
   1922 {
   1923 	struct i915_gpu_coredump *error;
   1924 
   1925 	spin_lock_irq(&i915->gpu_error.lock);
   1926 	error = i915->gpu_error.first_error;
   1927 	if (!IS_ERR_OR_NULL(error))
   1928 		i915_gpu_coredump_get(error);
   1929 	spin_unlock_irq(&i915->gpu_error.lock);
   1930 
   1931 	return error;
   1932 }
   1933 
   1934 void i915_reset_error_state(struct drm_i915_private *i915)
   1935 {
   1936 	struct i915_gpu_coredump *error;
   1937 
   1938 	spin_lock_irq(&i915->gpu_error.lock);
   1939 	error = i915->gpu_error.first_error;
   1940 	if (error != ERR_PTR(-ENODEV)) /* if disabled, always disabled */
   1941 		i915->gpu_error.first_error = NULL;
   1942 	spin_unlock_irq(&i915->gpu_error.lock);
   1943 
   1944 	if (!IS_ERR_OR_NULL(error))
   1945 		i915_gpu_coredump_put(error);
   1946 }
   1947 
   1948 void i915_disable_error_state(struct drm_i915_private *i915, int err)
   1949 {
   1950 	spin_lock_irq(&i915->gpu_error.lock);
   1951 	if (!i915->gpu_error.first_error)
   1952 		i915->gpu_error.first_error = ERR_PTR(err);
   1953 	spin_unlock_irq(&i915->gpu_error.lock);
   1954 }
   1955