Home | History | Annotate | Line # | Download | only in i915
      1 /*	$NetBSD: i915_gpu_error.h,v 1.2 2021/12/18 23:45:28 riastradh Exp $	*/
      2 
      3 /*
      4  * SPDX-License-Identifier: MIT
      5  *
      6  * Copyright © 2008-2018 Intel Corporation
      7  */
      8 
      9 #ifndef _I915_GPU_ERROR_H_
     10 #define _I915_GPU_ERROR_H_
     11 
     12 #include <linux/atomic.h>
     13 #include <linux/kref.h>
     14 #include <linux/ktime.h>
     15 #include <linux/sched.h>
     16 
     17 #include <drm/drm_mm.h>
     18 
     19 #include "gt/intel_engine.h"
     20 #include "gt/uc/intel_uc_fw.h"
     21 
     22 #include "intel_device_info.h"
     23 
     24 #include "i915_gem.h"
     25 #include "i915_gem_gtt.h"
     26 #include "i915_params.h"
     27 #include "i915_scheduler.h"
     28 
/*
 * Forward declarations for types this header only refers to through
 * pointers.  struct i915_request and struct intel_gt are declared here as
 * well so the prototypes below do not depend on another header having
 * introduced those tags first.
 */
struct drm_i915_private;
struct i915_request;
struct i915_vma_compress;
struct intel_engine_capture_vma;
struct intel_gt;
struct intel_overlay_error_state;
struct intel_display_error_state;
     34 
     35 struct i915_vma_coredump {
     36 	struct i915_vma_coredump *next;
     37 
     38 	char name[20];
     39 
     40 	u64 gtt_offset;
     41 	u64 gtt_size;
     42 	u32 gtt_page_sizes;
     43 
     44 	int num_pages;
     45 	int page_count;
     46 	int unused;
     47 	u32 *pages[0];
     48 };
     49 
     50 struct i915_request_coredump {
     51 	unsigned long flags;
     52 	pid_t pid;
     53 	u32 context;
     54 	u32 seqno;
     55 	u32 start;
     56 	u32 head;
     57 	u32 tail;
     58 	struct i915_sched_attr sched_attr;
     59 };
     60 
     61 struct intel_engine_coredump {
     62 	const struct intel_engine_cs *engine;
     63 
     64 	bool simulated;
     65 	u32 reset_count;
     66 
     67 	/* position of active request inside the ring */
     68 	u32 rq_head, rq_post, rq_tail;
     69 
     70 	/* Register state */
     71 	u32 ccid;
     72 	u32 start;
     73 	u32 tail;
     74 	u32 head;
     75 	u32 ctl;
     76 	u32 mode;
     77 	u32 hws;
     78 	u32 ipeir;
     79 	u32 ipehr;
     80 	u32 bbstate;
     81 	u32 instpm;
     82 	u32 instps;
     83 	u64 bbaddr;
     84 	u64 acthd;
     85 	u32 fault_reg;
     86 	u64 faddr;
     87 	u32 rc_psmi; /* sleep state */
     88 	struct intel_instdone instdone;
     89 
     90 	struct i915_gem_context_coredump {
     91 		char comm[TASK_COMM_LEN];
     92 		pid_t pid;
     93 		int active;
     94 		int guilty;
     95 		struct i915_sched_attr sched_attr;
     96 	} context;
     97 
     98 	struct i915_vma_coredump *vma;
     99 
    100 	struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
    101 	unsigned int num_ports;
    102 
    103 	struct {
    104 		u32 gfx_mode;
    105 		union {
    106 			u64 pdp[4];
    107 			u32 pp_dir_base;
    108 		};
    109 	} vm_info;
    110 
    111 	struct intel_engine_coredump *next;
    112 };
    113 
    114 struct intel_gt_coredump {
    115 	const struct intel_gt *_gt;
    116 	bool awake;
    117 	bool simulated;
    118 
    119 	/* Generic register state */
    120 	u32 eir;
    121 	u32 pgtbl_er;
    122 	u32 ier;
    123 	u32 gtier[6], ngtier;
    124 	u32 derrmr;
    125 	u32 forcewake;
    126 	u32 error; /* gen6+ */
    127 	u32 err_int; /* gen7 */
    128 	u32 fault_data0; /* gen8, gen9 */
    129 	u32 fault_data1; /* gen8, gen9 */
    130 	u32 done_reg;
    131 	u32 gac_eco;
    132 	u32 gam_ecochk;
    133 	u32 gab_ctl;
    134 	u32 gfx_mode;
    135 	u32 gtt_cache;
    136 	u32 aux_err; /* gen12 */
    137 	u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */
    138 	u32 gam_done; /* gen12 */
    139 
    140 	u32 nfence;
    141 	u64 fence[I915_MAX_NUM_FENCES];
    142 
    143 	struct intel_engine_coredump *engine;
    144 
    145 	struct intel_uc_coredump {
    146 		struct intel_uc_fw guc_fw;
    147 		struct intel_uc_fw huc_fw;
    148 		struct i915_vma_coredump *guc_log;
    149 	} *uc;
    150 
    151 	struct intel_gt_coredump *next;
    152 };
    153 
    154 struct i915_gpu_coredump {
    155 	struct kref ref;
    156 	ktime_t time;
    157 	ktime_t boottime;
    158 	ktime_t uptime;
    159 	unsigned long capture;
    160 
    161 	struct drm_i915_private *i915;
    162 
    163 	struct intel_gt_coredump *gt;
    164 
    165 	char error_msg[128];
    166 	bool simulated;
    167 	bool wakelock;
    168 	bool suspended;
    169 	int iommu;
    170 	u32 reset_count;
    171 	u32 suspend_count;
    172 
    173 	struct intel_device_info device_info;
    174 	struct intel_runtime_info runtime_info;
    175 	struct intel_driver_caps driver_caps;
    176 	struct i915_params params;
    177 
    178 	struct intel_overlay_error_state *overlay;
    179 	struct intel_display_error_state *display;
    180 
    181 	struct scatterlist *sgl, *fit;
    182 };
    183 
    184 struct i915_gpu_error {
    185 	/* For reset and error_state handling. */
    186 	spinlock_t lock;
    187 	/* Protected by the above dev->gpu_error.lock. */
    188 	struct i915_gpu_coredump *first_error;
    189 
    190 	atomic_t pending_fb_pin;
    191 
    192 	/** Number of times the device has been reset (global) */
    193 	atomic_t reset_count;
    194 
    195 	/** Number of times an engine has been reset */
    196 	atomic_t reset_engine_count[I915_NUM_ENGINES];
    197 };
    198 
    199 struct drm_i915_error_state_buf {
    200 	struct drm_i915_private *i915;
    201 	struct scatterlist *sgl, *cur, *end;
    202 
    203 	char *buf;
    204 	size_t bytes;
    205 	size_t size;
    206 	loff_t iter;
    207 
    208 	int err;
    209 };
    210 
    211 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
    212 
    213 __printf(2, 3)
    214 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
    215 
    216 struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915);
    217 void i915_capture_error_state(struct drm_i915_private *i915);
    218 
    219 struct i915_gpu_coredump *
    220 i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
    221 
    222 struct intel_gt_coredump *
    223 intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);
    224 
    225 struct intel_engine_coredump *
    226 intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
    227 
    228 struct intel_engine_capture_vma *
    229 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
    230 				  struct i915_request *rq,
    231 				  gfp_t gfp);
    232 
    233 void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
    234 				   struct intel_engine_capture_vma *capture,
    235 				   struct i915_vma_compress *compress);
    236 
/*
 * Bracket vma capture for @gt: prepare() hands out the compress state,
 * finish() takes it back.
 */
struct i915_vma_compress *i915_vma_capture_prepare(struct intel_gt_coredump *gt);

void
i915_vma_capture_finish(struct intel_gt_coredump *gt,
			struct i915_vma_compress *compress);

/* Store @error as the device's recorded error state. */
void
i915_error_state_store(struct i915_gpu_coredump *error);
    244 
    245 static inline struct i915_gpu_coredump *
    246 i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
    247 {
    248 	kref_get(&gpu->ref);
    249 	return gpu;
    250 }
    251 
    252 ssize_t
    253 i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
    254 				 char *buf, loff_t offset, size_t count);
    255 
    256 void __i915_gpu_coredump_free(struct kref *kref);
    257 static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
    258 {
    259 	if (gpu)
    260 		kref_put(&gpu->ref, __i915_gpu_coredump_free);
    261 }
    262 
/*
 * Accessors for the recorded error state of @i915.
 *
 * i915_first_error_state() may hand back an ERR_PTR() value: the variant used
 * when error capture is compiled out returns ERR_PTR(-ENODEV).
 */
struct i915_gpu_coredump *
i915_first_error_state(struct drm_i915_private *i915);

void
i915_reset_error_state(struct drm_i915_private *i915);

void
i915_disable_error_state(struct drm_i915_private *i915, int err);
    266 
    267 #else
    268 
/* Error capture compiled out: capturing is a no-op. */
static inline void
i915_capture_error_state(struct drm_i915_private *i915)
{
	/* intentionally empty */
}
    272 
    273 static inline struct i915_gpu_coredump *
    274 i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
    275 {
    276 	return NULL;
    277 }
    278 
    279 static inline struct intel_gt_coredump *
    280 intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
    281 {
    282 	return NULL;
    283 }
    284 
    285 static inline struct intel_engine_coredump *
    286 intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
    287 {
    288 	return NULL;
    289 }
    290 
    291 static inline struct intel_engine_capture_vma *
    292 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
    293 				  struct i915_request *rq,
    294 				  gfp_t gfp)
    295 {
    296 	return NULL;
    297 }
    298 
/* Error capture compiled out: adding vmas is a no-op. */
static inline void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
						 struct intel_engine_capture_vma *capture,
						 struct i915_vma_compress *compress)
{
	/* intentionally empty */
}
    305 
/* Error capture compiled out: there is no compress state to hand out. */
static inline struct i915_vma_compress *i915_vma_capture_prepare(struct intel_gt_coredump *gt)
{
	return NULL;
}
    311 
/* Error capture compiled out: nothing was prepared, nothing to finish. */
static inline void i915_vma_capture_finish(struct intel_gt_coredump *gt,
					   struct i915_vma_compress *compress)
{
	/* intentionally empty */
}
    317 
/* Error capture compiled out: storing an error state is a no-op. */
static inline void i915_error_state_store(struct i915_gpu_coredump *error)
{
	/* intentionally empty */
}
    322 
/* Error capture compiled out: there is no reference to drop. */
static inline void
i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
{
	/* intentionally empty */
}
    326 
    327 static inline struct i915_gpu_coredump *
    328 i915_first_error_state(struct drm_i915_private *i915)
    329 {
    330 	return ERR_PTR(-ENODEV);
    331 }
    332 
/* Error capture compiled out: there is no error state to reset. */
static inline void
i915_reset_error_state(struct drm_i915_private *i915)
{
	/* intentionally empty */
}
    336 
/* Error capture compiled out: disabling it is a no-op; @err is ignored. */
static inline void
i915_disable_error_state(struct drm_i915_private *i915, int err)
{
	/* intentionally empty */
}
    341 
    342 #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
    343 
    344 #endif /* _I915_GPU_ERROR_H_ */
    345