/* $NetBSD: i915_gpu_error.h,v 1.2 2021/12/18 23:45:28 riastradh Exp $ */

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright 2008-2018 Intel Corporation
 */

#ifndef _I915_GPU_ERROR_H_
#define _I915_GPU_ERROR_H_

#include <linux/atomic.h>
#include <linux/kref.h>
#include <linux/ktime.h>
#include <linux/sched.h>

#include <drm/drm_mm.h>

#include "gt/intel_engine.h"
#include "gt/uc/intel_uc_fw.h"

#include "intel_device_info.h"

#include "i915_gem.h"
#include "i915_gem_gtt.h"
#include "i915_params.h"
#include "i915_scheduler.h"

struct drm_i915_private;
struct i915_vma_compress;
struct intel_engine_capture_vma;
struct intel_overlay_error_state;
struct intel_display_error_state;

struct i915_vma_coredump {
	struct i915_vma_coredump *next;

	char name[20];

	u64 gtt_offset;
	u64 gtt_size;
	u32 gtt_page_sizes;

	int num_pages;
	int page_count;
	int unused;
	u32 *pages[0];
};

struct i915_request_coredump {
	unsigned long flags;
	pid_t pid;
	u32 context;
	u32 seqno;
	u32 start;
	u32 head;
	u32 tail;
	struct i915_sched_attr sched_attr;
};

struct intel_engine_coredump {
	const struct intel_engine_cs *engine;

	bool simulated;
	u32 reset_count;

	/* position of active request inside the ring */
	u32 rq_head, rq_post, rq_tail;

	/* Register state */
	u32 ccid;
	u32 start;
	u32 tail;
	u32 head;
	u32 ctl;
	u32 mode;
	u32 hws;
	u32 ipeir;
	u32 ipehr;
	u32 bbstate;
	u32 instpm;
	u32 instps;
	u64 bbaddr;
	u64 acthd;
	u32 fault_reg;
	u64 faddr;
	u32 rc_psmi; /* sleep state */
	struct intel_instdone instdone;

	struct i915_gem_context_coredump {
		char comm[TASK_COMM_LEN];
		pid_t pid;
		int active;
		int guilty;
		struct i915_sched_attr sched_attr;
	} context;

	struct i915_vma_coredump *vma;

	struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
	unsigned int num_ports;

	struct {
		u32 gfx_mode;
		union {
			u64 pdp[4];
			u32 pp_dir_base;
		};
	} vm_info;

	struct intel_engine_coredump *next;
};

struct intel_gt_coredump {
	const struct intel_gt *_gt;
	bool awake;
	bool simulated;

	/* Generic register state */
	u32 eir;
	u32 pgtbl_er;
	u32 ier;
	u32 gtier[6], ngtier;
	u32 derrmr;
	u32 forcewake;
	u32 error; /* gen6+ */
	u32 err_int; /* gen7 */
	u32 fault_data0; /* gen8, gen9 */
	u32 fault_data1; /* gen8, gen9 */
	u32 done_reg;
	u32 gac_eco;
	u32 gam_ecochk;
	u32 gab_ctl;
	u32 gfx_mode;
	u32 gtt_cache;
	u32 aux_err; /* gen12 */
	u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */
	u32 gam_done; /* gen12 */

	u32 nfence;
	u64 fence[I915_MAX_NUM_FENCES];

	struct intel_engine_coredump *engine;

	struct intel_uc_coredump {
		struct intel_uc_fw guc_fw;
		struct intel_uc_fw huc_fw;
		struct i915_vma_coredump *guc_log;
	} *uc;

	struct intel_gt_coredump *next;
};

struct i915_gpu_coredump {
	struct kref ref;
	ktime_t time;
	ktime_t boottime;
	ktime_t uptime;
	unsigned long capture;

	struct drm_i915_private *i915;

	struct intel_gt_coredump *gt;

	char error_msg[128];
	bool simulated;
	bool wakelock;
	bool suspended;
	int iommu;
	u32 reset_count;
	u32 suspend_count;

	struct intel_device_info device_info;
	struct intel_runtime_info runtime_info;
	struct intel_driver_caps driver_caps;
	struct i915_params params;

	struct intel_overlay_error_state *overlay;
	struct intel_display_error_state *display;

	struct scatterlist *sgl, *fit;
};

struct i915_gpu_error {
	/* For reset and error_state handling. */
	spinlock_t lock;
	/* Protected by the above dev->gpu_error.lock. */
	struct i915_gpu_coredump *first_error;

	atomic_t pending_fb_pin;

	/** Number of times the device has been reset (global) */
	atomic_t reset_count;

	/** Number of times an engine has been reset */
	atomic_t reset_engine_count[I915_NUM_ENGINES];
};

struct drm_i915_error_state_buf {
	struct drm_i915_private *i915;
	struct scatterlist *sgl, *cur, *end;

	char *buf;
	size_t bytes;
	size_t size;
	loff_t iter;

	int err;
};

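/*
 * Capture API.  The sequence below is only a sketch of how the pieces
 * fit together, inferred from the declarations that follow; the real
 * driver logic lives in i915_gpu_error.c and the variable names here
 * (error, gt, ee, capture, compress, rq) are illustrative:
 *
 *	error = i915_gpu_coredump_alloc(i915, gfp);
 *	error->gt = intel_gt_coredump_alloc(gt, gfp);
 *	compress = i915_vma_capture_prepare(error->gt);
 *	ee = intel_engine_coredump_alloc(engine, gfp);
 *	capture = intel_engine_coredump_add_request(ee, rq, gfp);
 *	intel_engine_coredump_add_vma(ee, capture, compress);
 *	i915_vma_capture_finish(error->gt, compress);
 *	i915_error_state_store(error);
 *
 * The stored coredump is reference counted with i915_gpu_coredump_get()
 * and i915_gpu_coredump_put(), and is read back through
 * i915_gpu_coredump_copy_to_buffer().
 */
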
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)

__printf(2, 3)
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);

struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915);
void i915_capture_error_state(struct drm_i915_private *i915);

struct i915_gpu_coredump *
i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);

struct intel_gt_coredump *
intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);

struct intel_engine_coredump *
intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);

struct intel_engine_capture_vma *
intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
				  struct i915_request *rq,
				  gfp_t gfp);

void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
				   struct intel_engine_capture_vma *capture,
				   struct i915_vma_compress *compress);

struct i915_vma_compress *
i915_vma_capture_prepare(struct intel_gt_coredump *gt);

void i915_vma_capture_finish(struct intel_gt_coredump *gt,
			     struct i915_vma_compress *compress);

void i915_error_state_store(struct i915_gpu_coredump *error);

static inline struct i915_gpu_coredump *
i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
{
	kref_get(&gpu->ref);
	return gpu;
}

ssize_t
i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
				 char *buf, loff_t offset, size_t count);

void __i915_gpu_coredump_free(struct kref *kref);
static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
{
	if (gpu)
		kref_put(&gpu->ref, __i915_gpu_coredump_free);
}

struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
void i915_reset_error_state(struct drm_i915_private *i915);
void i915_disable_error_state(struct drm_i915_private *i915, int err);

#else

static inline void i915_capture_error_state(struct drm_i915_private *i915)
{
}

static inline struct i915_gpu_coredump *
i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
{
	return NULL;
}

static inline struct intel_gt_coredump *
intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
{
	return NULL;
}

static inline struct intel_engine_coredump *
intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
{
	return NULL;
}

static inline struct intel_engine_capture_vma *
intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
				  struct i915_request *rq,
				  gfp_t gfp)
{
	return NULL;
}

static inline void
intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
			      struct intel_engine_capture_vma *capture,
			      struct i915_vma_compress *compress)
{
}

static inline struct i915_vma_compress *
i915_vma_capture_prepare(struct intel_gt_coredump *gt)
{
	return NULL;
}

static inline void
i915_vma_capture_finish(struct intel_gt_coredump *gt,
			struct i915_vma_compress *compress)
{
}

static inline void
i915_error_state_store(struct i915_gpu_coredump *error)
{
}

static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
{
}

static inline struct i915_gpu_coredump *
i915_first_error_state(struct drm_i915_private *i915)
{
	return ERR_PTR(-ENODEV);
}

static inline void i915_reset_error_state(struct drm_i915_private *i915)
{
}

static inline void i915_disable_error_state(struct drm_i915_private *i915,
					    int err)
{
}

#endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */

#endif /* _I915_GPU_ERROR_H_ */