1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2008-2018 Intel Corporation 5 */ 6 7 #ifndef _I915_GPU_ERROR_H_ 8 #define _I915_GPU_ERROR_H_ 9 10 #include <linux/atomic.h> 11 #include <linux/kref.h> 12 #include <linux/ktime.h> 13 #include <linux/sched.h> 14 15 #include <drm/drm_mm.h> 16 17 #include "display/intel_display_device.h" 18 #include "display/intel_display_params.h" 19 #include "gt/intel_engine.h" 20 #include "gt/intel_engine_types.h" 21 #include "gt/intel_gt_types.h" 22 #include "gt/uc/intel_uc_fw.h" 23 24 #include "intel_device_info.h" 25 26 #include "i915_gem.h" 27 #include "i915_gem_gtt.h" 28 #include "i915_params.h" 29 #include "i915_scheduler.h" 30 31 struct drm_i915_private; 32 struct i915_vma_compress; 33 struct intel_engine_capture_vma; 34 struct intel_overlay_error_state; 35 36 struct i915_vma_coredump { 37 struct i915_vma_coredump *next; 38 39 char name[20]; 40 41 u64 gtt_offset; 42 u64 gtt_size; 43 u32 gtt_page_sizes; 44 45 int unused; 46 struct list_head page_list; 47 }; 48 49 struct i915_request_coredump { 50 unsigned long flags; 51 pid_t pid; 52 u32 context; 53 u32 seqno; 54 u32 head; 55 u32 tail; 56 struct i915_sched_attr sched_attr; 57 }; 58 59 struct __guc_capture_parsed_output; 60 61 struct intel_engine_coredump { 62 const struct intel_engine_cs *engine; 63 64 bool hung; 65 bool simulated; 66 u32 reset_count; 67 68 /* position of active request inside the ring */ 69 u32 rq_head, rq_post, rq_tail; 70 71 /* Register state */ 72 u32 ccid; 73 u32 start; 74 u32 tail; 75 u32 head; 76 u32 ctl; 77 u32 mode; 78 u32 hws; 79 u32 ipeir; 80 u32 ipehr; 81 u32 esr; 82 u32 bbstate; 83 u32 instpm; 84 u32 instps; 85 u64 bbaddr; 86 u64 acthd; 87 u32 fault_reg; 88 u64 faddr; 89 u32 rc_psmi; /* sleep state */ 90 u32 nopid; 91 u32 excc; 92 u32 cmd_cctl; 93 u32 cscmdop; 94 u32 ctx_sr_ctl; 95 u32 dma_faddr_hi; 96 u32 dma_faddr_lo; 97 struct intel_instdone instdone; 98 99 /* GuC matched capture-lists info */ 100 struct intel_guc_state_capture *guc_capture; 101 struct __guc_capture_parsed_output *guc_capture_node; 102 103 struct i915_gem_context_coredump { 104 char comm[TASK_COMM_LEN]; 105 106 u64 total_runtime; 107 u64 avg_runtime; 108 109 pid_t pid; 110 int active; 111 int guilty; 112 struct i915_sched_attr sched_attr; 113 u32 hwsp_seqno; 114 } context; 115 116 struct i915_vma_coredump *vma; 117 118 struct i915_request_coredump execlist[EXECLIST_MAX_PORTS]; 119 unsigned int num_ports; 120 121 struct { 122 u32 gfx_mode; 123 union { 124 u64 pdp[4]; 125 u32 pp_dir_base; 126 }; 127 } vm_info; 128 129 struct intel_engine_coredump *next; 130 }; 131 132 struct intel_ctb_coredump { 133 u32 raw_head, head; 134 u32 raw_tail, tail; 135 u32 raw_status; 136 u32 desc_offset; 137 u32 cmds_offset; 138 u32 size; 139 }; 140 141 struct intel_gt_coredump { 142 const struct intel_gt *_gt; 143 bool awake; 144 bool simulated; 145 146 struct intel_gt_info info; 147 148 /* Generic register state */ 149 u32 eir; 150 u32 pgtbl_er; 151 u32 ier; 152 u32 gtier[6], ngtier; 153 u32 forcewake; 154 u32 error; /* gen6+ */ 155 u32 err_int; /* gen7 */ 156 u32 fault_data0; /* gen8, gen9 */ 157 u32 fault_data1; /* gen8, gen9 */ 158 u32 done_reg; 159 u32 gac_eco; 160 u32 gam_ecochk; 161 u32 gab_ctl; 162 u32 gfx_mode; 163 u32 gtt_cache; 164 u32 aux_err; /* gen12 */ 165 u32 gam_done; /* gen12 */ 166 u32 clock_frequency; 167 u32 clock_period_ns; 168 169 /* Display related */ 170 u32 derrmr; 171 u32 sfc_done[I915_MAX_SFC]; /* gen12 */ 172 173 u32 nfence; 174 u64 fence[I915_MAX_NUM_FENCES]; 175 176 struct intel_engine_coredump *engine; 177 178 struct intel_uc_coredump { 179 struct intel_uc_fw guc_fw; 180 struct intel_uc_fw huc_fw; 181 struct guc_info { 182 struct intel_ctb_coredump ctb[2]; 183 struct i915_vma_coredump *vma_ctb; 184 struct i915_vma_coredump *vma_log; 185 u32 timestamp; 186 u16 last_fence; 187 bool is_guc_capture; 188 } guc; 189 } *uc; 190 191 struct intel_gt_coredump *next; 192 }; 193 194 struct i915_gpu_coredump { 195 struct kref ref; 196 ktime_t time; 197 ktime_t boottime; 198 ktime_t uptime; 199 unsigned long capture; 200 201 struct drm_i915_private *i915; 202 203 struct intel_gt_coredump *gt; 204 205 char error_msg[128]; 206 bool simulated; 207 bool wakelock; 208 bool suspended; 209 int iommu; 210 u32 reset_count; 211 u32 suspend_count; 212 213 struct intel_device_info device_info; 214 struct intel_runtime_info runtime_info; 215 struct intel_display_device_info display_device_info; 216 struct intel_display_runtime_info display_runtime_info; 217 struct intel_driver_caps driver_caps; 218 struct i915_params params; 219 struct intel_display_params display_params; 220 221 struct intel_overlay_error_state *overlay; 222 223 struct scatterlist *sgl, *fit; 224 }; 225 226 struct i915_gpu_error { 227 /* For reset and error_state handling. */ 228 spinlock_t lock; 229 /* Protected by the above dev->gpu_error.lock. */ 230 struct i915_gpu_coredump *first_error; 231 232 atomic_t pending_fb_pin; 233 234 /** Number of times the device has been reset (global) */ 235 atomic_t reset_count; 236 237 /** Number of times an engine has been reset */ 238 atomic_t reset_engine_count[MAX_ENGINE_CLASS]; 239 }; 240 241 struct drm_i915_error_state_buf { 242 struct drm_i915_private *i915; 243 struct scatterlist *sgl, *cur, *end; 244 245 char *buf; 246 size_t bytes; 247 size_t size; 248 loff_t iter; 249 250 int err; 251 }; 252 253 static inline u32 i915_reset_count(struct i915_gpu_error *error) 254 { 255 return atomic_read(&error->reset_count); 256 } 257 258 static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, 259 const struct intel_engine_cs *engine) 260 { 261 return atomic_read(&error->reset_engine_count[engine->class]); 262 } 263 264 static inline void 265 i915_increase_reset_engine_count(struct i915_gpu_error *error, 266 const struct intel_engine_cs *engine) 267 { 268 atomic_inc(&error->reset_engine_count[engine->class]); 269 } 270 271 #define CORE_DUMP_FLAG_NONE 0x0 272 #define CORE_DUMP_FLAG_IS_GUC_CAPTURE BIT(0) 273 274 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) && IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) 275 void intel_klog_error_capture(struct intel_gt *gt, 276 intel_engine_mask_t engine_mask); 277 #else 278 static inline void intel_klog_error_capture(struct intel_gt *gt, 279 intel_engine_mask_t engine_mask) 280 { 281 } 282 #endif 283 284 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) 285 286 __printf(2, 3) 287 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); 288 289 void i915_capture_error_state(struct intel_gt *gt, 290 intel_engine_mask_t engine_mask, u32 dump_flags); 291 292 struct i915_gpu_coredump * 293 i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp); 294 295 struct intel_gt_coredump * 296 intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags); 297 298 struct intel_engine_coredump * 299 intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags); 300 301 struct intel_engine_capture_vma * 302 intel_engine_coredump_add_request(struct intel_engine_coredump *ee, 303 struct i915_request *rq, 304 gfp_t gfp); 305 306 void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, 307 struct intel_engine_capture_vma *capture, 308 struct i915_vma_compress *compress); 309 310 struct i915_vma_compress * 311 i915_vma_capture_prepare(struct intel_gt_coredump *gt); 312 313 void i915_vma_capture_finish(struct intel_gt_coredump *gt, 314 struct i915_vma_compress *compress); 315 316 void i915_error_state_store(struct i915_gpu_coredump *error); 317 318 static inline struct i915_gpu_coredump * 319 i915_gpu_coredump_get(struct i915_gpu_coredump *gpu) 320 { 321 kref_get(&gpu->ref); 322 return gpu; 323 } 324 325 ssize_t 326 i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error, 327 char *buf, loff_t offset, size_t count); 328 329 void __i915_gpu_coredump_free(struct kref *kref); 330 static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu) 331 { 332 if (gpu) 333 kref_put(&gpu->ref, __i915_gpu_coredump_free); 334 } 335 336 void i915_reset_error_state(struct drm_i915_private *i915); 337 void i915_disable_error_state(struct drm_i915_private *i915, int err); 338 339 void i915_gpu_error_debugfs_register(struct drm_i915_private *i915); 340 void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915); 341 void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915); 342 343 #else 344 345 __printf(2, 3) 346 static inline void 347 i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) 348 { 349 } 350 351 static inline void 352 i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags) 353 { 354 } 355 356 static inline struct i915_gpu_coredump * 357 i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp) 358 { 359 return NULL; 360 } 361 362 static inline struct intel_gt_coredump * 363 intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags) 364 { 365 return NULL; 366 } 367 368 static inline struct intel_engine_coredump * 369 intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags) 370 { 371 return NULL; 372 } 373 374 static inline struct intel_engine_capture_vma * 375 intel_engine_coredump_add_request(struct intel_engine_coredump *ee, 376 struct i915_request *rq, 377 gfp_t gfp) 378 { 379 return NULL; 380 } 381 382 static inline void 383 intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, 384 struct intel_engine_capture_vma *capture, 385 struct i915_vma_compress *compress) 386 { 387 } 388 389 static inline struct i915_vma_compress * 390 i915_vma_capture_prepare(struct intel_gt_coredump *gt) 391 { 392 return NULL; 393 } 394 395 static inline void 396 i915_vma_capture_finish(struct intel_gt_coredump *gt, 397 struct i915_vma_compress *compress) 398 { 399 } 400 401 static inline void 402 i915_error_state_store(struct i915_gpu_coredump *error) 403 { 404 } 405 406 static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu) 407 { 408 } 409 410 static inline void i915_reset_error_state(struct drm_i915_private *i915) 411 { 412 } 413 414 static inline void i915_disable_error_state(struct drm_i915_private *i915, 415 int err) 416 { 417 } 418 419 static inline void i915_gpu_error_debugfs_register(struct drm_i915_private *i915) 420 { 421 } 422 423 static inline void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915) 424 { 425 } 426 427 static inline void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915) 428 { 429 } 430 431 #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */ 432 433 #endif /* _I915_GPU_ERROR_H_ */ 434