xref: /linux/drivers/gpu/drm/i915/i915_gpu_error.h (revision 2dcb8e8782d8e4c38903bf37b1a24d3ffd193da7)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2008-2018 Intel Corporation
5  */
6 
7 #ifndef _I915_GPU_ERROR_H_
8 #define _I915_GPU_ERROR_H_
9 
10 #include <linux/atomic.h>
11 #include <linux/kref.h>
12 #include <linux/ktime.h>
13 #include <linux/sched.h>
14 
15 #include <drm/drm_mm.h>
16 
17 #include "gt/intel_engine.h"
18 #include "gt/intel_gt_types.h"
19 #include "gt/uc/intel_uc_fw.h"
20 
21 #include "intel_device_info.h"
22 
23 #include "i915_gem.h"
24 #include "i915_gem_gtt.h"
25 #include "i915_params.h"
26 #include "i915_scheduler.h"
27 
/*
 * Opaque types: within this header they are only named through pointer
 * members and pointer parameters, so a forward declaration is sufficient.
 */
struct drm_i915_private;
struct i915_vma_compress;
struct intel_engine_capture_vma;
struct intel_overlay_error_state;
32 
33 struct i915_vma_coredump {
34 	struct i915_vma_coredump *next;
35 
36 	char name[20];
37 
38 	u64 gtt_offset;
39 	u64 gtt_size;
40 	u32 gtt_page_sizes;
41 
42 	int unused;
43 	struct list_head page_list;
44 };
45 
46 struct i915_request_coredump {
47 	unsigned long flags;
48 	pid_t pid;
49 	u32 context;
50 	u32 seqno;
51 	u32 head;
52 	u32 tail;
53 	struct i915_sched_attr sched_attr;
54 };
55 
56 struct intel_engine_coredump {
57 	const struct intel_engine_cs *engine;
58 
59 	bool hung;
60 	bool simulated;
61 	u32 reset_count;
62 
63 	/* position of active request inside the ring */
64 	u32 rq_head, rq_post, rq_tail;
65 
66 	/* Register state */
67 	u32 ccid;
68 	u32 start;
69 	u32 tail;
70 	u32 head;
71 	u32 ctl;
72 	u32 mode;
73 	u32 hws;
74 	u32 ipeir;
75 	u32 ipehr;
76 	u32 esr;
77 	u32 bbstate;
78 	u32 instpm;
79 	u32 instps;
80 	u64 bbaddr;
81 	u64 acthd;
82 	u32 fault_reg;
83 	u64 faddr;
84 	u32 rc_psmi; /* sleep state */
85 	struct intel_instdone instdone;
86 
87 	struct i915_gem_context_coredump {
88 		char comm[TASK_COMM_LEN];
89 
90 		u64 total_runtime;
91 		u32 avg_runtime;
92 
93 		pid_t pid;
94 		int active;
95 		int guilty;
96 		struct i915_sched_attr sched_attr;
97 	} context;
98 
99 	struct i915_vma_coredump *vma;
100 
101 	struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
102 	unsigned int num_ports;
103 
104 	struct {
105 		u32 gfx_mode;
106 		union {
107 			u64 pdp[4];
108 			u32 pp_dir_base;
109 		};
110 	} vm_info;
111 
112 	struct intel_engine_coredump *next;
113 };
114 
115 struct intel_gt_coredump {
116 	const struct intel_gt *_gt;
117 	bool awake;
118 	bool simulated;
119 
120 	struct intel_gt_info info;
121 
122 	/* Generic register state */
123 	u32 eir;
124 	u32 pgtbl_er;
125 	u32 ier;
126 	u32 gtier[6], ngtier;
127 	u32 derrmr;
128 	u32 forcewake;
129 	u32 error; /* gen6+ */
130 	u32 err_int; /* gen7 */
131 	u32 fault_data0; /* gen8, gen9 */
132 	u32 fault_data1; /* gen8, gen9 */
133 	u32 done_reg;
134 	u32 gac_eco;
135 	u32 gam_ecochk;
136 	u32 gab_ctl;
137 	u32 gfx_mode;
138 	u32 gtt_cache;
139 	u32 aux_err; /* gen12 */
140 	u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */
141 	u32 gam_done; /* gen12 */
142 
143 	u32 nfence;
144 	u64 fence[I915_MAX_NUM_FENCES];
145 
146 	struct intel_engine_coredump *engine;
147 
148 	struct intel_uc_coredump {
149 		struct intel_uc_fw guc_fw;
150 		struct intel_uc_fw huc_fw;
151 		struct i915_vma_coredump *guc_log;
152 	} *uc;
153 
154 	struct intel_gt_coredump *next;
155 };
156 
157 struct i915_gpu_coredump {
158 	struct kref ref;
159 	ktime_t time;
160 	ktime_t boottime;
161 	ktime_t uptime;
162 	unsigned long capture;
163 
164 	struct drm_i915_private *i915;
165 
166 	struct intel_gt_coredump *gt;
167 
168 	char error_msg[128];
169 	bool simulated;
170 	bool wakelock;
171 	bool suspended;
172 	int iommu;
173 	u32 reset_count;
174 	u32 suspend_count;
175 
176 	struct intel_device_info device_info;
177 	struct intel_runtime_info runtime_info;
178 	struct intel_driver_caps driver_caps;
179 	struct i915_params params;
180 
181 	struct intel_overlay_error_state *overlay;
182 
183 	struct scatterlist *sgl, *fit;
184 };
185 
186 struct i915_gpu_error {
187 	/* For reset and error_state handling. */
188 	spinlock_t lock;
189 	/* Protected by the above dev->gpu_error.lock. */
190 	struct i915_gpu_coredump *first_error;
191 
192 	atomic_t pending_fb_pin;
193 
194 	/** Number of times the device has been reset (global) */
195 	atomic_t reset_count;
196 
197 	/** Number of times an engine has been reset */
198 	atomic_t reset_engine_count[I915_NUM_ENGINES];
199 };
200 
201 struct drm_i915_error_state_buf {
202 	struct drm_i915_private *i915;
203 	struct scatterlist *sgl, *cur, *end;
204 
205 	char *buf;
206 	size_t bytes;
207 	size_t size;
208 	loff_t iter;
209 
210 	int err;
211 };
212 
213 static inline u32 i915_reset_count(struct i915_gpu_error *error)
214 {
215 	return atomic_read(&error->reset_count);
216 }
217 
218 static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
219 					  const struct intel_engine_cs *engine)
220 {
221 	return atomic_read(&error->reset_engine_count[engine->uabi_class]);
222 }
223 
224 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
225 
226 __printf(2, 3)
227 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
228 
229 struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt,
230 					    intel_engine_mask_t engine_mask);
231 void i915_capture_error_state(struct intel_gt *gt,
232 			      intel_engine_mask_t engine_mask);
233 
234 struct i915_gpu_coredump *
235 i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
236 
237 struct intel_gt_coredump *
238 intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);
239 
240 struct intel_engine_coredump *
241 intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
242 
243 struct intel_engine_capture_vma *
244 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
245 				  struct i915_request *rq,
246 				  gfp_t gfp);
247 
248 void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
249 				   struct intel_engine_capture_vma *capture,
250 				   struct i915_vma_compress *compress);
251 
/*
 * i915_vma_capture_prepare() returns an i915_vma_compress pointer, which is
 * also the type i915_vma_capture_finish() accepts as @compress.
 */
struct i915_vma_compress *i915_vma_capture_prepare(struct intel_gt_coredump *gt);

void
i915_vma_capture_finish(struct intel_gt_coredump *gt,
			struct i915_vma_compress *compress);
257 
/* Implemented elsewhere; a no-op stub exists in the #else branch of this header. */
void
i915_error_state_store(struct i915_gpu_coredump *error);
259 
260 static inline struct i915_gpu_coredump *
261 i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
262 {
263 	kref_get(&gpu->ref);
264 	return gpu;
265 }
266 
267 ssize_t
268 i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
269 				 char *buf, loff_t offset, size_t count);
270 
271 void __i915_gpu_coredump_free(struct kref *kref);
272 static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
273 {
274 	if (gpu)
275 		kref_put(&gpu->ref, __i915_gpu_coredump_free);
276 }
277 
/*
 * Device-level error state entry points, implemented elsewhere. Inline
 * stubs with the same signatures are provided in the #else branch of this
 * header for builds without CONFIG_DRM_I915_CAPTURE_ERROR.
 */
struct i915_gpu_coredump *
i915_first_error_state(struct drm_i915_private *i915);

void
i915_reset_error_state(struct drm_i915_private *i915);

void
i915_disable_error_state(struct drm_i915_private *i915, int err);
281 
282 #else
283 
284 static inline void
285 i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask)
286 {
287 }
288 
289 static inline struct i915_gpu_coredump *
290 i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
291 {
292 	return NULL;
293 }
294 
295 static inline struct intel_gt_coredump *
296 intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
297 {
298 	return NULL;
299 }
300 
301 static inline struct intel_engine_coredump *
302 intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
303 {
304 	return NULL;
305 }
306 
307 static inline struct intel_engine_capture_vma *
308 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
309 				  struct i915_request *rq,
310 				  gfp_t gfp)
311 {
312 	return NULL;
313 }
314 
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: does nothing. */
static inline void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
						 struct intel_engine_capture_vma *capture,
						 struct i915_vma_compress *compress)
{
}
321 
322 static inline struct i915_vma_compress *
323 i915_vma_capture_prepare(struct intel_gt_coredump *gt)
324 {
325 	return NULL;
326 }
327 
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: does nothing. */
static inline void i915_vma_capture_finish(struct intel_gt_coredump *gt,
					   struct i915_vma_compress *compress)
{
}
333 
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: does nothing. */
static inline void i915_error_state_store(struct i915_gpu_coredump *error)
{
}
338 
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: @gpu is left untouched. */
static inline void
i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
{
}
342 
343 static inline struct i915_gpu_coredump *
344 i915_first_error_state(struct drm_i915_private *i915)
345 {
346 	return ERR_PTR(-ENODEV);
347 }
348 
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: does nothing. */
static inline void
i915_reset_error_state(struct drm_i915_private *i915)
{
}
352 
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: @err is ignored. */
static inline void
i915_disable_error_state(struct drm_i915_private *i915, int err)
{
}
357 
358 #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
359 
360 #endif /* _I915_GPU_ERROR_H_ */
361