xref: /linux/drivers/gpu/drm/xe/xe_devcoredump_types.h (revision 2c1ed907520c50326b8f604907a8478b27881a2e)
1 /* SPDX-License-Identifier: MIT */
2 /*
3  * Copyright © 2023 Intel Corporation
4  */
5 
6 #ifndef _XE_DEVCOREDUMP_TYPES_H_
7 #define _XE_DEVCOREDUMP_TYPES_H_
8 
9 #include <linux/ktime.h>
10 #include <linux/mutex.h>
11 
12 #include "xe_hw_engine_types.h"
13 
14 struct xe_device;
15 struct xe_gt;
16 
17 /**
18  * struct xe_devcoredump_snapshot - Crash snapshot
19  *
20  * This struct contains all the useful information quickly captured at the time
21  * of the crash, so any subsequent read of the coredump points to data that
22  * shows the state of the GPU at the time the issue happened.
23  */
24 struct xe_devcoredump_snapshot {
25 	/** @snapshot_time: Time of this capture. */
26 	ktime_t snapshot_time;
27 	/** @boot_time: Relative boot time so the uptime can be calculated. */
28 	ktime_t boot_time;
29 	/** @process_name: Name of the process that triggered this GPU hang */
30 	char process_name[TASK_COMM_LEN];
31 	/** @pid: Process ID of the process that triggered this GPU hang */
32 	pid_t pid;
33 	/** @reason: The reason the coredump was triggered */
34 	char *reason;
35 
36 	/** @gt: Affected GT, used by forcewake for delayed capture */
37 	struct xe_gt *gt;
38 	/** @work: Work item for deferred capture outside of signaling context */
39 	struct work_struct work;
40 
41 	/** @guc: GuC snapshots */
42 	struct {
43 		/** @guc.ct: GuC CT snapshot */
44 		struct xe_guc_ct_snapshot *ct;
45 		/** @guc.log: GuC log snapshot */
46 		struct xe_guc_log_snapshot *log;
47 	} guc;
48 
49 	/** @ge: GuC submission exec queue snapshot */
50 	struct xe_guc_submit_exec_queue_snapshot *ge;
51 
52 	/** @hwe: HW engine snapshot array, XE_NUM_HW_ENGINES entries */
53 	struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES];
54 	/** @job: Snapshot of job state */
55 	struct xe_sched_job_snapshot *job;
56 	/**
57 	 * @matched_node: The matched capture node for the timed-out job.
58 	 * This single-node tracker works because devcoredump will always
59 	 * produce only one hw-engine capture per devcoredump event.
60 	 */
61 	struct __guc_capture_parsed_output *matched_node;
62 	/** @vm: Snapshot of VM state */
63 	struct xe_vm_snapshot *vm;
64 
65 	/** @read: devcoredump in human-readable format */
66 	struct {
67 		/** @read.size: Size of the devcoredump in human-readable format */
68 		ssize_t size;
69 		/** @read.buffer: Buffer holding the devcoredump in human-readable format */
70 		char *buffer;
71 	} read;
72 };
73 
74 /**
75  * struct xe_devcoredump - Xe devcoredump main structure
76  *
77  * This struct represents the live and active dev_coredump node.
78  * It is created/populated at the time of a crash/error and is
79  * read later, when the user accesses the device coredump data
80  * file to retrieve the information.
81  */
82 struct xe_devcoredump {
83 	/** @lock: Protects access to the entire structure */
84 	struct mutex lock;
85 	/** @captured: True once the snapshot of the first hang has been taken */
86 	bool captured;
87 	/** @snapshot: Snapshot captured at the time of the first crash */
88 	struct xe_devcoredump_snapshot snapshot;
89 };
90 
91 #endif
92