xref: /linux/drivers/gpu/drm/amd/ras/rascore/ras_cper.h (revision 24f171c7e145f43b9f187578e89b0982ce87e54c)
1 /* SPDX-License-Identifier: MIT */
2 /*
3  * Copyright 2025 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  *
23  */
24 #ifndef __RAS_CPER_H__
25 #define __RAS_CPER_H__
26 
/* Number of bytes in a GUID as stored in a CPER record. */
#define CPER_UUID_MAX_SIZE	16

/* Raw GUID storage: CPER_UUID_MAX_SIZE bytes, no further structure. */
struct ras_cper_guid {
	uint8_t	b[CPER_UUID_MAX_SIZE];
};
31 
32 #define CPER_GUID__INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)			\
33 	((struct ras_cper_guid)								\
34 	{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
35 		(b) & 0xff, ((b) >> 8) & 0xff,					\
36 		(c) & 0xff, ((c) >> 8) & 0xff,					\
37 		(d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
38 
/*
 * Revision numbers.  The CPER_SEC__* pair is what the revision_minor and
 * revision_major members of struct cper_section_descriptor refer to.
 */
#define CPER_HDR__REV_1			(0x100)
#define CPER_SEC__MINOR_REV_1		(0x01)
#define CPER_SEC__MAJOR_REV_22		(0x22)

/* Number of 64-bit entries in the msg[] array of struct crashdump_boot. */
#define CPER_OAM_MAX_COUNT		(8)

/*
 * Context type codes.
 * NOTE(review): presumably the values written to reg_ctx_type -- confirm
 * against the code that fills the records.
 */
#define CPER_CTX_TYPE__CRASH		(1)
#define CPER_CTX_TYPE__BOOT		(9)

/*
 * Creator name.
 * NOTE(review): presumably copied into creator_id of the record header --
 * confirm against the code that fills the records.
 */
#define CPER_CREATOR_ID__AMDGPU		"amdgpu"
48 
/*
 * GUID constants, each expanding to a struct ras_cper_guid compound literal
 * via CPER_GUID__INIT().
 *
 * NOTE(review): the first group is presumably meant for the notify_type
 * member of the record header and the second group for the sec_type member
 * of a section descriptor -- confirm against the record-building code.
 */
#define CPER_NOTIFY__MCE						\
	CPER_GUID__INIT(0xE8F56FFE, 0x919C, 0x4cc5,			\
			0xBA, 0x88, 0x65, 0xAB, 0xE1, 0x49, 0x13, 0xBB)
#define CPER_NOTIFY__CMC						\
	CPER_GUID__INIT(0x2DCE8BB1, 0xBDD7, 0x450e,			\
			0xB9, 0xAD, 0x9C, 0xF4, 0xEB, 0xD4, 0xF8, 0x90)
#define BOOT__TYPE							\
	CPER_GUID__INIT(0x3D61A466, 0xAB40, 0x409a,			\
			0xA6, 0x98, 0xF3, 0x62, 0xD4, 0x64, 0xB3, 0x8F)

#define GPU__CRASHDUMP							\
	CPER_GUID__INIT(0x32AC0C78, 0x2623, 0x48F6,			\
			0xB0, 0xD0, 0x73, 0x65, 0x72, 0x5F, 0xD6, 0xAE)
#define GPU__NONSTANDARD_ERROR						\
	CPER_GUID__INIT(0x32AC0C78, 0x2623, 0x48F6,			\
			0x81, 0xA2, 0xAC, 0x69, 0x17, 0x80, 0x55, 0x1D)
#define PROC_ERR__SECTION_TYPE						\
	CPER_GUID__INIT(0xDC3EA0B0, 0xA144, 0x4797,			\
			0xB9, 0x5B, 0x53, 0xFA, 0x24, 0x2B, 0x6E, 0x1D)
68 
/* Kinds of CPER record named by this header; values are consecutive from 0. */
enum ras_cper_type {
	RAS_CPER_TYPE_RUNTIME	= 0,
	RAS_CPER_TYPE_FATAL	= 1,
	RAS_CPER_TYPE_BOOT	= 2,
	RAS_CPER_TYPE_RMA	= 3,
};
75 
/*
 * Severity codes; used as the type of error_severity in the record header
 * and of severity in the section descriptor.
 */
enum ras_cper_severity {
	RAS_CPER_SEV_NON_FATAL_UE	= 0,
	RAS_CPER_SEV_FATAL_UE		= 1,
	RAS_CPER_SEV_NON_FATAL_CE	= 2,
	RAS_CPER_SEV_RMA		= 3,

	/* Values 4..9 are not given a name here. */
	RAS_CPER_SEV_UNUSED		= 10,
};
84 
/*
 * Named ACA register indices.  RAS_CPER_ACA_REG_COUNT is the length of the
 * reg_dump[] array in struct runtime_error_reg.
 * NOTE(review): the other enumerators are presumably indices into that
 * array -- confirm against the code that fills it.
 */
enum ras_cper_aca_reg {
	RAS_CPER_ACA_REG_CTL	= 0,
	RAS_CPER_ACA_REG_STATUS	= 1,
	RAS_CPER_ACA_REG_ADDR	= 2,
	RAS_CPER_ACA_REG_MISC0	= 3,
	RAS_CPER_ACA_REG_CONFIG	= 4,
	RAS_CPER_ACA_REG_IPID	= 5,
	RAS_CPER_ACA_REG_SYND	= 6,
	/* 7 has no name in this enum. */
	RAS_CPER_ACA_REG_DESTAT	= 8,
	RAS_CPER_ACA_REG_DEADDR	= 9,
	RAS_CPER_ACA_REG_MASK	= 10,

	/* 11..15 have no name in this enum. */
	RAS_CPER_ACA_REG_COUNT	= 16,
};
99 
100 #pragma pack(push, 1)
101 
/* Record timestamp: eight single-byte fields, 8 bytes in total. */
struct ras_cper_timestamp {
	uint8_t	seconds;
	uint8_t	minutes;
	uint8_t	hours;
	uint8_t	flag;
	uint8_t	day;
	uint8_t	month;
	uint8_t	year;
	uint8_t	century;
};
112 
113 struct cper_section_hdr {
114 	char                     signature[4];  /* "CPER"  */
115 	uint16_t                 revision;
116 	uint32_t                 signature_end; /* 0xFFFFFFFF */
117 	uint16_t                 sec_cnt;
118 	enum ras_cper_severity error_severity;
119 	union {
120 		struct {
121 			uint32_t platform_id	: 1;
122 			uint32_t timestamp	: 1;
123 			uint32_t partition_id	: 1;
124 			uint32_t reserved	: 29;
125 		} valid_bits;
126 		uint32_t valid_mask;
127 	};
128 	uint32_t		record_length;    /* Total size of CPER Entry */
129 	struct ras_cper_timestamp timestamp;
130 	char			platform_id[16];
131 	struct ras_cper_guid			partition_id;     /* Reserved */
132 	char			creator_id[16];
133 	struct ras_cper_guid			notify_type;      /* CMC, MCE */
134 	char			record_id[8];     /* Unique CPER Entry ID */
135 	uint32_t		flags;            /* Reserved */
136 	uint64_t		persistence_info; /* Reserved */
137 	uint8_t			reserved[12];     /* Reserved */
138 };
139 
140 struct cper_section_descriptor {
141 	uint32_t sec_offset;     /* Offset from the start of CPER entry */
142 	uint32_t sec_length;
143 	uint8_t  revision_minor; /* CPER_SEC_MINOR_REV_1 */
144 	uint8_t  revision_major; /* CPER_SEC_MAJOR_REV_22 */
145 	union {
146 		struct {
147 			uint8_t fru_id		: 1;
148 			uint8_t fru_text	: 1;
149 			uint8_t reserved	: 6;
150 		} valid_bits;
151 		uint8_t valid_mask;
152 	};
153 	uint8_t reserved;
154 	union {
155 		struct {
156 			uint32_t primary		: 1;
157 			uint32_t reserved1		: 2;
158 			uint32_t exceed_err_threshold	: 1;
159 			uint32_t latent_err		: 1;
160 			uint32_t reserved2		: 27;
161 		} flag_bits;
162 		uint32_t flag_mask;
163 	};
164 	struct ras_cper_guid			sec_type;
165 	char				fru_id[16];
166 	enum ras_cper_severity severity;
167 	char				fru_text[20];
168 };
169 
/*
 * First member (hdr) of struct cper_section_runtime.
 *
 * valid_mask aliases the same 64 bits as the valid_bits bit-fields; only 14
 * of those bits are given names.
 */
struct runtime_hdr {
	union {
		struct {
			uint64_t apic_id	 : 1;
			uint64_t fw_id		 : 1;
			uint64_t err_info_cnt	 : 6;
			uint64_t err_context_cnt : 6;
		} valid_bits;
		uint64_t valid_mask;
	};
	uint64_t	apic_id;
	char		fw_id[48];
};
183 
184 struct runtime_descriptor {
185 	struct ras_cper_guid error_type;
186 	union {
187 		struct {
188 			uint64_t ms_chk			: 1;
189 			uint64_t target_addr_id		: 1;
190 			uint64_t req_id			: 1;
191 			uint64_t resp_id		: 1;
192 			uint64_t instr_ptr		: 1;
193 			uint64_t reserved		: 59;
194 		} valid_bits;
195 		uint64_t        valid_mask;
196 	};
197 	union {
198 		struct {
199 			uint64_t err_type_valid		: 1;
200 			uint64_t pcc_valid		: 1;
201 			uint64_t uncorr_valid		: 1;
202 			uint64_t precise_ip_valid	: 1;
203 			uint64_t restartable_ip_valid	: 1;
204 			uint64_t overflow_valid		: 1;
205 			uint64_t reserved1		: 10;
206 			uint64_t err_type		: 2;
207 			uint64_t pcc			: 1;
208 			uint64_t uncorr			: 1;
209 			uint64_t precised_ip		: 1;
210 			uint64_t restartable_ip		: 1;
211 			uint64_t overflow		: 1;
212 			uint64_t reserved2		: 41;
213 		} ms_chk_bits;
214 		uint64_t ms_chk_mask;
215 	};
216 	uint64_t target_addr_id;
217 	uint64_t req_id;
218 	uint64_t resp_id;
219 	uint64_t instr_ptr;
220 };
221 
222 struct runtime_error_reg {
223 	uint16_t reg_ctx_type;
224 	uint16_t reg_arr_size;
225 	uint32_t msr_addr;
226 	uint64_t mm_reg_addr;
227 	uint64_t reg_dump[RAS_CPER_ACA_REG_COUNT];
228 };
229 
230 struct cper_section_runtime {
231 	struct runtime_hdr  hdr;
232 	struct runtime_descriptor descriptor;
233 	struct runtime_error_reg  reg;
234 };
235 
/*
 * Header shared by struct cper_section_fatal and struct cper_section_boot.
 * Apart from fw_id every member is named "reserved".
 */
struct crashdump_hdr {
	uint64_t	reserved1;
	uint64_t	reserved2;
	char		fw_id[48];
	uint64_t	reserved3[8];
};
242 
/* Four 64-bit register values carried by struct crashdump_fatal. */
struct fatal_reg_info {
	uint64_t	status;
	uint64_t	addr;
	uint64_t	ipid;
	uint64_t	synd;
};
249 
250 struct crashdump_fatal {
251 	uint16_t reg_ctx_type;
252 	uint16_t reg_arr_size;
253 	uint32_t reserved1;
254 	uint64_t reserved2;
255 	struct fatal_reg_info reg;
256 };
257 
258 struct crashdump_boot {
259 	uint16_t reg_ctx_type;
260 	uint16_t reg_arr_size;
261 	uint32_t reserved1;
262 	uint64_t reserved2;
263 	uint64_t msg[CPER_OAM_MAX_COUNT];
264 };
265 
266 struct cper_section_fatal {
267 	struct crashdump_hdr    hdr;
268 	struct crashdump_fatal  data;
269 };
270 
271 struct cper_section_boot {
272 	struct crashdump_hdr  hdr;
273 	struct crashdump_boot data;
274 };
275 
276 struct ras_cper_fatal_record {
277 	struct cper_section_hdr hdr;
278 	struct cper_section_descriptor descriptor;
279 	struct cper_section_fatal fatal;
280 };
281 #pragma pack(pop)
282 
/*
 * Sizes, in bytes, of the record header, of one section descriptor and of
 * each section body.  Every sizeof() names a structure declared in this
 * header.
 *
 * NOTE(review): RAS_NONSTD_SEC_LEN is mapped to struct cper_section_runtime
 * on the assumption that the runtime section is the "non-standard error"
 * section -- confirm.
 */
#define RAS_HDR_LEN			(sizeof(struct cper_section_hdr))
#define RAS_SEC_DESC_LEN		(sizeof(struct cper_section_descriptor))

#define RAS_BOOT_SEC_LEN		(sizeof(struct cper_section_boot))
#define RAS_FATAL_SEC_LEN		(sizeof(struct cper_section_fatal))
#define RAS_NONSTD_SEC_LEN		(sizeof(struct cper_section_runtime))

/*
 * Byte offset of section descriptor number idx (0-based), counted from the
 * start of the record: the descriptors follow the header back to back.
 */
#define RAS_SEC_DESC_OFFSET(idx)	(RAS_HDR_LEN + (RAS_SEC_DESC_LEN * (idx)))

/*
 * Byte offset of section body number idx (0-based) in a record that holds
 * count descriptors: header, then count descriptors, then the bodies.
 * Arguments are parenthesized so that expressions such as "i + 1" are scaled
 * as a whole.
 */
#define RAS_BOOT_SEC_OFFSET(count, idx) \
	(RAS_HDR_LEN + (RAS_SEC_DESC_LEN * (count)) + (RAS_BOOT_SEC_LEN * (idx)))
#define RAS_FATAL_SEC_OFFSET(count, idx) \
	(RAS_HDR_LEN + (RAS_SEC_DESC_LEN * (count)) + (RAS_FATAL_SEC_LEN * (idx)))
#define RAS_NONSTD_SEC_OFFSET(count, idx) \
	(RAS_HDR_LEN + (RAS_SEC_DESC_LEN * (count)) + (RAS_NONSTD_SEC_LEN * (idx)))
298 
/* Only pointers to these types are used below, so they stay incomplete here. */
struct ras_core_context;
struct ras_log_info;

/*
 * Declared here; the definition is not part of this header.
 *
 * NOTE(review): judging by the parameter names alone, this presumably encodes
 * count entries of trace_list into buf (capacity buf_len) and reports the
 * number of bytes produced through real_data_len -- confirm against the
 * implementation, including the meaning of the int return value.
 */
int ras_cper_generate_cper(struct ras_core_context *ras_core,
			   struct ras_log_info **trace_list,
			   uint32_t count,
			   uint8_t *buf,
			   uint32_t buf_len,
			   uint32_t *real_data_len);
304 #endif
305