/* SPDX-License-Identifier: MIT */
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#ifndef __RAS_CPER_H__
#define __RAS_CPER_H__

/*
 * Layout definitions for RAS CPER records: GUID constants, the record
 * header, the section descriptor and the section payloads, plus size and
 * offset helpers for walking a record.
 *
 * NOTE(review): the layouts presumably mirror the UEFI Common Platform
 * Error Record format - confirm against the specification before changing
 * any field.
 *
 * This header uses the fixed-width integer types (uint8_t ... uint64_t) but
 * does not include a header that declares them; the includer must do so.
 */

#define CPER_UUID_MAX_SIZE 16

/* Raw 16-byte GUID as it is stored inside a record. */
struct ras_cper_guid {
	uint8_t b[CPER_UUID_MAX_SIZE];
};

/*
 * Build a struct ras_cper_guid compound literal from one 32-bit field (a),
 * two 16-bit fields (b, c) and eight trailing bytes (d0..d7).  The three
 * leading fields are stored least-significant byte first; d0..d7 are stored
 * as given.  Arguments a, b and c are evaluated more than once, so pass
 * side-effect-free expressions only.
 */
#define CPER_GUID__INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)		\
	((struct ras_cper_guid)							\
	{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
	   (b) & 0xff, ((b) >> 8) & 0xff,					\
	   (c) & 0xff, ((c) >> 8) & 0xff,					\
	   (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})

#define CPER_HDR__REV_1           (0x100)
#define CPER_SEC__MINOR_REV_1     (0x01)
#define CPER_SEC__MAJOR_REV_22    (0x22)
/* Number of 64-bit message words carried by struct crashdump_boot. */
#define CPER_OAM_MAX_COUNT        (8)

#define CPER_CTX_TYPE__CRASH      (1)
#define CPER_CTX_TYPE__BOOT       (9)

#define CPER_CREATOR_ID__AMDGPU   "amdgpu"

/* GUID constants (see cper_section_hdr.notify_type, which lists CMC/MCE). */
#define CPER_NOTIFY__MCE						\
	CPER_GUID__INIT(0xE8F56FFE, 0x919C, 0x4cc5, 0xBA, 0x88, 0x65, 0xAB, \
			0xE1, 0x49, 0x13, 0xBB)
#define CPER_NOTIFY__CMC						\
	CPER_GUID__INIT(0x2DCE8BB1, 0xBDD7, 0x450e, 0xB9, 0xAD, 0x9C, 0xF4, \
			0xEB, 0xD4, 0xF8, 0x90)
#define BOOT__TYPE							\
	CPER_GUID__INIT(0x3D61A466, 0xAB40, 0x409a, 0xA6, 0x98, 0xF3, 0x62, \
			0xD4, 0x64, 0xB3, 0x8F)

#define GPU__CRASHDUMP							\
	CPER_GUID__INIT(0x32AC0C78, 0x2623, 0x48F6, 0xB0, 0xD0, 0x73, 0x65, \
			0x72, 0x5F, 0xD6, 0xAE)
#define GPU__NONSTANDARD_ERROR						\
	CPER_GUID__INIT(0x32AC0C78, 0x2623, 0x48F6, 0x81, 0xA2, 0xAC, 0x69, \
			0x17, 0x80, 0x55, 0x1D)
#define PROC_ERR__SECTION_TYPE						\
	CPER_GUID__INIT(0xDC3EA0B0, 0xA144, 0x4797, 0xB9, 0x5B, 0x53, 0xFA, \
			0x24, 0x2B, 0x6E, 0x1D)

/* Kind of record to generate. */
enum ras_cper_type {
	RAS_CPER_TYPE_RUNTIME,
	RAS_CPER_TYPE_FATAL,
	RAS_CPER_TYPE_BOOT,
	RAS_CPER_TYPE_RMA,
};

/*
 * Severity code stored in the record header and in each section descriptor.
 *
 * NOTE(review): this enum is embedded directly in packed structures, so the
 * on-record width of those fields follows the compiler's enum size.
 */
enum ras_cper_severity {
	RAS_CPER_SEV_NON_FATAL_UE = 0,
	RAS_CPER_SEV_FATAL_UE     = 1,
	RAS_CPER_SEV_NON_FATAL_CE = 2,
	RAS_CPER_SEV_RMA          = 3,

	RAS_CPER_SEV_UNUSED       = 10,
};

/*
 * ACA register identifiers.  Value 7 is not assigned.
 * RAS_CPER_ACA_REG_COUNT sizes runtime_error_reg.reg_dump[]; the other
 * values are presumably indices into that array - confirm against the
 * record generator.
 */
enum ras_cper_aca_reg {
	RAS_CPER_ACA_REG_CTL    = 0,
	RAS_CPER_ACA_REG_STATUS = 1,
	RAS_CPER_ACA_REG_ADDR   = 2,
	RAS_CPER_ACA_REG_MISC0  = 3,
	RAS_CPER_ACA_REG_CONFIG = 4,
	RAS_CPER_ACA_REG_IPID   = 5,
	RAS_CPER_ACA_REG_SYND   = 6,
	RAS_CPER_ACA_REG_DESTAT = 8,
	RAS_CPER_ACA_REG_DEADDR = 9,
	RAS_CPER_ACA_REG_MASK   = 10,

	RAS_CPER_ACA_REG_COUNT  = 16,
};

/* Everything up to the matching pop is laid out without padding. */
#pragma pack(push, 1)

/* Timestamp stored in the record header, one byte per component. */
struct ras_cper_timestamp {
	uint8_t seconds;
	uint8_t minutes;
	uint8_t hours;
	uint8_t flag;
	uint8_t day;
	uint8_t month;
	uint8_t year;
	uint8_t century;
};

/* Record header; RAS_HDR_LEN is its size and it is placed first. */
struct cper_section_hdr {
	char     signature[4];			/* "CPER" */
	uint16_t revision;
	uint32_t signature_end;			/* 0xFFFFFFFF */
	uint16_t sec_cnt;
	enum ras_cper_severity error_severity;
	union {
		struct {
			uint32_t platform_id  : 1;
			uint32_t timestamp    : 1;
			uint32_t partition_id : 1;
			uint32_t reserved     : 29;
		} valid_bits;
		uint32_t valid_mask;
	};
	uint32_t record_length;			/* Total size of CPER Entry */
	struct ras_cper_timestamp timestamp;
	char     platform_id[16];
	struct ras_cper_guid partition_id;	/* Reserved */
	char     creator_id[16];
	struct ras_cper_guid notify_type;	/* CMC, MCE */
	char     record_id[8];			/* Unique CPER Entry ID */
	uint32_t flags;				/* Reserved */
	uint64_t persistence_info;		/* Reserved */
	uint8_t  reserved[12];			/* Reserved */
};

/* Describes one section of the record: where it is, how long, what type. */
struct cper_section_descriptor {
	uint32_t sec_offset;		/* Offset from the start of CPER entry */
	uint32_t sec_length;
	uint8_t  revision_minor;	/* CPER_SEC__MINOR_REV_1 */
	uint8_t  revision_major;	/* CPER_SEC__MAJOR_REV_22 */
	union {
		struct {
			uint8_t fru_id   : 1;
			uint8_t fru_text : 1;
			uint8_t reserved : 6;
		} valid_bits;
		uint8_t valid_mask;
	};
	uint8_t  reserved;
	union {
		struct {
			uint32_t primary              : 1;
			uint32_t reserved1            : 2;
			uint32_t exceed_err_threshold : 1;
			uint32_t latent_err           : 1;
			uint32_t reserved2            : 27;
		} flag_bits;
		uint32_t flag_mask;
	};
	struct ras_cper_guid sec_type;
	char     fru_id[16];
	enum ras_cper_severity severity;
	char     fru_text[20];
};

/* Leading part of a runtime section (struct cper_section_runtime). */
struct runtime_hdr {
	union {
		struct {
			uint64_t apic_id         : 1;
			uint64_t fw_id           : 1;
			uint64_t err_info_cnt    : 6;
			uint64_t err_context_cnt : 6;
		} valid_bits;
		uint64_t valid_mask;
	};
	uint64_t apic_id;
	char     fw_id[48];
};

/* Error-information part of a runtime section. */
struct runtime_descriptor {
	struct ras_cper_guid error_type;
	union {
		struct {
			uint64_t ms_chk         : 1;
			uint64_t target_addr_id : 1;
			uint64_t req_id         : 1;
			uint64_t resp_id        : 1;
			uint64_t instr_ptr      : 1;
			uint64_t reserved       : 59;
		} valid_bits;
		uint64_t valid_mask;
	};
	union {
		struct {
			uint64_t err_type_valid       : 1;
			uint64_t pcc_valid            : 1;
			uint64_t uncorr_valid         : 1;
			uint64_t precise_ip_valid     : 1;
			uint64_t restartable_ip_valid : 1;
			uint64_t overflow_valid       : 1;
			uint64_t reserved1            : 10;
			uint64_t err_type             : 2;
			uint64_t pcc                  : 1;
			uint64_t uncorr               : 1;
			uint64_t precised_ip          : 1;
			uint64_t restartable_ip       : 1;
			uint64_t overflow             : 1;
			uint64_t reserved2            : 41;
		} ms_chk_bits;
		uint64_t ms_chk_mask;
	};
	uint64_t target_addr_id;
	uint64_t req_id;
	uint64_t resp_id;
	uint64_t instr_ptr;
};

/* Register-context part of a runtime section. */
struct runtime_error_reg {
	uint16_t reg_ctx_type;
	uint16_t reg_arr_size;
	uint32_t msr_addr;
	uint64_t mm_reg_addr;
	uint64_t reg_dump[RAS_CPER_ACA_REG_COUNT];
};

/* Complete runtime section; RAS_NONSTD_SEC_LEN is its size. */
struct cper_section_runtime {
	struct runtime_hdr        hdr;
	struct runtime_descriptor descriptor;
	struct runtime_error_reg  reg;
};

/* Header shared by the fatal and boot crashdump sections. */
struct crashdump_hdr {
	uint64_t reserved1;
	uint64_t reserved2;
	char     fw_id[48];
	uint64_t reserved3[8];
};

/* Register values carried by a fatal crashdump section. */
struct fatal_reg_info {
	uint64_t status;
	uint64_t addr;
	uint64_t ipid;
	uint64_t synd;
};

/* Payload of a fatal crashdump section. */
struct crashdump_fatal {
	uint16_t reg_ctx_type;
	uint16_t reg_arr_size;
	uint32_t reserved1;
	uint64_t reserved2;
	struct fatal_reg_info reg;
};

/* Payload of a boot crashdump section. */
struct crashdump_boot {
	uint16_t reg_ctx_type;
	uint16_t reg_arr_size;
	uint32_t reserved1;
	uint64_t reserved2;
	uint64_t msg[CPER_OAM_MAX_COUNT];
};

/* Complete fatal section; RAS_FATAL_SEC_LEN is its size. */
struct cper_section_fatal {
	struct crashdump_hdr   hdr;
	struct crashdump_fatal data;
};

/* Complete boot section; RAS_BOOT_SEC_LEN is its size. */
struct cper_section_boot {
	struct crashdump_hdr  hdr;
	struct crashdump_boot data;
};

/* A whole record holding exactly one fatal section. */
struct ras_cper_fatal_record {
	struct cper_section_hdr        hdr;
	struct cper_section_descriptor descriptor;
	struct cper_section_fatal      fatal;
};
#pragma pack(pop)

/*
 * Sizes of the record building blocks.  Each macro takes sizeof of a
 * structure defined in this header, so the header is usable on its own.
 */
#define RAS_HDR_LEN            (sizeof(struct cper_section_hdr))
#define RAS_SEC_DESC_LEN       (sizeof(struct cper_section_descriptor))

#define RAS_BOOT_SEC_LEN       (sizeof(struct cper_section_boot))
#define RAS_FATAL_SEC_LEN      (sizeof(struct cper_section_fatal))
#define RAS_NONSTD_SEC_LEN     (sizeof(struct cper_section_runtime))

/*
 * Header length plus idx section descriptors.  The argument is
 * parenthesized so expressions such as "i + 1" expand correctly.
 */
#define RAS_SEC_DESC_OFFSET(idx) (RAS_HDR_LEN + (RAS_SEC_DESC_LEN * (idx)))

/*
 * Header length plus count section descriptors plus idx sections of the
 * given kind.
 */
#define RAS_BOOT_SEC_OFFSET(count, idx) \
	(RAS_HDR_LEN + (RAS_SEC_DESC_LEN * (count)) + (RAS_BOOT_SEC_LEN * (idx)))
#define RAS_FATAL_SEC_OFFSET(count, idx) \
	(RAS_HDR_LEN + (RAS_SEC_DESC_LEN * (count)) + (RAS_FATAL_SEC_LEN * (idx)))
#define RAS_NONSTD_SEC_OFFSET(count, idx) \
	(RAS_HDR_LEN + (RAS_SEC_DESC_LEN * (count)) + (RAS_NONSTD_SEC_LEN * (idx)))

struct ras_core_context;
struct ras_log_info;

/*
 * Generate CPER data into a caller-supplied buffer.
 *
 * NOTE(review): the implementation is not part of this header.  Judging by
 * the parameter names, trace_list/count is the input log list, buf/buf_len
 * the output buffer and its capacity, and real_data_len receives the number
 * of bytes produced - confirm against the definition, including the meaning
 * of the return value.
 */
int ras_cper_generate_cper(struct ras_core_context *ras_core,
			   struct ras_log_info **trace_list, uint32_t count,
			   uint8_t *buf, uint32_t buf_len, uint32_t *real_data_len);
#endif