1 /* 2 * UEFI Common Platform Error Record (CPER) support 3 * 4 * Copyright (C) 2010, Intel Corp. 5 * Author: Huang Ying <ying.huang@intel.com> 6 * 7 * CPER is the format used to describe platform hardware error by 8 * various tables, such as ERST, BERT and HEST etc. 9 * 10 * For more information about CPER, please refer to Appendix N of UEFI 11 * Specification version 2.4. 12 * 13 * This program is free software; you can redistribute it and/or 14 * modify it under the terms of the GNU General Public License version 15 * 2 as published by the Free Software Foundation. 16 * 17 * This program is distributed in the hope that it will be useful, 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 * GNU General Public License for more details. 21 * 22 * You should have received a copy of the GNU General Public License 23 * along with this program; if not, write to the Free Software 24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 25 */ 26 27 #include <linux/kernel.h> 28 #include <linux/module.h> 29 #include <linux/time.h> 30 #include <linux/cper.h> 31 #include <linux/dmi.h> 32 #include <linux/acpi.h> 33 #include <linux/pci.h> 34 #include <linux/aer.h> 35 36 #define INDENT_SP " " 37 /* 38 * CPER record ID need to be unique even after reboot, because record 39 * ID is used as index for ERST storage, while CPER records from 40 * multiple boot may co-exist in ERST. 41 */ 42 u64 cper_next_record_id(void) 43 { 44 static atomic64_t seq; 45 46 if (!atomic64_read(&seq)) 47 atomic64_set(&seq, ((u64)get_seconds()) << 32); 48 49 return atomic64_inc_return(&seq); 50 } 51 EXPORT_SYMBOL_GPL(cper_next_record_id); 52 53 static const char *cper_severity_strs[] = { 54 "recoverable", 55 "fatal", 56 "corrected", 57 "info", 58 }; 59 60 static const char *cper_severity_str(unsigned int severity) 61 { 62 return severity < ARRAY_SIZE(cper_severity_strs) ? 63 cper_severity_strs[severity] : "unknown"; 64 } 65 66 /* 67 * cper_print_bits - print strings for set bits 68 * @pfx: prefix for each line, including log level and prefix string 69 * @bits: bit mask 70 * @strs: string array, indexed by bit position 71 * @strs_size: size of the string array: @strs 72 * 73 * For each set bit in @bits, print the corresponding string in @strs. 74 * If the output length is longer than 80, multiple line will be 75 * printed, with @pfx is printed at the beginning of each line. 76 */ 77 void cper_print_bits(const char *pfx, unsigned int bits, 78 const char * const strs[], unsigned int strs_size) 79 { 80 int i, len = 0; 81 const char *str; 82 char buf[84]; 83 84 for (i = 0; i < strs_size; i++) { 85 if (!(bits & (1U << i))) 86 continue; 87 str = strs[i]; 88 if (!str) 89 continue; 90 if (len && len + strlen(str) + 2 > 80) { 91 printk("%s\n", buf); 92 len = 0; 93 } 94 if (!len) 95 len = snprintf(buf, sizeof(buf), "%s%s", pfx, str); 96 else 97 len += snprintf(buf+len, sizeof(buf)-len, ", %s", str); 98 } 99 if (len) 100 printk("%s\n", buf); 101 } 102 103 static const char * const cper_proc_type_strs[] = { 104 "IA32/X64", 105 "IA64", 106 }; 107 108 static const char * const cper_proc_isa_strs[] = { 109 "IA32", 110 "IA64", 111 "X64", 112 }; 113 114 static const char * const cper_proc_error_type_strs[] = { 115 "cache error", 116 "TLB error", 117 "bus error", 118 "micro-architectural error", 119 }; 120 121 static const char * const cper_proc_op_strs[] = { 122 "unknown or generic", 123 "data read", 124 "data write", 125 "instruction execution", 126 }; 127 128 static const char * const cper_proc_flag_strs[] = { 129 "restartable", 130 "precise IP", 131 "overflow", 132 "corrected", 133 }; 134 135 static void cper_print_proc_generic(const char *pfx, 136 const struct cper_sec_proc_generic *proc) 137 { 138 if (proc->validation_bits & CPER_PROC_VALID_TYPE) 139 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type, 140 proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ? 141 cper_proc_type_strs[proc->proc_type] : "unknown"); 142 if (proc->validation_bits & CPER_PROC_VALID_ISA) 143 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa, 144 proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ? 145 cper_proc_isa_strs[proc->proc_isa] : "unknown"); 146 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) { 147 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type); 148 cper_print_bits(pfx, proc->proc_error_type, 149 cper_proc_error_type_strs, 150 ARRAY_SIZE(cper_proc_error_type_strs)); 151 } 152 if (proc->validation_bits & CPER_PROC_VALID_OPERATION) 153 printk("%s""operation: %d, %s\n", pfx, proc->operation, 154 proc->operation < ARRAY_SIZE(cper_proc_op_strs) ? 155 cper_proc_op_strs[proc->operation] : "unknown"); 156 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) { 157 printk("%s""flags: 0x%02x\n", pfx, proc->flags); 158 cper_print_bits(pfx, proc->flags, cper_proc_flag_strs, 159 ARRAY_SIZE(cper_proc_flag_strs)); 160 } 161 if (proc->validation_bits & CPER_PROC_VALID_LEVEL) 162 printk("%s""level: %d\n", pfx, proc->level); 163 if (proc->validation_bits & CPER_PROC_VALID_VERSION) 164 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version); 165 if (proc->validation_bits & CPER_PROC_VALID_ID) 166 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id); 167 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS) 168 printk("%s""target_address: 0x%016llx\n", 169 pfx, proc->target_addr); 170 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID) 171 printk("%s""requestor_id: 0x%016llx\n", 172 pfx, proc->requestor_id); 173 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID) 174 printk("%s""responder_id: 0x%016llx\n", 175 pfx, proc->responder_id); 176 if (proc->validation_bits & CPER_PROC_VALID_IP) 177 printk("%s""IP: 0x%016llx\n", pfx, proc->ip); 178 } 179 180 static const char *cper_mem_err_type_strs[] = { 181 "unknown", 182 "no error", 183 "single-bit ECC", 184 "multi-bit ECC", 185 "single-symbol chipkill ECC", 186 "multi-symbol chipkill ECC", 187 "master abort", 188 "target abort", 189 "parity error", 190 "watchdog timeout", 191 "invalid address", 192 "mirror Broken", 193 "memory sparing", 194 "scrub corrected error", 195 "scrub uncorrected error", 196 "physical memory map-out event", 197 }; 198 199 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) 200 { 201 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) 202 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); 203 if (mem->validation_bits & CPER_MEM_VALID_PA) 204 printk("%s""physical_address: 0x%016llx\n", 205 pfx, mem->physical_addr); 206 if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) 207 printk("%s""physical_address_mask: 0x%016llx\n", 208 pfx, mem->physical_addr_mask); 209 if (mem->validation_bits & CPER_MEM_VALID_NODE) 210 pr_debug("node: %d\n", mem->node); 211 if (mem->validation_bits & CPER_MEM_VALID_CARD) 212 pr_debug("card: %d\n", mem->card); 213 if (mem->validation_bits & CPER_MEM_VALID_MODULE) 214 pr_debug("module: %d\n", mem->module); 215 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER) 216 pr_debug("rank: %d\n", mem->rank); 217 if (mem->validation_bits & CPER_MEM_VALID_BANK) 218 pr_debug("bank: %d\n", mem->bank); 219 if (mem->validation_bits & CPER_MEM_VALID_DEVICE) 220 pr_debug("device: %d\n", mem->device); 221 if (mem->validation_bits & CPER_MEM_VALID_ROW) 222 pr_debug("row: %d\n", mem->row); 223 if (mem->validation_bits & CPER_MEM_VALID_COLUMN) 224 pr_debug("column: %d\n", mem->column); 225 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION) 226 pr_debug("bit_position: %d\n", mem->bit_pos); 227 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) 228 pr_debug("requestor_id: 0x%016llx\n", mem->requestor_id); 229 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID) 230 pr_debug("responder_id: 0x%016llx\n", mem->responder_id); 231 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID) 232 pr_debug("target_id: 0x%016llx\n", mem->target_id); 233 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { 234 u8 etype = mem->error_type; 235 printk("%s""error_type: %d, %s\n", pfx, etype, 236 etype < ARRAY_SIZE(cper_mem_err_type_strs) ? 237 cper_mem_err_type_strs[etype] : "unknown"); 238 } 239 if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { 240 const char *bank = NULL, *device = NULL; 241 dmi_memdev_name(mem->mem_dev_handle, &bank, &device); 242 if (bank != NULL && device != NULL) 243 printk("%s""DIMM location: %s %s", pfx, bank, device); 244 else 245 printk("%s""DIMM DMI handle: 0x%.4x", 246 pfx, mem->mem_dev_handle); 247 } 248 } 249 250 static const char *cper_pcie_port_type_strs[] = { 251 "PCIe end point", 252 "legacy PCI end point", 253 "unknown", 254 "unknown", 255 "root port", 256 "upstream switch port", 257 "downstream switch port", 258 "PCIe to PCI/PCI-X bridge", 259 "PCI/PCI-X to PCIe bridge", 260 "root complex integrated endpoint device", 261 "root complex event collector", 262 }; 263 264 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, 265 const struct acpi_generic_data *gdata) 266 { 267 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE) 268 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type, 269 pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ? 270 cper_pcie_port_type_strs[pcie->port_type] : "unknown"); 271 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION) 272 printk("%s""version: %d.%d\n", pfx, 273 pcie->version.major, pcie->version.minor); 274 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS) 275 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx, 276 pcie->command, pcie->status); 277 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) { 278 const __u8 *p; 279 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx, 280 pcie->device_id.segment, pcie->device_id.bus, 281 pcie->device_id.device, pcie->device_id.function); 282 printk("%s""slot: %d\n", pfx, 283 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT); 284 printk("%s""secondary_bus: 0x%02x\n", pfx, 285 pcie->device_id.secondary_bus); 286 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx, 287 pcie->device_id.vendor_id, pcie->device_id.device_id); 288 p = pcie->device_id.class_code; 289 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]); 290 } 291 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER) 292 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx, 293 pcie->serial_number.lower, pcie->serial_number.upper); 294 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS) 295 printk( 296 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n", 297 pfx, pcie->bridge.secondary_status, pcie->bridge.control); 298 } 299 300 static void cper_estatus_print_section( 301 const char *pfx, const struct acpi_generic_data *gdata, int sec_no) 302 { 303 uuid_le *sec_type = (uuid_le *)gdata->section_type; 304 __u16 severity; 305 char newpfx[64]; 306 307 severity = gdata->error_severity; 308 printk("%s""Error %d, type: %s\n", pfx, sec_no, 309 cper_severity_str(severity)); 310 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) 311 printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id); 312 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) 313 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text); 314 315 snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP); 316 if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) { 317 struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1); 318 printk("%s""section_type: general processor error\n", newpfx); 319 if (gdata->error_data_length >= sizeof(*proc_err)) 320 cper_print_proc_generic(newpfx, proc_err); 321 else 322 goto err_section_too_small; 323 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) { 324 struct cper_sec_mem_err *mem_err = (void *)(gdata + 1); 325 printk("%s""section_type: memory error\n", newpfx); 326 if (gdata->error_data_length >= sizeof(*mem_err)) 327 cper_print_mem(newpfx, mem_err); 328 else 329 goto err_section_too_small; 330 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) { 331 struct cper_sec_pcie *pcie = (void *)(gdata + 1); 332 printk("%s""section_type: PCIe error\n", newpfx); 333 if (gdata->error_data_length >= sizeof(*pcie)) 334 cper_print_pcie(newpfx, pcie, gdata); 335 else 336 goto err_section_too_small; 337 } else 338 printk("%s""section type: unknown, %pUl\n", newpfx, sec_type); 339 340 return; 341 342 err_section_too_small: 343 pr_err(FW_WARN "error section length is too small\n"); 344 } 345 346 void cper_estatus_print(const char *pfx, 347 const struct acpi_generic_status *estatus) 348 { 349 struct acpi_generic_data *gdata; 350 unsigned int data_len, gedata_len; 351 int sec_no = 0; 352 char newpfx[64]; 353 __u16 severity; 354 355 severity = estatus->error_severity; 356 if (severity == CPER_SEV_CORRECTED) 357 printk("%s%s\n", pfx, 358 "It has been corrected by h/w " 359 "and requires no further action"); 360 printk("%s""event severity: %s\n", pfx, cper_severity_str(severity)); 361 data_len = estatus->data_length; 362 gdata = (struct acpi_generic_data *)(estatus + 1); 363 snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP); 364 while (data_len >= sizeof(*gdata)) { 365 gedata_len = gdata->error_data_length; 366 cper_estatus_print_section(newpfx, gdata, sec_no); 367 data_len -= gedata_len + sizeof(*gdata); 368 gdata = (void *)(gdata + 1) + gedata_len; 369 sec_no++; 370 } 371 } 372 EXPORT_SYMBOL_GPL(cper_estatus_print); 373 374 int cper_estatus_check_header(const struct acpi_generic_status *estatus) 375 { 376 if (estatus->data_length && 377 estatus->data_length < sizeof(struct acpi_generic_data)) 378 return -EINVAL; 379 if (estatus->raw_data_length && 380 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length) 381 return -EINVAL; 382 383 return 0; 384 } 385 EXPORT_SYMBOL_GPL(cper_estatus_check_header); 386 387 int cper_estatus_check(const struct acpi_generic_status *estatus) 388 { 389 struct acpi_generic_data *gdata; 390 unsigned int data_len, gedata_len; 391 int rc; 392 393 rc = cper_estatus_check_header(estatus); 394 if (rc) 395 return rc; 396 data_len = estatus->data_length; 397 gdata = (struct acpi_generic_data *)(estatus + 1); 398 while (data_len >= sizeof(*gdata)) { 399 gedata_len = gdata->error_data_length; 400 if (gedata_len > data_len - sizeof(*gdata)) 401 return -EINVAL; 402 data_len -= gedata_len + sizeof(*gdata); 403 gdata = (void *)(gdata + 1) + gedata_len; 404 } 405 if (data_len) 406 return -EINVAL; 407 408 return 0; 409 } 410 EXPORT_SYMBOL_GPL(cper_estatus_check); 411