// SPDX-License-Identifier: GPL-2.0-only
/*
 * APEI Generic Hardware Error Source support
 *
 * Generic Hardware Error Source provides a way to report platform
 * hardware errors (such as that from chipset). It works in so called
 * "Firmware First" mode, that is, hardware errors are reported to
 * firmware firstly, then reported to Linux by firmware. This way,
 * some non-standard hardware error registers or non-standard hardware
 * link can be checked by firmware to produce more hardware error
 * information for Linux.
 *
 * For more information about Generic Hardware Error Source, please
 * refer to ACPI Specification version 4.0, section 17.3.2.6
 *
 * Copyright 2010,2011 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 */

#include <linux/arm_sdei.h>
#include <linux/kernel.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/bitfield.h>
#include <linux/io.h>
#include <linux/interrupt.h>
#include <linux/timer.h>
#include <linux/cper.h>
#include <linux/cleanup.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/irq_work.h>
#include <linux/llist.h>
#include <linux/genalloc.h>
#include <linux/kfifo.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/aer.h>
#include <linux/nmi.h>
#include <linux/sched/clock.h>
#include <linux/uuid.h>
#include <linux/ras.h>
#include <linux/task_work.h>
#include <linux/vmcore_info.h>

#include <acpi/actbl1.h>
#include <acpi/ghes.h>
#include <acpi/apei.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
#include <cxl/event.h>
#include <ras/ras_event.h>

#include "apei-internal.h"

/* Prefix used for log messages emitted by this file. */
#define GHES_PFX	"GHES: "

/* Upper bound, in bytes, of the per-source status buffer allocated by ghes_new(). */
#define GHES_ESTATUS_MAX_SIZE		65536
/* Bytes added to the estatus pool for each error source (ghes_estatus_pool_init()). */
#define GHES_ESOURCE_PREALLOC_MAX_SIZE	65536

/* min_alloc_order handed to gen_pool_create() for the estatus pool. */
#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER	3

/* This is just an estimation for memory pool allocation */
#define GHES_ESTATUS_CACHE_AVG_SIZE	512

/* Number of slots in ghes_estatus_caches[]. */
#define GHES_ESTATUS_CACHES_SIZE	4

/*
 * Maximum age of a cache entry, compared against sched_clock() deltas
 * (10^10 ns, i.e. 10 seconds).
 */
#define GHES_ESTATUS_IN_CACHE_MAX_NSEC	10000000000ULL
/* Prevent too many caches are allocated because of RCU */
#define GHES_ESTATUS_CACHE_ALLOCED_MAX	(GHES_ESTATUS_CACHES_SIZE * 3 / 2)

/*
 * Cache entries, estatus nodes and vendor record entries are all laid out as
 * a header structure immediately followed by the payload.  The *_LEN() macros
 * give the total size for a payload of the given length; the *_FROM_*()
 * macros return a pointer to the payload located right after the header.
 */
#define GHES_ESTATUS_CACHE_LEN(estatus_len)			\
	(sizeof(struct ghes_estatus_cache) + (estatus_len))
#define GHES_ESTATUS_FROM_CACHE(estatus_cache)			\
	((struct acpi_hest_generic_status *)				\
	 ((struct ghes_estatus_cache *)(estatus_cache) + 1))

#define GHES_ESTATUS_NODE_LEN(estatus_len)			\
	(sizeof(struct ghes_estatus_node) + (estatus_len))
#define GHES_ESTATUS_FROM_NODE(estatus_node)			\
	((struct acpi_hest_generic_status *)				\
	 ((struct ghes_estatus_node *)(estatus_node) + 1))

#define GHES_VENDOR_ENTRY_LEN(gdata_len)                               \
	(sizeof(struct ghes_vendor_record_entry) + (gdata_len))
#define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry)                     \
	((struct acpi_hest_generic_data *)                              \
	((struct ghes_vendor_record_entry *)(vendor_entry) + 1))

/*
 *  NMI-like notifications vary by architecture, before the compiler can prune
 *  unused static functions it needs a value for these enums.
 */
#ifndef CONFIG_ARM_SDE_INTERFACE
#define FIX_APEI_GHES_SDEI_NORMAL	__end_of_fixed_addresses
#define FIX_APEI_GHES_SDEI_CRITICAL	__end_of_fixed_addresses
#endif

/* Notifier chain invoked from ghes_do_proc() for platform memory error sections. */
static ATOMIC_NOTIFIER_HEAD(ghes_report_chain);

/* True when the HEST entry behind @ghes has the GHESv2 type. */
static inline bool is_hest_type_generic_v2(struct ghes *ghes)
{
	return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
}

/*
 * A platform may describe one error source for the handling of synchronous
 * errors (e.g. MCE or SEA), or for handling asynchronous errors (e.g. SCI
 * or External Interrupt). On x86, the HEST notifications are always
 * asynchronous, so only SEA on ARM is delivered as a synchronous
 * notification.
*/
static inline bool is_hest_sync_notify(struct ghes *ghes)
{
	u8 notify_type = ghes->generic->notify.type;

	/* SEA is the only notification type classified as synchronous here. */
	return notify_type == ACPI_HEST_NOTIFY_SEA;
}

/*
 * This driver isn't really modular, however for the time being,
 * continuing to use module_param is the easiest way to remain
 * compatible with existing boot arg use cases.
 */
bool ghes_disable;
module_param_named(disable, ghes_disable, bool, 0);

/*
 * "ghes.edac_force_enable" forcibly enables ghes_edac and skips the platform
 * check.
 */
static bool ghes_edac_force_enable;
module_param_named(edac_force_enable, ghes_edac_force_enable, bool, 0);

/*
 * All error sources notified with HED (Hardware Error Device) share a
 * single notifier callback, so they need to be linked and checked one
 * by one. This holds true for NMI too.
 *
 * RCU is used for these lists, so ghes_list_mutex is only used for
 * list changing, not for traversing.
 */
static LIST_HEAD(ghes_hed);
static DEFINE_MUTEX(ghes_list_mutex);

/*
 * A list of GHES devices which are given to the corresponding EDAC driver
 * ghes_edac for further use.
 */
static LIST_HEAD(ghes_devs);
static DEFINE_MUTEX(ghes_devs_mutex);

/*
 * Because the memory area used to transfer hardware error information
 * from BIOS to Linux can be determined only in NMI, IRQ or timer
 * handler, but general ioremap can not be used in atomic context, so
 * the fixmap is used instead.
 *
 * This spinlock is used to prevent the fixmap entry from being used
 * simultaneously.
164 */ 165 static DEFINE_SPINLOCK(ghes_notify_lock_irq); 166 167 struct ghes_vendor_record_entry { 168 struct work_struct work; 169 int error_severity; 170 char vendor_record[]; 171 }; 172 173 static struct gen_pool *ghes_estatus_pool; 174 175 static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; 176 static atomic_t ghes_estatus_cache_alloced; 177 178 static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx) 179 { 180 phys_addr_t paddr; 181 pgprot_t prot; 182 183 paddr = PFN_PHYS(pfn); 184 prot = arch_apei_get_mem_attribute(paddr); 185 __set_fixmap(fixmap_idx, paddr, prot); 186 187 return (void __iomem *) __fix_to_virt(fixmap_idx); 188 } 189 190 static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx) 191 { 192 int _idx = virt_to_fix((unsigned long)vaddr); 193 194 WARN_ON_ONCE(fixmap_idx != _idx); 195 clear_fixmap(fixmap_idx); 196 } 197 198 int ghes_estatus_pool_init(unsigned int num_ghes) 199 { 200 unsigned long addr, len; 201 int rc; 202 203 ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); 204 if (!ghes_estatus_pool) 205 return -ENOMEM; 206 207 len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX; 208 len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE); 209 210 addr = (unsigned long)vmalloc(PAGE_ALIGN(len)); 211 if (!addr) 212 goto err_pool_alloc; 213 214 rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1); 215 if (rc) 216 goto err_pool_add; 217 218 return 0; 219 220 err_pool_add: 221 vfree((void *)addr); 222 223 err_pool_alloc: 224 gen_pool_destroy(ghes_estatus_pool); 225 226 return -ENOMEM; 227 } 228 229 /** 230 * ghes_estatus_pool_region_free - free previously allocated memory 231 * from the ghes_estatus_pool. 232 * @addr: address of memory to free. 233 * @size: size of memory to free. 234 * 235 * Returns none. 
236 */ 237 void ghes_estatus_pool_region_free(unsigned long addr, u32 size) 238 { 239 gen_pool_free(ghes_estatus_pool, addr, size); 240 } 241 EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free); 242 243 static int map_gen_v2(struct ghes *ghes) 244 { 245 return apei_map_generic_address(&ghes->generic_v2->read_ack_register); 246 } 247 248 static void unmap_gen_v2(struct ghes *ghes) 249 { 250 apei_unmap_generic_address(&ghes->generic_v2->read_ack_register); 251 } 252 253 static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2) 254 { 255 int rc; 256 u64 val = 0; 257 258 rc = apei_read(&val, &gv2->read_ack_register); 259 if (rc) 260 return; 261 262 val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset; 263 val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset; 264 265 apei_write(val, &gv2->read_ack_register); 266 } 267 268 static struct ghes *ghes_new(struct acpi_hest_generic *generic) 269 { 270 struct ghes *ghes; 271 unsigned int error_block_length; 272 int rc; 273 274 ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); 275 if (!ghes) 276 return ERR_PTR(-ENOMEM); 277 278 ghes->generic = generic; 279 if (is_hest_type_generic_v2(ghes)) { 280 rc = map_gen_v2(ghes); 281 if (rc) 282 goto err_free; 283 } 284 285 rc = apei_map_generic_address(&generic->error_status_address); 286 if (rc) 287 goto err_unmap_read_ack_addr; 288 error_block_length = generic->error_block_length; 289 if (error_block_length > GHES_ESTATUS_MAX_SIZE) { 290 pr_warn(FW_WARN GHES_PFX 291 "Error status block length is too long: %u for " 292 "generic hardware error source: %d.\n", 293 error_block_length, generic->header.source_id); 294 error_block_length = GHES_ESTATUS_MAX_SIZE; 295 } 296 ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); 297 if (!ghes->estatus) { 298 rc = -ENOMEM; 299 goto err_unmap_status_addr; 300 } 301 302 return ghes; 303 304 err_unmap_status_addr: 305 apei_unmap_generic_address(&generic->error_status_address); 306 err_unmap_read_ack_addr: 307 if 
(is_hest_type_generic_v2(ghes)) 308 unmap_gen_v2(ghes); 309 err_free: 310 kfree(ghes); 311 return ERR_PTR(rc); 312 } 313 314 static void ghes_fini(struct ghes *ghes) 315 { 316 kfree(ghes->estatus); 317 apei_unmap_generic_address(&ghes->generic->error_status_address); 318 if (is_hest_type_generic_v2(ghes)) 319 unmap_gen_v2(ghes); 320 } 321 322 static inline int ghes_severity(int severity) 323 { 324 switch (severity) { 325 case CPER_SEV_INFORMATIONAL: 326 return GHES_SEV_NO; 327 case CPER_SEV_CORRECTED: 328 return GHES_SEV_CORRECTED; 329 case CPER_SEV_RECOVERABLE: 330 return GHES_SEV_RECOVERABLE; 331 case CPER_SEV_FATAL: 332 return GHES_SEV_PANIC; 333 default: 334 /* Unknown, go panic */ 335 return GHES_SEV_PANIC; 336 } 337 } 338 339 static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, 340 int from_phys, 341 enum fixed_addresses fixmap_idx) 342 { 343 void __iomem *vaddr; 344 u64 offset; 345 u32 trunk; 346 347 while (len > 0) { 348 offset = paddr - (paddr & PAGE_MASK); 349 vaddr = ghes_map(PHYS_PFN(paddr), fixmap_idx); 350 trunk = PAGE_SIZE - offset; 351 trunk = min(trunk, len); 352 if (from_phys) 353 memcpy_fromio(buffer, vaddr + offset, trunk); 354 else 355 memcpy_toio(vaddr + offset, buffer, trunk); 356 len -= trunk; 357 paddr += trunk; 358 buffer += trunk; 359 ghes_unmap(vaddr, fixmap_idx); 360 } 361 } 362 363 /* Check the top-level record header has an appropriate size. 
*/ 364 static int __ghes_check_estatus(struct ghes *ghes, 365 struct acpi_hest_generic_status *estatus) 366 { 367 u32 len = cper_estatus_len(estatus); 368 369 if (len < sizeof(*estatus)) { 370 pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n"); 371 return -EIO; 372 } 373 374 if (len > ghes->generic->error_block_length) { 375 pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n"); 376 return -EIO; 377 } 378 379 if (cper_estatus_check_header(estatus)) { 380 pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n"); 381 return -EIO; 382 } 383 384 return 0; 385 } 386 387 /* Read the CPER block, returning its address, and header in estatus. */ 388 static int __ghes_peek_estatus(struct ghes *ghes, 389 struct acpi_hest_generic_status *estatus, 390 u64 *buf_paddr, enum fixed_addresses fixmap_idx) 391 { 392 struct acpi_hest_generic *g = ghes->generic; 393 int rc; 394 395 rc = apei_read(buf_paddr, &g->error_status_address); 396 if (rc) { 397 *buf_paddr = 0; 398 pr_warn_ratelimited(FW_WARN GHES_PFX 399 "Failed to read error status block address for hardware error source: %d.\n", 400 g->header.source_id); 401 return -EIO; 402 } 403 if (!*buf_paddr) 404 return -ENOENT; 405 406 ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1, 407 fixmap_idx); 408 if (!estatus->block_status) { 409 *buf_paddr = 0; 410 return -ENOENT; 411 } 412 413 return 0; 414 } 415 416 static int __ghes_read_estatus(struct acpi_hest_generic_status *estatus, 417 u64 buf_paddr, enum fixed_addresses fixmap_idx, 418 size_t buf_len) 419 { 420 ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx); 421 if (cper_estatus_check(estatus)) { 422 pr_warn_ratelimited(FW_WARN GHES_PFX 423 "Failed to read error status block!\n"); 424 return -EIO; 425 } 426 427 return 0; 428 } 429 430 static int ghes_read_estatus(struct ghes *ghes, 431 struct acpi_hest_generic_status *estatus, 432 u64 *buf_paddr, enum fixed_addresses fixmap_idx) 433 { 434 int rc; 435 436 
rc = __ghes_peek_estatus(ghes, estatus, buf_paddr, fixmap_idx); 437 if (rc) 438 return rc; 439 440 rc = __ghes_check_estatus(ghes, estatus); 441 if (rc) 442 return rc; 443 444 return __ghes_read_estatus(estatus, *buf_paddr, fixmap_idx, 445 cper_estatus_len(estatus)); 446 } 447 448 static void ghes_clear_estatus(struct ghes *ghes, 449 struct acpi_hest_generic_status *estatus, 450 u64 buf_paddr, enum fixed_addresses fixmap_idx) 451 { 452 estatus->block_status = 0; 453 454 if (!buf_paddr) 455 return; 456 457 ghes_copy_tofrom_phys(estatus, buf_paddr, 458 sizeof(estatus->block_status), 0, 459 fixmap_idx); 460 461 /* 462 * GHESv2 type HEST entries introduce support for error acknowledgment, 463 * so only acknowledge the error if this support is present. 464 */ 465 if (is_hest_type_generic_v2(ghes)) 466 ghes_ack_error(ghes->generic_v2); 467 } 468 469 /** 470 * struct ghes_task_work - for synchronous RAS event 471 * 472 * @twork: callback_head for task work 473 * @pfn: page frame number of corrupted page 474 * @flags: work control flags 475 * 476 * Structure to pass task work to be handled before 477 * returning to user-space via task_work_add(). 
478 */ 479 struct ghes_task_work { 480 struct callback_head twork; 481 u64 pfn; 482 int flags; 483 }; 484 485 static void memory_failure_cb(struct callback_head *twork) 486 { 487 struct ghes_task_work *twcb = container_of(twork, struct ghes_task_work, twork); 488 int ret; 489 490 ret = memory_failure(twcb->pfn, twcb->flags); 491 gen_pool_free(ghes_estatus_pool, (unsigned long)twcb, sizeof(*twcb)); 492 493 if (!ret || ret == -EHWPOISON || ret == -EOPNOTSUPP) 494 return; 495 496 pr_err("%#llx: Sending SIGBUS to %s:%d due to hardware memory corruption\n", 497 twcb->pfn, current->comm, task_pid_nr(current)); 498 force_sig(SIGBUS); 499 } 500 501 static bool ghes_do_memory_failure(u64 physical_addr, int flags) 502 { 503 struct ghes_task_work *twcb; 504 unsigned long pfn; 505 506 if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE)) 507 return false; 508 509 pfn = PHYS_PFN(physical_addr); 510 511 if (flags == MF_ACTION_REQUIRED && current->mm) { 512 twcb = (void *)gen_pool_alloc(ghes_estatus_pool, sizeof(*twcb)); 513 if (!twcb) 514 return false; 515 516 twcb->pfn = pfn; 517 twcb->flags = flags; 518 init_task_work(&twcb->twork, memory_failure_cb); 519 task_work_add(current, &twcb->twork, TWA_RESUME); 520 return true; 521 } 522 523 memory_failure_queue(pfn, flags); 524 return true; 525 } 526 527 static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, 528 int sev, bool sync) 529 { 530 int flags = -1; 531 int sec_sev = ghes_severity(gdata->error_severity); 532 struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); 533 534 if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) 535 return false; 536 537 /* iff following two events can be handled properly by now */ 538 if (sec_sev == GHES_SEV_CORRECTED && 539 (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED)) 540 flags = MF_SOFT_OFFLINE; 541 if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE) 542 flags = sync ? 
MF_ACTION_REQUIRED : 0; 543 544 if (flags != -1) 545 return ghes_do_memory_failure(mem_err->physical_addr, flags); 546 547 return false; 548 } 549 550 static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, 551 int sev, bool sync) 552 { 553 struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); 554 int flags = sync ? MF_ACTION_REQUIRED : 0; 555 char error_type[120]; 556 bool queued = false; 557 int sec_sev, i; 558 char *p; 559 560 sec_sev = ghes_severity(gdata->error_severity); 561 log_arm_hw_error(err, sec_sev); 562 if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE) 563 return false; 564 565 p = (char *)(err + 1); 566 for (i = 0; i < err->err_info_num; i++) { 567 struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p; 568 bool is_cache = err_info->type & CPER_ARM_CACHE_ERROR; 569 bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR); 570 571 /* 572 * The field (err_info->error_info & BIT(26)) is fixed to set to 573 * 1 in some old firmware of HiSilicon Kunpeng920. We assume that 574 * firmware won't mix corrected errors in an uncorrected section, 575 * and don't filter out 'corrected' error here. 576 */ 577 if (is_cache && has_pa) { 578 queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags); 579 p += err_info->length; 580 continue; 581 } 582 583 cper_bits_to_str(error_type, sizeof(error_type), 584 FIELD_GET(CPER_ARM_ERR_TYPE_MASK, err_info->type), 585 cper_proc_error_type_strs, 586 ARRAY_SIZE(cper_proc_error_type_strs)); 587 588 pr_warn_ratelimited(FW_WARN GHES_PFX 589 "Unhandled processor error type 0x%02x: %s%s\n", 590 err_info->type, error_type, 591 (err_info->type & ~CPER_ARM_ERR_TYPE_MASK) ? " with reserved bit(s)" : ""); 592 p += err_info->length; 593 } 594 595 return queued; 596 } 597 598 /* 599 * PCIe AER errors need to be sent to the AER driver for reporting and 600 * recovery. 
The GHES severities map to the following AER severities and 601 * require the following handling: 602 * 603 * GHES_SEV_CORRECTABLE -> AER_CORRECTABLE 604 * These need to be reported by the AER driver but no recovery is 605 * necessary. 606 * GHES_SEV_RECOVERABLE -> AER_NONFATAL 607 * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL 608 * These both need to be reported and recovered from by the AER driver. 609 * GHES_SEV_PANIC does not make it to this handling since the kernel must 610 * panic. 611 */ 612 static void ghes_handle_aer(struct acpi_hest_generic_data *gdata) 613 { 614 #ifdef CONFIG_ACPI_APEI_PCIEAER 615 struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata); 616 617 if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID && 618 pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) { 619 unsigned int devfn; 620 int aer_severity; 621 u8 *aer_info; 622 623 devfn = PCI_DEVFN(pcie_err->device_id.device, 624 pcie_err->device_id.function); 625 aer_severity = cper_severity_to_aer(gdata->error_severity); 626 627 /* 628 * If firmware reset the component to contain 629 * the error, we must reinitialize it before 630 * use, so treat it as a fatal AER error. 
631 */ 632 if (gdata->flags & CPER_SEC_RESET) 633 aer_severity = AER_FATAL; 634 635 aer_info = (void *)gen_pool_alloc(ghes_estatus_pool, 636 sizeof(struct aer_capability_regs)); 637 if (!aer_info) 638 return; 639 memcpy(aer_info, pcie_err->aer_info, sizeof(struct aer_capability_regs)); 640 641 aer_recover_queue(pcie_err->device_id.segment, 642 pcie_err->device_id.bus, 643 devfn, aer_severity, 644 (struct aer_capability_regs *) 645 aer_info); 646 } 647 #endif 648 } 649 650 static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list); 651 652 int ghes_register_vendor_record_notifier(struct notifier_block *nb) 653 { 654 return blocking_notifier_chain_register(&vendor_record_notify_list, nb); 655 } 656 EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier); 657 658 void ghes_unregister_vendor_record_notifier(struct notifier_block *nb) 659 { 660 blocking_notifier_chain_unregister(&vendor_record_notify_list, nb); 661 } 662 EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier); 663 664 static void ghes_vendor_record_work_func(struct work_struct *work) 665 { 666 struct ghes_vendor_record_entry *entry; 667 struct acpi_hest_generic_data *gdata; 668 u32 len; 669 670 entry = container_of(work, struct ghes_vendor_record_entry, work); 671 gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry); 672 673 blocking_notifier_call_chain(&vendor_record_notify_list, 674 entry->error_severity, gdata); 675 676 len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata)); 677 gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len); 678 } 679 680 static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, 681 int sev) 682 { 683 struct acpi_hest_generic_data *copied_gdata; 684 struct ghes_vendor_record_entry *entry; 685 u32 len; 686 687 len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata)); 688 entry = (void *)gen_pool_alloc(ghes_estatus_pool, len); 689 if (!entry) 690 return; 691 692 copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry); 693 memcpy(copied_gdata, gdata, 
acpi_hest_get_record_size(gdata)); 694 entry->error_severity = sev; 695 696 INIT_WORK(&entry->work, ghes_vendor_record_work_func); 697 schedule_work(&entry->work); 698 } 699 700 /* Room for 8 entries */ 701 #define CXL_CPER_PROT_ERR_FIFO_DEPTH 8 702 static DEFINE_KFIFO(cxl_cper_prot_err_fifo, struct cxl_cper_prot_err_work_data, 703 CXL_CPER_PROT_ERR_FIFO_DEPTH); 704 705 /* Synchronize schedule_work() with cxl_cper_prot_err_work changes */ 706 static DEFINE_SPINLOCK(cxl_cper_prot_err_work_lock); 707 struct work_struct *cxl_cper_prot_err_work; 708 709 static void cxl_cper_post_prot_err(struct cxl_cper_sec_prot_err *prot_err, 710 int severity) 711 { 712 #ifdef CONFIG_ACPI_APEI_PCIEAER 713 struct cxl_cper_prot_err_work_data wd; 714 u8 *dvsec_start, *cap_start; 715 716 if (!(prot_err->valid_bits & PROT_ERR_VALID_AGENT_ADDRESS)) { 717 pr_err_ratelimited("CXL CPER invalid agent type\n"); 718 return; 719 } 720 721 if (!(prot_err->valid_bits & PROT_ERR_VALID_ERROR_LOG)) { 722 pr_err_ratelimited("CXL CPER invalid protocol error log\n"); 723 return; 724 } 725 726 if (prot_err->err_len != sizeof(struct cxl_ras_capability_regs)) { 727 pr_err_ratelimited("CXL CPER invalid RAS Cap size (%u)\n", 728 prot_err->err_len); 729 return; 730 } 731 732 if (!(prot_err->valid_bits & PROT_ERR_VALID_SERIAL_NUMBER)) 733 pr_warn(FW_WARN "CXL CPER no device serial number\n"); 734 735 guard(spinlock_irqsave)(&cxl_cper_prot_err_work_lock); 736 737 if (!cxl_cper_prot_err_work) 738 return; 739 740 switch (prot_err->agent_type) { 741 case RCD: 742 case DEVICE: 743 case LD: 744 case FMLD: 745 case RP: 746 case DSP: 747 case USP: 748 memcpy(&wd.prot_err, prot_err, sizeof(wd.prot_err)); 749 750 dvsec_start = (u8 *)(prot_err + 1); 751 cap_start = dvsec_start + prot_err->dvsec_len; 752 753 memcpy(&wd.ras_cap, cap_start, sizeof(wd.ras_cap)); 754 wd.severity = cper_severity_to_aer(severity); 755 break; 756 default: 757 pr_err_ratelimited("CXL CPER invalid agent type: %d\n", 758 prot_err->agent_type); 759 
return; 760 } 761 762 if (!kfifo_put(&cxl_cper_prot_err_fifo, wd)) { 763 pr_err_ratelimited("CXL CPER kfifo overflow\n"); 764 return; 765 } 766 767 schedule_work(cxl_cper_prot_err_work); 768 #endif 769 } 770 771 int cxl_cper_register_prot_err_work(struct work_struct *work) 772 { 773 if (cxl_cper_prot_err_work) 774 return -EINVAL; 775 776 guard(spinlock)(&cxl_cper_prot_err_work_lock); 777 cxl_cper_prot_err_work = work; 778 return 0; 779 } 780 EXPORT_SYMBOL_NS_GPL(cxl_cper_register_prot_err_work, "CXL"); 781 782 int cxl_cper_unregister_prot_err_work(struct work_struct *work) 783 { 784 if (cxl_cper_prot_err_work != work) 785 return -EINVAL; 786 787 guard(spinlock)(&cxl_cper_prot_err_work_lock); 788 cxl_cper_prot_err_work = NULL; 789 return 0; 790 } 791 EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_prot_err_work, "CXL"); 792 793 int cxl_cper_prot_err_kfifo_get(struct cxl_cper_prot_err_work_data *wd) 794 { 795 return kfifo_get(&cxl_cper_prot_err_fifo, wd); 796 } 797 EXPORT_SYMBOL_NS_GPL(cxl_cper_prot_err_kfifo_get, "CXL"); 798 799 /* Room for 8 entries for each of the 4 event log queues */ 800 #define CXL_CPER_FIFO_DEPTH 32 801 DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH); 802 803 /* Synchronize schedule_work() with cxl_cper_work changes */ 804 static DEFINE_SPINLOCK(cxl_cper_work_lock); 805 struct work_struct *cxl_cper_work; 806 807 static void cxl_cper_post_event(enum cxl_event_type event_type, 808 struct cxl_cper_event_rec *rec) 809 { 810 struct cxl_cper_work_data wd; 811 812 if (rec->hdr.length <= sizeof(rec->hdr) || 813 rec->hdr.length > sizeof(*rec)) { 814 pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n", 815 rec->hdr.length); 816 return; 817 } 818 819 if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) { 820 pr_err(FW_WARN "CXL CPER invalid event\n"); 821 return; 822 } 823 824 guard(spinlock_irqsave)(&cxl_cper_work_lock); 825 826 if (!cxl_cper_work) 827 return; 828 829 wd.event_type = event_type; 830 memcpy(&wd.rec, 
rec, sizeof(wd.rec)); 831 832 if (!kfifo_put(&cxl_cper_fifo, wd)) { 833 pr_err_ratelimited("CXL CPER kfifo overflow\n"); 834 return; 835 } 836 837 schedule_work(cxl_cper_work); 838 } 839 840 int cxl_cper_register_work(struct work_struct *work) 841 { 842 if (cxl_cper_work) 843 return -EINVAL; 844 845 guard(spinlock)(&cxl_cper_work_lock); 846 cxl_cper_work = work; 847 return 0; 848 } 849 EXPORT_SYMBOL_NS_GPL(cxl_cper_register_work, "CXL"); 850 851 int cxl_cper_unregister_work(struct work_struct *work) 852 { 853 if (cxl_cper_work != work) 854 return -EINVAL; 855 856 guard(spinlock)(&cxl_cper_work_lock); 857 cxl_cper_work = NULL; 858 return 0; 859 } 860 EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_work, "CXL"); 861 862 int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd) 863 { 864 return kfifo_get(&cxl_cper_fifo, wd); 865 } 866 EXPORT_SYMBOL_NS_GPL(cxl_cper_kfifo_get, "CXL"); 867 868 static void ghes_log_hwerr(int sev, guid_t *sec_type) 869 { 870 if (sev != CPER_SEV_RECOVERABLE) 871 return; 872 873 if (guid_equal(sec_type, &CPER_SEC_PROC_ARM) || 874 guid_equal(sec_type, &CPER_SEC_PROC_GENERIC) || 875 guid_equal(sec_type, &CPER_SEC_PROC_IA)) { 876 hwerr_log_error_type(HWERR_RECOV_CPU); 877 return; 878 } 879 880 if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR) || 881 guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID) || 882 guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID) || 883 guid_equal(sec_type, &CPER_SEC_CXL_MEM_MODULE_GUID)) { 884 hwerr_log_error_type(HWERR_RECOV_CXL); 885 return; 886 } 887 888 if (guid_equal(sec_type, &CPER_SEC_PCIE) || 889 guid_equal(sec_type, &CPER_SEC_PCI_X_BUS)) { 890 hwerr_log_error_type(HWERR_RECOV_PCI); 891 return; 892 } 893 894 if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { 895 hwerr_log_error_type(HWERR_RECOV_MEMORY); 896 return; 897 } 898 899 hwerr_log_error_type(HWERR_RECOV_OTHERS); 900 } 901 902 static void ghes_do_proc(struct ghes *ghes, 903 const struct acpi_hest_generic_status *estatus) 904 { 905 int sev, sec_sev; 906 struct 
acpi_hest_generic_data *gdata; 907 guid_t *sec_type; 908 const guid_t *fru_id = &guid_null; 909 char *fru_text = ""; 910 bool queued = false; 911 bool sync = is_hest_sync_notify(ghes); 912 913 sev = ghes_severity(estatus->error_severity); 914 apei_estatus_for_each_section(estatus, gdata) { 915 sec_type = (guid_t *)gdata->section_type; 916 sec_sev = ghes_severity(gdata->error_severity); 917 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) 918 fru_id = (guid_t *)gdata->fru_id; 919 920 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) 921 fru_text = gdata->fru_text; 922 923 ghes_log_hwerr(sev, sec_type); 924 if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { 925 struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); 926 927 atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err); 928 929 arch_apei_report_mem_error(sev, mem_err); 930 queued = ghes_handle_memory_failure(gdata, sev, sync); 931 } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { 932 ghes_handle_aer(gdata); 933 } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { 934 queued = ghes_handle_arm_hw_error(gdata, sev, sync); 935 } else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) { 936 struct cxl_cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata); 937 938 cxl_cper_post_prot_err(prot_err, gdata->error_severity); 939 } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) { 940 struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); 941 942 cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec); 943 } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) { 944 struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); 945 946 cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec); 947 } else if (guid_equal(sec_type, &CPER_SEC_CXL_MEM_MODULE_GUID)) { 948 struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); 949 950 cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec); 951 } else { 952 void *err = acpi_hest_get_payload(gdata); 953 954 
ghes_defer_non_standard_event(gdata, sev); 955 log_non_standard_event(sec_type, fru_id, fru_text, 956 sec_sev, err, 957 gdata->error_data_length); 958 } 959 } 960 961 /* 962 * If no memory failure work is queued for abnormal synchronous 963 * errors, do a force kill. 964 */ 965 if (sync && !queued) { 966 dev_err(ghes->dev, 967 HW_ERR GHES_PFX "%s:%d: synchronous unrecoverable error (SIGBUS)\n", 968 current->comm, task_pid_nr(current)); 969 force_sig(SIGBUS); 970 } 971 } 972 973 static void __ghes_print_estatus(const char *pfx, 974 const struct acpi_hest_generic *generic, 975 const struct acpi_hest_generic_status *estatus) 976 { 977 static atomic_t seqno; 978 unsigned int curr_seqno; 979 char pfx_seq[64]; 980 981 if (pfx == NULL) { 982 if (ghes_severity(estatus->error_severity) <= 983 GHES_SEV_CORRECTED) 984 pfx = KERN_WARNING; 985 else 986 pfx = KERN_ERR; 987 } 988 curr_seqno = atomic_inc_return(&seqno); 989 snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno); 990 printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", 991 pfx_seq, generic->header.source_id); 992 cper_estatus_print(pfx_seq, estatus); 993 } 994 995 static int ghes_print_estatus(const char *pfx, 996 const struct acpi_hest_generic *generic, 997 const struct acpi_hest_generic_status *estatus) 998 { 999 /* Not more than 2 messages every 5 seconds */ 1000 static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); 1001 static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2); 1002 struct ratelimit_state *ratelimit; 1003 1004 if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) 1005 ratelimit = &ratelimit_corrected; 1006 else 1007 ratelimit = &ratelimit_uncorrected; 1008 if (__ratelimit(ratelimit)) { 1009 __ghes_print_estatus(pfx, generic, estatus); 1010 return 1; 1011 } 1012 return 0; 1013 } 1014 1015 /* 1016 * GHES error status reporting throttle, to report more kinds of 1017 * errors, instead of just most frequently occurred errors. 
1018 */ 1019 static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus) 1020 { 1021 u32 len; 1022 int i, cached = 0; 1023 unsigned long long now; 1024 struct ghes_estatus_cache *cache; 1025 struct acpi_hest_generic_status *cache_estatus; 1026 1027 len = cper_estatus_len(estatus); 1028 rcu_read_lock(); 1029 for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { 1030 cache = rcu_dereference(ghes_estatus_caches[i]); 1031 if (cache == NULL) 1032 continue; 1033 if (len != cache->estatus_len) 1034 continue; 1035 cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); 1036 if (memcmp(estatus, cache_estatus, len)) 1037 continue; 1038 atomic_inc(&cache->count); 1039 now = sched_clock(); 1040 if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC) 1041 cached = 1; 1042 break; 1043 } 1044 rcu_read_unlock(); 1045 return cached; 1046 } 1047 1048 static struct ghes_estatus_cache *ghes_estatus_cache_alloc( 1049 struct acpi_hest_generic *generic, 1050 struct acpi_hest_generic_status *estatus) 1051 { 1052 int alloced; 1053 u32 len, cache_len; 1054 struct ghes_estatus_cache *cache; 1055 struct acpi_hest_generic_status *cache_estatus; 1056 1057 alloced = atomic_add_return(1, &ghes_estatus_cache_alloced); 1058 if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) { 1059 atomic_dec(&ghes_estatus_cache_alloced); 1060 return NULL; 1061 } 1062 len = cper_estatus_len(estatus); 1063 cache_len = GHES_ESTATUS_CACHE_LEN(len); 1064 cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len); 1065 if (!cache) { 1066 atomic_dec(&ghes_estatus_cache_alloced); 1067 return NULL; 1068 } 1069 cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); 1070 memcpy(cache_estatus, estatus, len); 1071 cache->estatus_len = len; 1072 atomic_set(&cache->count, 0); 1073 cache->generic = generic; 1074 cache->time_in = sched_clock(); 1075 return cache; 1076 } 1077 1078 static void ghes_estatus_cache_rcu_free(struct rcu_head *head) 1079 { 1080 struct ghes_estatus_cache *cache; 1081 u32 len; 1082 1083 cache = container_of(head, 
struct ghes_estatus_cache, rcu); 1084 len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache)); 1085 len = GHES_ESTATUS_CACHE_LEN(len); 1086 gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len); 1087 atomic_dec(&ghes_estatus_cache_alloced); 1088 } 1089 1090 static void 1091 ghes_estatus_cache_add(struct acpi_hest_generic *generic, 1092 struct acpi_hest_generic_status *estatus) 1093 { 1094 unsigned long long now, duration, period, max_period = 0; 1095 struct ghes_estatus_cache *cache, *new_cache; 1096 struct ghes_estatus_cache __rcu *victim; 1097 int i, slot = -1, count; 1098 1099 new_cache = ghes_estatus_cache_alloc(generic, estatus); 1100 if (!new_cache) 1101 return; 1102 1103 rcu_read_lock(); 1104 now = sched_clock(); 1105 for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { 1106 cache = rcu_dereference(ghes_estatus_caches[i]); 1107 if (cache == NULL) { 1108 slot = i; 1109 break; 1110 } 1111 duration = now - cache->time_in; 1112 if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) { 1113 slot = i; 1114 break; 1115 } 1116 count = atomic_read(&cache->count); 1117 period = duration; 1118 do_div(period, (count + 1)); 1119 if (period > max_period) { 1120 max_period = period; 1121 slot = i; 1122 } 1123 } 1124 rcu_read_unlock(); 1125 1126 if (slot != -1) { 1127 /* 1128 * Use release semantics to ensure that ghes_estatus_cached() 1129 * running on another CPU will see the updated cache fields if 1130 * it can see the new value of the pointer. 1131 */ 1132 victim = xchg_release(&ghes_estatus_caches[slot], 1133 RCU_INITIALIZER(new_cache)); 1134 1135 /* 1136 * At this point, victim may point to a cached item different 1137 * from the one based on which we selected the slot. Instead of 1138 * going to the loop again to pick another slot, let's just 1139 * drop the other item anyway: this may cause a false cache 1140 * miss later on, but that won't cause any problems. 
1141 */ 1142 if (victim) 1143 call_rcu(&unrcu_pointer(victim)->rcu, 1144 ghes_estatus_cache_rcu_free); 1145 } 1146 } 1147 1148 static void __ghes_panic(struct ghes *ghes, 1149 struct acpi_hest_generic_status *estatus, 1150 u64 buf_paddr, enum fixed_addresses fixmap_idx) 1151 { 1152 const char *msg = GHES_PFX "Fatal hardware error"; 1153 1154 __ghes_print_estatus(KERN_EMERG, ghes->generic, estatus); 1155 1156 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK); 1157 1158 ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); 1159 1160 if (!panic_timeout) 1161 pr_emerg("%s but panic disabled\n", msg); 1162 1163 panic(msg); 1164 } 1165 1166 static int ghes_proc(struct ghes *ghes) 1167 { 1168 struct acpi_hest_generic_status *estatus = ghes->estatus; 1169 u64 buf_paddr; 1170 int rc; 1171 1172 rc = ghes_read_estatus(ghes, estatus, &buf_paddr, FIX_APEI_GHES_IRQ); 1173 if (rc) 1174 goto out; 1175 1176 if (ghes_severity(estatus->error_severity) >= GHES_SEV_PANIC) 1177 __ghes_panic(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ); 1178 1179 if (!ghes_estatus_cached(estatus)) { 1180 if (ghes_print_estatus(NULL, ghes->generic, estatus)) 1181 ghes_estatus_cache_add(ghes->generic, estatus); 1182 } 1183 ghes_do_proc(ghes, estatus); 1184 1185 out: 1186 ghes_clear_estatus(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ); 1187 1188 return rc; 1189 } 1190 1191 static void ghes_add_timer(struct ghes *ghes) 1192 { 1193 struct acpi_hest_generic *g = ghes->generic; 1194 unsigned long expire; 1195 1196 if (!g->notify.poll_interval) { 1197 pr_warn(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n", 1198 g->header.source_id); 1199 return; 1200 } 1201 expire = jiffies + msecs_to_jiffies(g->notify.poll_interval); 1202 ghes->timer.expires = round_jiffies_relative(expire); 1203 add_timer(&ghes->timer); 1204 } 1205 1206 static void ghes_poll_func(struct timer_list *t) 1207 { 1208 struct ghes *ghes = timer_container_of(ghes, t, timer); 1209 unsigned long flags; 1210 
1211 spin_lock_irqsave(&ghes_notify_lock_irq, flags); 1212 ghes_proc(ghes); 1213 spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); 1214 if (!(ghes->flags & GHES_EXITING)) 1215 ghes_add_timer(ghes); 1216 } 1217 1218 static irqreturn_t ghes_irq_func(int irq, void *data) 1219 { 1220 struct ghes *ghes = data; 1221 unsigned long flags; 1222 int rc; 1223 1224 spin_lock_irqsave(&ghes_notify_lock_irq, flags); 1225 rc = ghes_proc(ghes); 1226 spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); 1227 if (rc) 1228 return IRQ_NONE; 1229 1230 return IRQ_HANDLED; 1231 } 1232 1233 static int ghes_notify_hed(struct notifier_block *this, unsigned long event, 1234 void *data) 1235 { 1236 struct ghes *ghes; 1237 unsigned long flags; 1238 int ret = NOTIFY_DONE; 1239 1240 spin_lock_irqsave(&ghes_notify_lock_irq, flags); 1241 list_for_each_entry_rcu(ghes, &ghes_hed, list) { 1242 if (!ghes_proc(ghes)) 1243 ret = NOTIFY_OK; 1244 } 1245 spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); 1246 1247 return ret; 1248 } 1249 1250 static struct notifier_block ghes_notifier_hed = { 1251 .notifier_call = ghes_notify_hed, 1252 }; 1253 1254 /* 1255 * Handlers for CPER records may not be NMI safe. For example, 1256 * memory_failure_queue() takes spinlocks and calls schedule_work_on(). 1257 * In any NMI-like handler, memory from ghes_estatus_pool is used to save 1258 * estatus, and added to the ghes_estatus_llist. irq_work_queue() causes 1259 * ghes_proc_in_irq() to run in IRQ context where each estatus in 1260 * ghes_estatus_llist is processed. 1261 * 1262 * Memory from the ghes_estatus_pool is also used with the ghes_estatus_cache 1263 * to suppress frequent messages. 
1264 */ 1265 static struct llist_head ghes_estatus_llist; 1266 static struct irq_work ghes_proc_irq_work; 1267 1268 static void ghes_proc_in_irq(struct irq_work *irq_work) 1269 { 1270 struct llist_node *llnode, *next; 1271 struct ghes_estatus_node *estatus_node; 1272 struct acpi_hest_generic *generic; 1273 struct acpi_hest_generic_status *estatus; 1274 u32 len, node_len; 1275 1276 llnode = llist_del_all(&ghes_estatus_llist); 1277 /* 1278 * Because the time order of estatus in list is reversed, 1279 * revert it back to proper order. 1280 */ 1281 llnode = llist_reverse_order(llnode); 1282 while (llnode) { 1283 next = llnode->next; 1284 estatus_node = llist_entry(llnode, struct ghes_estatus_node, 1285 llnode); 1286 estatus = GHES_ESTATUS_FROM_NODE(estatus_node); 1287 len = cper_estatus_len(estatus); 1288 node_len = GHES_ESTATUS_NODE_LEN(len); 1289 1290 ghes_do_proc(estatus_node->ghes, estatus); 1291 1292 if (!ghes_estatus_cached(estatus)) { 1293 generic = estatus_node->generic; 1294 if (ghes_print_estatus(NULL, generic, estatus)) 1295 ghes_estatus_cache_add(generic, estatus); 1296 } 1297 gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, 1298 node_len); 1299 1300 llnode = next; 1301 } 1302 } 1303 1304 static void ghes_print_queued_estatus(void) 1305 { 1306 struct llist_node *llnode; 1307 struct ghes_estatus_node *estatus_node; 1308 struct acpi_hest_generic *generic; 1309 struct acpi_hest_generic_status *estatus; 1310 1311 llnode = llist_del_all(&ghes_estatus_llist); 1312 /* 1313 * Because the time order of estatus in list is reversed, 1314 * revert it back to proper order. 
1315 */ 1316 llnode = llist_reverse_order(llnode); 1317 while (llnode) { 1318 estatus_node = llist_entry(llnode, struct ghes_estatus_node, 1319 llnode); 1320 estatus = GHES_ESTATUS_FROM_NODE(estatus_node); 1321 generic = estatus_node->generic; 1322 ghes_print_estatus(NULL, generic, estatus); 1323 llnode = llnode->next; 1324 } 1325 } 1326 1327 static int ghes_in_nmi_queue_one_entry(struct ghes *ghes, 1328 enum fixed_addresses fixmap_idx) 1329 { 1330 struct acpi_hest_generic_status *estatus, tmp_header; 1331 struct ghes_estatus_node *estatus_node; 1332 u32 len, node_len; 1333 u64 buf_paddr; 1334 int sev, rc; 1335 1336 if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG)) 1337 return -EOPNOTSUPP; 1338 1339 rc = __ghes_peek_estatus(ghes, &tmp_header, &buf_paddr, fixmap_idx); 1340 if (rc) { 1341 ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); 1342 return rc; 1343 } 1344 1345 rc = __ghes_check_estatus(ghes, &tmp_header); 1346 if (rc) { 1347 ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); 1348 return rc; 1349 } 1350 1351 len = cper_estatus_len(&tmp_header); 1352 node_len = GHES_ESTATUS_NODE_LEN(len); 1353 estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len); 1354 if (!estatus_node) 1355 return -ENOMEM; 1356 1357 estatus_node->ghes = ghes; 1358 estatus_node->generic = ghes->generic; 1359 estatus = GHES_ESTATUS_FROM_NODE(estatus_node); 1360 1361 if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) { 1362 ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); 1363 rc = -ENOENT; 1364 goto no_work; 1365 } 1366 1367 sev = ghes_severity(estatus->error_severity); 1368 if (sev >= GHES_SEV_PANIC) { 1369 ghes_print_queued_estatus(); 1370 __ghes_panic(ghes, estatus, buf_paddr, fixmap_idx); 1371 } 1372 1373 ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); 1374 1375 /* This error has been reported before, don't process it again. 
*/ 1376 if (ghes_estatus_cached(estatus)) 1377 goto no_work; 1378 1379 llist_add(&estatus_node->llnode, &ghes_estatus_llist); 1380 1381 return rc; 1382 1383 no_work: 1384 gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, 1385 node_len); 1386 1387 return rc; 1388 } 1389 1390 static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list, 1391 enum fixed_addresses fixmap_idx) 1392 { 1393 int ret = -ENOENT; 1394 struct ghes *ghes; 1395 1396 rcu_read_lock(); 1397 list_for_each_entry_rcu(ghes, rcu_list, list) { 1398 if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) 1399 ret = 0; 1400 } 1401 rcu_read_unlock(); 1402 1403 if (IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && !ret) 1404 irq_work_queue(&ghes_proc_irq_work); 1405 1406 return ret; 1407 } 1408 1409 #ifdef CONFIG_ACPI_APEI_SEA 1410 static LIST_HEAD(ghes_sea); 1411 1412 /* 1413 * Return 0 only if one of the SEA error sources successfully reported an error 1414 * record sent from the firmware. 1415 */ 1416 int ghes_notify_sea(void) 1417 { 1418 static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sea); 1419 int rv; 1420 1421 raw_spin_lock(&ghes_notify_lock_sea); 1422 rv = ghes_in_nmi_spool_from_list(&ghes_sea, FIX_APEI_GHES_SEA); 1423 raw_spin_unlock(&ghes_notify_lock_sea); 1424 1425 return rv; 1426 } 1427 1428 static void ghes_sea_add(struct ghes *ghes) 1429 { 1430 mutex_lock(&ghes_list_mutex); 1431 list_add_rcu(&ghes->list, &ghes_sea); 1432 mutex_unlock(&ghes_list_mutex); 1433 } 1434 1435 static void ghes_sea_remove(struct ghes *ghes) 1436 { 1437 mutex_lock(&ghes_list_mutex); 1438 list_del_rcu(&ghes->list); 1439 mutex_unlock(&ghes_list_mutex); 1440 synchronize_rcu(); 1441 } 1442 #else /* CONFIG_ACPI_APEI_SEA */ 1443 static inline void ghes_sea_add(struct ghes *ghes) { } 1444 static inline void ghes_sea_remove(struct ghes *ghes) { } 1445 #endif /* CONFIG_ACPI_APEI_SEA */ 1446 1447 #ifdef CONFIG_HAVE_ACPI_APEI_NMI 1448 /* 1449 * NMI may be triggered on any CPU, so ghes_in_nmi is used for 1450 * having only 
one concurrent reader. 1451 */ 1452 static atomic_t ghes_in_nmi = ATOMIC_INIT(0); 1453 1454 static LIST_HEAD(ghes_nmi); 1455 1456 static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) 1457 { 1458 static DEFINE_RAW_SPINLOCK(ghes_notify_lock_nmi); 1459 int ret = NMI_DONE; 1460 1461 if (!atomic_add_unless(&ghes_in_nmi, 1, 1)) 1462 return ret; 1463 1464 raw_spin_lock(&ghes_notify_lock_nmi); 1465 if (!ghes_in_nmi_spool_from_list(&ghes_nmi, FIX_APEI_GHES_NMI)) 1466 ret = NMI_HANDLED; 1467 raw_spin_unlock(&ghes_notify_lock_nmi); 1468 1469 atomic_dec(&ghes_in_nmi); 1470 return ret; 1471 } 1472 1473 static void ghes_nmi_add(struct ghes *ghes) 1474 { 1475 mutex_lock(&ghes_list_mutex); 1476 if (list_empty(&ghes_nmi)) 1477 register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes"); 1478 list_add_rcu(&ghes->list, &ghes_nmi); 1479 mutex_unlock(&ghes_list_mutex); 1480 } 1481 1482 static void ghes_nmi_remove(struct ghes *ghes) 1483 { 1484 mutex_lock(&ghes_list_mutex); 1485 list_del_rcu(&ghes->list); 1486 if (list_empty(&ghes_nmi)) 1487 unregister_nmi_handler(NMI_LOCAL, "ghes"); 1488 mutex_unlock(&ghes_list_mutex); 1489 /* 1490 * To synchronize with NMI handler, ghes can only be 1491 * freed after NMI handler finishes. 
1492 */ 1493 synchronize_rcu(); 1494 } 1495 #else /* CONFIG_HAVE_ACPI_APEI_NMI */ 1496 static inline void ghes_nmi_add(struct ghes *ghes) { } 1497 static inline void ghes_nmi_remove(struct ghes *ghes) { } 1498 #endif /* CONFIG_HAVE_ACPI_APEI_NMI */ 1499 1500 static void ghes_nmi_init_cxt(void) 1501 { 1502 init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); 1503 } 1504 1505 static int __ghes_sdei_callback(struct ghes *ghes, 1506 enum fixed_addresses fixmap_idx) 1507 { 1508 if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) { 1509 irq_work_queue(&ghes_proc_irq_work); 1510 1511 return 0; 1512 } 1513 1514 return -ENOENT; 1515 } 1516 1517 static int ghes_sdei_normal_callback(u32 event_num, struct pt_regs *regs, 1518 void *arg) 1519 { 1520 static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_normal); 1521 struct ghes *ghes = arg; 1522 int err; 1523 1524 raw_spin_lock(&ghes_notify_lock_sdei_normal); 1525 err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_NORMAL); 1526 raw_spin_unlock(&ghes_notify_lock_sdei_normal); 1527 1528 return err; 1529 } 1530 1531 static int ghes_sdei_critical_callback(u32 event_num, struct pt_regs *regs, 1532 void *arg) 1533 { 1534 static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_critical); 1535 struct ghes *ghes = arg; 1536 int err; 1537 1538 raw_spin_lock(&ghes_notify_lock_sdei_critical); 1539 err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_CRITICAL); 1540 raw_spin_unlock(&ghes_notify_lock_sdei_critical); 1541 1542 return err; 1543 } 1544 1545 static int apei_sdei_register_ghes(struct ghes *ghes) 1546 { 1547 if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) 1548 return -EOPNOTSUPP; 1549 1550 return sdei_register_ghes(ghes, ghes_sdei_normal_callback, 1551 ghes_sdei_critical_callback); 1552 } 1553 1554 static int apei_sdei_unregister_ghes(struct ghes *ghes) 1555 { 1556 if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) 1557 return -EOPNOTSUPP; 1558 1559 return sdei_unregister_ghes(ghes); 1560 } 1561 1562 static int ghes_probe(struct platform_device *ghes_dev) 
1563 { 1564 struct acpi_hest_generic *generic; 1565 struct ghes *ghes = NULL; 1566 unsigned long flags; 1567 1568 int rc = -EINVAL; 1569 1570 generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; 1571 if (!generic->enabled) 1572 return -ENODEV; 1573 1574 switch (generic->notify.type) { 1575 case ACPI_HEST_NOTIFY_POLLED: 1576 case ACPI_HEST_NOTIFY_EXTERNAL: 1577 case ACPI_HEST_NOTIFY_SCI: 1578 case ACPI_HEST_NOTIFY_GSIV: 1579 case ACPI_HEST_NOTIFY_GPIO: 1580 break; 1581 1582 case ACPI_HEST_NOTIFY_SEA: 1583 if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) { 1584 pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n", 1585 generic->header.source_id); 1586 rc = -ENOTSUPP; 1587 goto err; 1588 } 1589 break; 1590 case ACPI_HEST_NOTIFY_NMI: 1591 if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) { 1592 pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n", 1593 generic->header.source_id); 1594 goto err; 1595 } 1596 break; 1597 case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: 1598 if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) { 1599 pr_warn(GHES_PFX "Generic hardware error source: %d notified via SDE Interface is not supported!\n", 1600 generic->header.source_id); 1601 goto err; 1602 } 1603 break; 1604 case ACPI_HEST_NOTIFY_LOCAL: 1605 pr_warn(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", 1606 generic->header.source_id); 1607 goto err; 1608 default: 1609 pr_warn(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n", 1610 generic->notify.type, generic->header.source_id); 1611 goto err; 1612 } 1613 1614 rc = -EIO; 1615 if (generic->error_block_length < 1616 sizeof(struct acpi_hest_generic_status)) { 1617 pr_warn(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n", 1618 generic->error_block_length, generic->header.source_id); 1619 goto err; 1620 } 1621 ghes = ghes_new(generic); 1622 if 
(IS_ERR(ghes)) { 1623 rc = PTR_ERR(ghes); 1624 ghes = NULL; 1625 goto err; 1626 } 1627 1628 switch (generic->notify.type) { 1629 case ACPI_HEST_NOTIFY_POLLED: 1630 timer_setup(&ghes->timer, ghes_poll_func, 0); 1631 ghes_add_timer(ghes); 1632 break; 1633 case ACPI_HEST_NOTIFY_EXTERNAL: 1634 /* External interrupt vector is GSI */ 1635 rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq); 1636 if (rc) { 1637 pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n", 1638 generic->header.source_id); 1639 goto err; 1640 } 1641 rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED, 1642 "GHES IRQ", ghes); 1643 if (rc) { 1644 pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n", 1645 generic->header.source_id); 1646 goto err; 1647 } 1648 break; 1649 1650 case ACPI_HEST_NOTIFY_SCI: 1651 case ACPI_HEST_NOTIFY_GSIV: 1652 case ACPI_HEST_NOTIFY_GPIO: 1653 mutex_lock(&ghes_list_mutex); 1654 if (list_empty(&ghes_hed)) 1655 register_acpi_hed_notifier(&ghes_notifier_hed); 1656 list_add_rcu(&ghes->list, &ghes_hed); 1657 mutex_unlock(&ghes_list_mutex); 1658 break; 1659 1660 case ACPI_HEST_NOTIFY_SEA: 1661 ghes_sea_add(ghes); 1662 break; 1663 case ACPI_HEST_NOTIFY_NMI: 1664 ghes_nmi_add(ghes); 1665 break; 1666 case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: 1667 rc = apei_sdei_register_ghes(ghes); 1668 if (rc) 1669 goto err; 1670 break; 1671 default: 1672 BUG(); 1673 } 1674 1675 platform_set_drvdata(ghes_dev, ghes); 1676 1677 ghes->dev = &ghes_dev->dev; 1678 1679 mutex_lock(&ghes_devs_mutex); 1680 list_add_tail(&ghes->elist, &ghes_devs); 1681 mutex_unlock(&ghes_devs_mutex); 1682 1683 /* Handle any pending errors right away */ 1684 spin_lock_irqsave(&ghes_notify_lock_irq, flags); 1685 ghes_proc(ghes); 1686 spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); 1687 1688 return 0; 1689 1690 err: 1691 if (ghes) { 1692 ghes_fini(ghes); 1693 kfree(ghes); 1694 } 1695 return rc; 1696 } 1697 1698 static void ghes_remove(struct 
platform_device *ghes_dev) 1699 { 1700 int rc; 1701 struct ghes *ghes; 1702 struct acpi_hest_generic *generic; 1703 1704 ghes = platform_get_drvdata(ghes_dev); 1705 generic = ghes->generic; 1706 1707 ghes->flags |= GHES_EXITING; 1708 switch (generic->notify.type) { 1709 case ACPI_HEST_NOTIFY_POLLED: 1710 timer_shutdown_sync(&ghes->timer); 1711 break; 1712 case ACPI_HEST_NOTIFY_EXTERNAL: 1713 free_irq(ghes->irq, ghes); 1714 break; 1715 1716 case ACPI_HEST_NOTIFY_SCI: 1717 case ACPI_HEST_NOTIFY_GSIV: 1718 case ACPI_HEST_NOTIFY_GPIO: 1719 mutex_lock(&ghes_list_mutex); 1720 list_del_rcu(&ghes->list); 1721 if (list_empty(&ghes_hed)) 1722 unregister_acpi_hed_notifier(&ghes_notifier_hed); 1723 mutex_unlock(&ghes_list_mutex); 1724 synchronize_rcu(); 1725 break; 1726 1727 case ACPI_HEST_NOTIFY_SEA: 1728 ghes_sea_remove(ghes); 1729 break; 1730 case ACPI_HEST_NOTIFY_NMI: 1731 ghes_nmi_remove(ghes); 1732 break; 1733 case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: 1734 rc = apei_sdei_unregister_ghes(ghes); 1735 if (rc) { 1736 /* 1737 * Returning early results in a resource leak, but we're 1738 * only here if stopping the hardware failed. 
1739 */ 1740 dev_err(&ghes_dev->dev, "Failed to unregister ghes (%pe)\n", 1741 ERR_PTR(rc)); 1742 return; 1743 } 1744 break; 1745 default: 1746 BUG(); 1747 break; 1748 } 1749 1750 ghes_fini(ghes); 1751 1752 mutex_lock(&ghes_devs_mutex); 1753 list_del(&ghes->elist); 1754 mutex_unlock(&ghes_devs_mutex); 1755 1756 kfree(ghes); 1757 } 1758 1759 static struct platform_driver ghes_platform_driver = { 1760 .driver = { 1761 .name = "GHES", 1762 }, 1763 .probe = ghes_probe, 1764 .remove = ghes_remove, 1765 }; 1766 1767 void __init acpi_ghes_init(void) 1768 { 1769 int rc; 1770 1771 acpi_sdei_init(); 1772 1773 if (acpi_disabled) 1774 return; 1775 1776 switch (hest_disable) { 1777 case HEST_NOT_FOUND: 1778 return; 1779 case HEST_DISABLED: 1780 pr_info(GHES_PFX "HEST is not enabled!\n"); 1781 return; 1782 default: 1783 break; 1784 } 1785 1786 if (ghes_disable) { 1787 pr_info(GHES_PFX "GHES is not enabled!\n"); 1788 return; 1789 } 1790 1791 ghes_nmi_init_cxt(); 1792 1793 rc = platform_driver_register(&ghes_platform_driver); 1794 if (rc) 1795 return; 1796 1797 rc = apei_osc_setup(); 1798 if (rc == 0 && osc_sb_apei_support_acked) 1799 pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); 1800 else if (rc == 0 && !osc_sb_apei_support_acked) 1801 pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n"); 1802 else if (rc && osc_sb_apei_support_acked) 1803 pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); 1804 else 1805 pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); 1806 } 1807 1808 /* 1809 * Known x86 systems that prefer GHES error reporting: 1810 */ 1811 static struct acpi_platform_list plat_list[] = { 1812 {"HPE ", "Server ", 0, ACPI_SIG_FADT, all_versions}, 1813 { } /* End */ 1814 }; 1815 1816 struct list_head *ghes_get_devices(void) 1817 { 1818 int idx = -1; 1819 1820 if (IS_ENABLED(CONFIG_X86)) { 1821 idx = acpi_match_platform_list(plat_list); 1822 if (idx < 0) { 1823 if 
(!ghes_edac_force_enable) 1824 return NULL; 1825 1826 pr_warn_once("Force-loading ghes_edac on an unsupported platform. You're on your own!\n"); 1827 } 1828 } else if (list_empty(&ghes_devs)) { 1829 return NULL; 1830 } 1831 1832 return &ghes_devs; 1833 } 1834 EXPORT_SYMBOL_GPL(ghes_get_devices); 1835 1836 void ghes_register_report_chain(struct notifier_block *nb) 1837 { 1838 atomic_notifier_chain_register(&ghes_report_chain, nb); 1839 } 1840 EXPORT_SYMBOL_GPL(ghes_register_report_chain); 1841 1842 void ghes_unregister_report_chain(struct notifier_block *nb) 1843 { 1844 atomic_notifier_chain_unregister(&ghes_report_chain, nb); 1845 } 1846 EXPORT_SYMBOL_GPL(ghes_unregister_report_chain); 1847