1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * FRU (Field-Replaceable Unit) Memory Poison Manager 4 * 5 * Copyright (c) 2024, Advanced Micro Devices, Inc. 6 * All Rights Reserved. 7 * 8 * Authors: 9 * Naveen Krishna Chatradhi <naveenkrishna.chatradhi@amd.com> 10 * Muralidhara M K <muralidhara.mk@amd.com> 11 * Yazen Ghannam <Yazen.Ghannam@amd.com> 12 * 13 * Implementation notes, assumptions, and limitations: 14 * 15 * - FRU memory poison section and memory poison descriptor definitions are not yet 16 * included in the UEFI specification. So they are defined here. Afterwards, they 17 * may be moved to linux/cper.h, if appropriate. 18 * 19 * - Platforms based on AMD MI300 systems will be the first to use these structures. 20 * There are a number of assumptions made here that will need to be generalized 21 * to support other platforms. 22 * 23 * AMD MI300-based platform(s) assumptions: 24 * - Memory errors are reported through x86 MCA. 25 * - The entire DRAM row containing a memory error should be retired. 26 * - There will be (1) FRU memory poison section per CPER. 27 * - The FRU will be the CPU package (processor socket). 28 * - The default number of memory poison descriptor entries should be (8). 29 * - The platform will use ACPI ERST for persistent storage. 30 * - All FRU records should be saved to persistent storage. Module init will 31 * fail if any FRU record is not successfully written. 32 * 33 * - Boot time memory retirement may occur later than ideal due to dependencies 34 * on other libraries and drivers. This leaves a gap where bad memory may be 35 * accessed during early boot stages. 36 * 37 * - Enough memory should be pre-allocated for each FRU record to be able to hold 38 * the expected number of descriptor entries. This, mostly empty, record is 39 * written to storage during init time. Subsequent writes to the same record 40 * should allow the Platform to update the stored record in-place. Otherwise, 41 * if the record is extended, then the Platform may need to perform costly memory 42 * management operations on the storage. For example, the Platform may spend time 43 * in Firmware copying and invalidating memory on a relatively slow SPI ROM. 44 */ 45 46 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 47 48 #include <linux/cper.h> 49 #include <linux/ras.h> 50 #include <linux/cpu.h> 51 52 #include <acpi/apei.h> 53 54 #include <asm/cpu_device_id.h> 55 #include <asm/mce.h> 56 57 #define INVALID_CPU UINT_MAX 58 59 /* Validation Bits */ 60 #define FMP_VALID_ARCH_TYPE BIT_ULL(0) 61 #define FMP_VALID_ARCH BIT_ULL(1) 62 #define FMP_VALID_ID_TYPE BIT_ULL(2) 63 #define FMP_VALID_ID BIT_ULL(3) 64 #define FMP_VALID_LIST_ENTRIES BIT_ULL(4) 65 #define FMP_VALID_LIST BIT_ULL(5) 66 67 /* FRU Architecture Types */ 68 #define FMP_ARCH_TYPE_X86_CPUID_1_EAX 0 69 70 /* FRU ID Types */ 71 #define FMP_ID_TYPE_X86_PPIN 0 72 73 /* FRU Memory Poison Section */ 74 struct cper_sec_fru_mem_poison { 75 u32 checksum; 76 u64 validation_bits; 77 u32 fru_arch_type; 78 u64 fru_arch; 79 u32 fru_id_type; 80 u64 fru_id; 81 u32 nr_entries; 82 } __packed; 83 84 /* FRU Descriptor ID Types */ 85 #define FPD_HW_ID_TYPE_MCA_IPID 0 86 87 /* FRU Descriptor Address Types */ 88 #define FPD_ADDR_TYPE_MCA_ADDR 0 89 90 /* Memory Poison Descriptor */ 91 struct cper_fru_poison_desc { 92 u64 timestamp; 93 u32 hw_id_type; 94 u64 hw_id; 95 u32 addr_type; 96 u64 addr; 97 } __packed; 98 99 /* Collection of headers and sections for easy pointer use. */ 100 struct fru_rec { 101 struct cper_record_header hdr; 102 struct cper_section_descriptor sec_desc; 103 struct cper_sec_fru_mem_poison fmp; 104 struct cper_fru_poison_desc entries[]; 105 } __packed; 106 107 /* 108 * Pointers to the complete CPER record of each FRU. 109 * 110 * Memory allocation will include padded space for descriptor entries. 111 */ 112 static struct fru_rec **fru_records; 113 114 #define CPER_CREATOR_FMP \ 115 GUID_INIT(0xcd5c2993, 0xf4b2, 0x41b2, 0xb5, 0xd4, 0xf9, 0xc3, \ 116 0xa0, 0x33, 0x08, 0x75) 117 118 #define CPER_SECTION_TYPE_FMP \ 119 GUID_INIT(0x5e4706c1, 0x5356, 0x48c6, 0x93, 0x0b, 0x52, 0xf2, \ 120 0x12, 0x0a, 0x44, 0x58) 121 122 /** 123 * DOC: fru_poison_entries (byte) 124 * Maximum number of descriptor entries possible for each FRU. 125 * 126 * Values between '1' and '255' are valid. 127 * No input or '0' will default to FMPM_DEFAULT_MAX_NR_ENTRIES. 128 */ 129 static u8 max_nr_entries; 130 module_param(max_nr_entries, byte, 0644); 131 MODULE_PARM_DESC(max_nr_entries, 132 "Maximum number of memory poison descriptor entries per FRU"); 133 134 #define FMPM_DEFAULT_MAX_NR_ENTRIES 8 135 136 /* Maximum number of FRUs in the system. */ 137 #define FMPM_MAX_NR_FRU 256 138 static unsigned int max_nr_fru; 139 140 /* Total length of record including headers and list of descriptor entries. */ 141 static size_t max_rec_len; 142 143 /* 144 * Protect the local records cache in fru_records and prevent concurrent 145 * writes to storage. This is only needed after init once notifier block 146 * registration is done. 147 */ 148 static DEFINE_MUTEX(fmpm_update_mutex); 149 150 #define for_each_fru(i, rec) \ 151 for (i = 0; rec = fru_records[i], i < max_nr_fru; i++) 152 153 static inline u32 get_fmp_len(struct fru_rec *rec) 154 { 155 return rec->sec_desc.section_length - sizeof(struct cper_section_descriptor); 156 } 157 158 static struct fru_rec *get_fru_record(u64 fru_id) 159 { 160 struct fru_rec *rec; 161 unsigned int i; 162 163 for_each_fru(i, rec) { 164 if (rec->fmp.fru_id == fru_id) 165 return rec; 166 } 167 168 pr_debug("Record not found for FRU 0x%016llx\n", fru_id); 169 170 return NULL; 171 } 172 173 /* 174 * Sum up all bytes within the FRU Memory Poison Section including the Memory 175 * Poison Descriptor entries. 176 * 177 * Don't include the old checksum here. It's a u32 value, so summing each of its 178 * bytes will give the wrong total. 179 */ 180 static u32 do_fmp_checksum(struct cper_sec_fru_mem_poison *fmp, u32 len) 181 { 182 u32 checksum = 0; 183 u8 *buf, *end; 184 185 /* Skip old checksum. */ 186 buf = (u8 *)fmp + sizeof(u32); 187 end = buf + len; 188 189 while (buf < end) 190 checksum += (u8)(*(buf++)); 191 192 return checksum; 193 } 194 195 static int update_record_on_storage(struct fru_rec *rec) 196 { 197 u32 len, checksum; 198 int ret; 199 200 /* Calculate a new checksum. */ 201 len = get_fmp_len(rec); 202 203 /* Get the current total. */ 204 checksum = do_fmp_checksum(&rec->fmp, len); 205 206 /* Use the complement value. */ 207 rec->fmp.checksum = -checksum; 208 209 pr_debug("Writing to storage\n"); 210 211 ret = erst_write(&rec->hdr); 212 if (ret) { 213 pr_warn("Storage update failed for FRU 0x%016llx\n", rec->fmp.fru_id); 214 215 if (ret == -ENOSPC) 216 pr_warn("Not enough space on storage\n"); 217 } 218 219 return ret; 220 } 221 222 static bool rec_has_valid_entries(struct fru_rec *rec) 223 { 224 if (!(rec->fmp.validation_bits & FMP_VALID_LIST_ENTRIES)) 225 return false; 226 227 if (!(rec->fmp.validation_bits & FMP_VALID_LIST)) 228 return false; 229 230 return true; 231 } 232 233 static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_desc *new) 234 { 235 /* 236 * Ignore timestamp field. 237 * The same physical error may be reported multiple times due to stuck bits, etc. 238 * 239 * Also, order the checks from most->least likely to fail to shortcut the code. 240 */ 241 if (old->addr != new->addr) 242 return false; 243 244 if (old->hw_id != new->hw_id) 245 return false; 246 247 if (old->addr_type != new->addr_type) 248 return false; 249 250 if (old->hw_id_type != new->hw_id_type) 251 return false; 252 253 return true; 254 } 255 256 static bool rec_has_fpd(struct fru_rec *rec, struct cper_fru_poison_desc *fpd) 257 { 258 unsigned int i; 259 260 for (i = 0; i < rec->fmp.nr_entries; i++) { 261 struct cper_fru_poison_desc *fpd_i = &rec->entries[i]; 262 263 if (fpds_equal(fpd_i, fpd)) { 264 pr_debug("Found duplicate record\n"); 265 return true; 266 } 267 } 268 269 return false; 270 } 271 272 static void update_fru_record(struct fru_rec *rec, struct mce *m) 273 { 274 struct cper_sec_fru_mem_poison *fmp = &rec->fmp; 275 struct cper_fru_poison_desc fpd, *fpd_dest; 276 u32 entry = 0; 277 278 mutex_lock(&fmpm_update_mutex); 279 280 memset(&fpd, 0, sizeof(struct cper_fru_poison_desc)); 281 282 fpd.timestamp = m->time; 283 fpd.hw_id_type = FPD_HW_ID_TYPE_MCA_IPID; 284 fpd.hw_id = m->ipid; 285 fpd.addr_type = FPD_ADDR_TYPE_MCA_ADDR; 286 fpd.addr = m->addr; 287 288 /* This is the first entry, so just save it. */ 289 if (!rec_has_valid_entries(rec)) 290 goto save_fpd; 291 292 /* Ignore already recorded errors. */ 293 if (rec_has_fpd(rec, &fpd)) 294 goto out_unlock; 295 296 if (rec->fmp.nr_entries >= max_nr_entries) { 297 pr_warn("Exceeded number of entries for FRU 0x%016llx\n", rec->fmp.fru_id); 298 goto out_unlock; 299 } 300 301 entry = fmp->nr_entries; 302 303 save_fpd: 304 fpd_dest = &rec->entries[entry]; 305 memcpy(fpd_dest, &fpd, sizeof(struct cper_fru_poison_desc)); 306 307 fmp->nr_entries = entry + 1; 308 fmp->validation_bits |= FMP_VALID_LIST_ENTRIES; 309 fmp->validation_bits |= FMP_VALID_LIST; 310 311 pr_debug("Updated FRU 0x%016llx entry #%u\n", fmp->fru_id, entry); 312 313 update_record_on_storage(rec); 314 315 out_unlock: 316 mutex_unlock(&fmpm_update_mutex); 317 } 318 319 static void retire_dram_row(u64 addr, u64 id, u32 cpu) 320 { 321 struct atl_err a_err; 322 323 memset(&a_err, 0, sizeof(struct atl_err)); 324 325 a_err.addr = addr; 326 a_err.ipid = id; 327 a_err.cpu = cpu; 328 329 amd_retire_dram_row(&a_err); 330 } 331 332 static int fru_handle_mem_poison(struct notifier_block *nb, unsigned long val, void *data) 333 { 334 struct mce *m = (struct mce *)data; 335 struct fru_rec *rec; 336 337 if (!mce_is_memory_error(m)) 338 return NOTIFY_DONE; 339 340 retire_dram_row(m->addr, m->ipid, m->extcpu); 341 342 /* 343 * An invalid FRU ID should not happen on real errors. But it 344 * could happen from software error injection, etc. 345 */ 346 rec = get_fru_record(m->ppin); 347 if (!rec) 348 return NOTIFY_DONE; 349 350 update_fru_record(rec, m); 351 352 return NOTIFY_OK; 353 } 354 355 static struct notifier_block fru_mem_poison_nb = { 356 .notifier_call = fru_handle_mem_poison, 357 .priority = MCE_PRIO_LOWEST, 358 }; 359 360 static void retire_mem_fmp(struct fru_rec *rec) 361 { 362 struct cper_sec_fru_mem_poison *fmp = &rec->fmp; 363 unsigned int i, cpu; 364 365 for (i = 0; i < fmp->nr_entries; i++) { 366 struct cper_fru_poison_desc *fpd = &rec->entries[i]; 367 unsigned int err_cpu = INVALID_CPU; 368 369 if (fpd->hw_id_type != FPD_HW_ID_TYPE_MCA_IPID) 370 continue; 371 372 if (fpd->addr_type != FPD_ADDR_TYPE_MCA_ADDR) 373 continue; 374 375 cpus_read_lock(); 376 for_each_online_cpu(cpu) { 377 if (topology_ppin(cpu) == fmp->fru_id) { 378 err_cpu = cpu; 379 break; 380 } 381 } 382 cpus_read_unlock(); 383 384 if (err_cpu == INVALID_CPU) 385 continue; 386 387 retire_dram_row(fpd->addr, fpd->hw_id, err_cpu); 388 } 389 } 390 391 static void retire_mem_records(void) 392 { 393 struct fru_rec *rec; 394 unsigned int i; 395 396 for_each_fru(i, rec) { 397 if (!rec_has_valid_entries(rec)) 398 continue; 399 400 retire_mem_fmp(rec); 401 } 402 } 403 404 /* Set the CPER Record Header and CPER Section Descriptor fields. */ 405 static void set_rec_fields(struct fru_rec *rec) 406 { 407 struct cper_section_descriptor *sec_desc = &rec->sec_desc; 408 struct cper_record_header *hdr = &rec->hdr; 409 410 memcpy(hdr->signature, CPER_SIG_RECORD, CPER_SIG_SIZE); 411 hdr->revision = CPER_RECORD_REV; 412 hdr->signature_end = CPER_SIG_END; 413 414 /* 415 * Currently, it is assumed that there is one FRU Memory Poison 416 * section per CPER. But this may change for other implementations. 417 */ 418 hdr->section_count = 1; 419 420 /* The logged errors are recoverable. Otherwise, they'd never make it here. */ 421 hdr->error_severity = CPER_SEV_RECOVERABLE; 422 423 hdr->validation_bits = 0; 424 hdr->record_length = max_rec_len; 425 hdr->creator_id = CPER_CREATOR_FMP; 426 hdr->notification_type = CPER_NOTIFY_MCE; 427 hdr->record_id = cper_next_record_id(); 428 hdr->flags = CPER_HW_ERROR_FLAGS_PREVERR; 429 430 sec_desc->section_offset = sizeof(struct cper_record_header); 431 sec_desc->section_length = max_rec_len - sizeof(struct cper_record_header); 432 sec_desc->revision = CPER_SEC_REV; 433 sec_desc->validation_bits = 0; 434 sec_desc->flags = CPER_SEC_PRIMARY; 435 sec_desc->section_type = CPER_SECTION_TYPE_FMP; 436 sec_desc->section_severity = CPER_SEV_RECOVERABLE; 437 } 438 439 static int save_new_records(void) 440 { 441 DECLARE_BITMAP(new_records, FMPM_MAX_NR_FRU); 442 struct fru_rec *rec; 443 unsigned int i; 444 int ret = 0; 445 446 for_each_fru(i, rec) { 447 if (rec->hdr.record_length) 448 continue; 449 450 set_rec_fields(rec); 451 452 ret = update_record_on_storage(rec); 453 if (ret) 454 goto out_clear; 455 456 set_bit(i, new_records); 457 } 458 459 return ret; 460 461 out_clear: 462 for_each_fru(i, rec) { 463 if (!test_bit(i, new_records)) 464 continue; 465 466 erst_clear(rec->hdr.record_id); 467 } 468 469 return ret; 470 } 471 472 /* Check that the record matches expected types for the current system.*/ 473 static bool fmp_is_usable(struct fru_rec *rec) 474 { 475 struct cper_sec_fru_mem_poison *fmp = &rec->fmp; 476 u64 cpuid; 477 478 pr_debug("Validation bits: 0x%016llx\n", fmp->validation_bits); 479 480 if (!(fmp->validation_bits & FMP_VALID_ARCH_TYPE)) { 481 pr_debug("Arch type unknown\n"); 482 return false; 483 } 484 485 if (fmp->fru_arch_type != FMP_ARCH_TYPE_X86_CPUID_1_EAX) { 486 pr_debug("Arch type not 'x86 Family/Model/Stepping'\n"); 487 return false; 488 } 489 490 if (!(fmp->validation_bits & FMP_VALID_ARCH)) { 491 pr_debug("Arch value unknown\n"); 492 return false; 493 } 494 495 cpuid = cpuid_eax(1); 496 if (fmp->fru_arch != cpuid) { 497 pr_debug("Arch value mismatch: record = 0x%016llx, system = 0x%016llx\n", 498 fmp->fru_arch, cpuid); 499 return false; 500 } 501 502 if (!(fmp->validation_bits & FMP_VALID_ID_TYPE)) { 503 pr_debug("FRU ID type unknown\n"); 504 return false; 505 } 506 507 if (fmp->fru_id_type != FMP_ID_TYPE_X86_PPIN) { 508 pr_debug("FRU ID type is not 'x86 PPIN'\n"); 509 return false; 510 } 511 512 if (!(fmp->validation_bits & FMP_VALID_ID)) { 513 pr_debug("FRU ID value unknown\n"); 514 return false; 515 } 516 517 return true; 518 } 519 520 static bool fmp_is_valid(struct fru_rec *rec) 521 { 522 struct cper_sec_fru_mem_poison *fmp = &rec->fmp; 523 u32 checksum, len; 524 525 len = get_fmp_len(rec); 526 if (len < sizeof(struct cper_sec_fru_mem_poison)) { 527 pr_debug("fmp length is too small\n"); 528 return false; 529 } 530 531 /* Checksum must sum to zero for the entire section. */ 532 checksum = do_fmp_checksum(fmp, len) + fmp->checksum; 533 if (checksum) { 534 pr_debug("fmp checksum failed: sum = 0x%x\n", checksum); 535 print_hex_dump_debug("fmp record: ", DUMP_PREFIX_NONE, 16, 1, fmp, len, false); 536 return false; 537 } 538 539 if (!fmp_is_usable(rec)) 540 return false; 541 542 return true; 543 } 544 545 static struct fru_rec *get_valid_record(struct fru_rec *old) 546 { 547 struct fru_rec *new; 548 549 if (!fmp_is_valid(old)) { 550 pr_debug("Ignoring invalid record\n"); 551 return NULL; 552 } 553 554 new = get_fru_record(old->fmp.fru_id); 555 if (!new) 556 pr_debug("Ignoring record for absent FRU\n"); 557 558 return new; 559 } 560 561 /* 562 * Fetch saved records from persistent storage. 563 * 564 * For each found record: 565 * - If it was not created by this module, then ignore it. 566 * - If it is valid, then copy its data to the local cache. 567 * - If it is not valid, then erase it. 568 */ 569 static int get_saved_records(void) 570 { 571 struct fru_rec *old, *new; 572 u64 record_id; 573 int ret, pos; 574 ssize_t len; 575 576 /* 577 * Assume saved records match current max size. 578 * 579 * However, this may not be true depending on module parameters. 580 */ 581 old = kmalloc(max_rec_len, GFP_KERNEL); 582 if (!old) { 583 ret = -ENOMEM; 584 goto out; 585 } 586 587 ret = erst_get_record_id_begin(&pos); 588 if (ret < 0) 589 goto out_end; 590 591 while (!erst_get_record_id_next(&pos, &record_id)) { 592 if (record_id == APEI_ERST_INVALID_RECORD_ID) 593 goto out_end; 594 /* 595 * Make sure to clear temporary buffer between reads to avoid 596 * leftover data from records of various sizes. 597 */ 598 memset(old, 0, max_rec_len); 599 600 len = erst_read_record(record_id, &old->hdr, max_rec_len, 601 sizeof(struct fru_rec), &CPER_CREATOR_FMP); 602 if (len < 0) 603 continue; 604 605 if (len > max_rec_len) { 606 pr_debug("Found record larger than max_rec_len\n"); 607 continue; 608 } 609 610 new = get_valid_record(old); 611 if (!new) 612 erst_clear(record_id); 613 614 /* Restore the record */ 615 memcpy(new, old, len); 616 } 617 618 out_end: 619 erst_get_record_id_end(); 620 kfree(old); 621 out: 622 return ret; 623 } 624 625 static void set_fmp_fields(struct fru_rec *rec, unsigned int cpu) 626 { 627 struct cper_sec_fru_mem_poison *fmp = &rec->fmp; 628 629 fmp->fru_arch_type = FMP_ARCH_TYPE_X86_CPUID_1_EAX; 630 fmp->validation_bits |= FMP_VALID_ARCH_TYPE; 631 632 /* Assume all CPUs in the system have the same value for now. */ 633 fmp->fru_arch = cpuid_eax(1); 634 fmp->validation_bits |= FMP_VALID_ARCH; 635 636 fmp->fru_id_type = FMP_ID_TYPE_X86_PPIN; 637 fmp->validation_bits |= FMP_VALID_ID_TYPE; 638 639 fmp->fru_id = topology_ppin(cpu); 640 fmp->validation_bits |= FMP_VALID_ID; 641 } 642 643 static int init_fmps(void) 644 { 645 struct fru_rec *rec; 646 unsigned int i, cpu; 647 int ret = 0; 648 649 for_each_fru(i, rec) { 650 unsigned int fru_cpu = INVALID_CPU; 651 652 cpus_read_lock(); 653 for_each_online_cpu(cpu) { 654 if (topology_physical_package_id(cpu) == i) { 655 fru_cpu = cpu; 656 break; 657 } 658 } 659 cpus_read_unlock(); 660 661 if (fru_cpu == INVALID_CPU) { 662 pr_debug("Failed to find matching CPU for FRU #%u\n", i); 663 ret = -ENODEV; 664 break; 665 } 666 667 set_fmp_fields(rec, fru_cpu); 668 } 669 670 return ret; 671 } 672 673 static int get_system_info(void) 674 { 675 /* Only load on MI300A systems for now. */ 676 if (!(boot_cpu_data.x86_model >= 0x90 && 677 boot_cpu_data.x86_model <= 0x9f)) 678 return -ENODEV; 679 680 if (!cpu_feature_enabled(X86_FEATURE_AMD_PPIN)) { 681 pr_debug("PPIN feature not available\n"); 682 return -ENODEV; 683 } 684 685 /* Use CPU socket as FRU for MI300 systems. */ 686 max_nr_fru = topology_max_packages(); 687 if (!max_nr_fru) 688 return -ENODEV; 689 690 if (max_nr_fru > FMPM_MAX_NR_FRU) { 691 pr_warn("Too many FRUs to manage: found: %u, max: %u\n", 692 max_nr_fru, FMPM_MAX_NR_FRU); 693 return -ENODEV; 694 } 695 696 if (!max_nr_entries) 697 max_nr_entries = FMPM_DEFAULT_MAX_NR_ENTRIES; 698 699 max_rec_len = sizeof(struct fru_rec); 700 max_rec_len += sizeof(struct cper_fru_poison_desc) * max_nr_entries; 701 702 pr_info("max FRUs: %u, max entries: %u, max record length: %lu\n", 703 max_nr_fru, max_nr_entries, max_rec_len); 704 705 return 0; 706 } 707 708 static void free_records(void) 709 { 710 struct fru_rec *rec; 711 int i; 712 713 for_each_fru(i, rec) 714 kfree(rec); 715 716 kfree(fru_records); 717 } 718 719 static int allocate_records(void) 720 { 721 int i, ret = 0; 722 723 fru_records = kcalloc(max_nr_fru, sizeof(struct fru_rec *), GFP_KERNEL); 724 if (!fru_records) { 725 ret = -ENOMEM; 726 goto out; 727 } 728 729 for (i = 0; i < max_nr_fru; i++) { 730 fru_records[i] = kzalloc(max_rec_len, GFP_KERNEL); 731 if (!fru_records[i]) { 732 ret = -ENOMEM; 733 goto out_free; 734 } 735 } 736 737 return ret; 738 739 out_free: 740 for (; i >= 0; i--) 741 kfree(fru_records[i]); 742 743 kfree(fru_records); 744 out: 745 return ret; 746 } 747 748 static const struct x86_cpu_id fmpm_cpuids[] = { 749 X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL), 750 { } 751 }; 752 MODULE_DEVICE_TABLE(x86cpu, fmpm_cpuids); 753 754 static int __init fru_mem_poison_init(void) 755 { 756 int ret; 757 758 if (!x86_match_cpu(fmpm_cpuids)) { 759 ret = -ENODEV; 760 goto out; 761 } 762 763 if (erst_disable) { 764 pr_debug("ERST not available\n"); 765 ret = -ENODEV; 766 goto out; 767 } 768 769 ret = get_system_info(); 770 if (ret) 771 goto out; 772 773 ret = allocate_records(); 774 if (ret) 775 goto out; 776 777 ret = init_fmps(); 778 if (ret) 779 goto out_free; 780 781 ret = get_saved_records(); 782 if (ret) 783 goto out_free; 784 785 ret = save_new_records(); 786 if (ret) 787 goto out_free; 788 789 retire_mem_records(); 790 791 mce_register_decode_chain(&fru_mem_poison_nb); 792 793 pr_info("FRU Memory Poison Manager initialized\n"); 794 return 0; 795 796 out_free: 797 free_records(); 798 out: 799 return ret; 800 } 801 802 static void __exit fru_mem_poison_exit(void) 803 { 804 mce_unregister_decode_chain(&fru_mem_poison_nb); 805 free_records(); 806 } 807 808 module_init(fru_mem_poison_init); 809 module_exit(fru_mem_poison_exit); 810 811 MODULE_LICENSE("GPL"); 812 MODULE_DESCRIPTION("FRU Memory Poison Manager"); 813