xref: /linux/drivers/ras/amd/fmpm.c (revision 7b49a3fb69e785a2425c8dc7dbd0779a0a4c0eb2)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * FRU (Field-Replaceable Unit) Memory Poison Manager
4  *
5  * Copyright (c) 2024, Advanced Micro Devices, Inc.
6  * All Rights Reserved.
7  *
8  * Authors:
9  *	Naveen Krishna Chatradhi <naveenkrishna.chatradhi@amd.com>
10  *	Muralidhara M K <muralidhara.mk@amd.com>
11  *	Yazen Ghannam <Yazen.Ghannam@amd.com>
12  *
13  * Implementation notes, assumptions, and limitations:
14  *
15  * - FRU memory poison section and memory poison descriptor definitions are not yet
16  *   included in the UEFI specification. So they are defined here. Afterwards, they
17  *   may be moved to linux/cper.h, if appropriate.
18  *
19  * - Platforms based on AMD MI300 systems will be the first to use these structures.
20  *   There are a number of assumptions made here that will need to be generalized
21  *   to support other platforms.
22  *
23  *   AMD MI300-based platform(s) assumptions:
24  *   - Memory errors are reported through x86 MCA.
25  *   - The entire DRAM row containing a memory error should be retired.
26  *   - There will be (1) FRU memory poison section per CPER.
27  *   - The FRU will be the CPU package (processor socket).
28  *   - The default number of memory poison descriptor entries should be (8).
29  *   - The platform will use ACPI ERST for persistent storage.
30  *   - All FRU records should be saved to persistent storage. Module init will
31  *     fail if any FRU record is not successfully written.
32  *
33  * - Boot time memory retirement may occur later than ideal due to dependencies
34  *   on other libraries and drivers. This leaves a gap where bad memory may be
35  *   accessed during early boot stages.
36  *
37  * - Enough memory should be pre-allocated for each FRU record to be able to hold
38  *   the expected number of descriptor entries. This, mostly empty, record is
39  *   written to storage during init time. Subsequent writes to the same record
40  *   should allow the Platform to update the stored record in-place. Otherwise,
41  *   if the record is extended, then the Platform may need to perform costly memory
42  *   management operations on the storage. For example, the Platform may spend time
43  *   in Firmware copying and invalidating memory on a relatively slow SPI ROM.
44  */
45 
46 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
47 
48 #include <linux/cper.h>
49 #include <linux/ras.h>
50 #include <linux/cpu.h>
51 
52 #include <acpi/apei.h>
53 
54 #include <asm/cpu_device_id.h>
55 #include <asm/cpuid/api.h>
56 #include <asm/mce.h>
57 
58 #include "../debugfs.h"
59 
60 #include "atl/internal.h"
61 
/* Sentinel meaning "no online CPU was found" in the CPU searches below. */
#define INVALID_CPU			UINT_MAX

/*
 * Flags stored in cper_sec_fru_mem_poison.validation_bits.
 * Each flag is tested before the matching section field is trusted.
 */
#define FMP_VALID_ARCH_TYPE		BIT_ULL(0)
#define FMP_VALID_ARCH			BIT_ULL(1)
#define FMP_VALID_ID_TYPE		BIT_ULL(2)
#define FMP_VALID_ID			BIT_ULL(3)
#define FMP_VALID_LIST_ENTRIES		BIT_ULL(4)
#define FMP_VALID_LIST			BIT_ULL(5)

/* Values for cper_sec_fru_mem_poison.fru_arch_type */
#define FMP_ARCH_TYPE_X86_CPUID_1_EAX	0

/* Values for cper_sec_fru_mem_poison.fru_id_type */
#define FMP_ID_TYPE_X86_PPIN		0
77 
78 /* FRU Memory Poison Section */
79 struct cper_sec_fru_mem_poison {
80 	u32 checksum;
81 	u64 validation_bits;
82 	u32 fru_arch_type;
83 	u64 fru_arch;
84 	u32 fru_id_type;
85 	u64 fru_id;
86 	u32 nr_entries;
87 } __packed;
88 
/* Values for cper_fru_poison_desc.hw_id_type */
#define FPD_HW_ID_TYPE_MCA_IPID		0

/* Values for cper_fru_poison_desc.addr_type */
#define FPD_ADDR_TYPE_MCA_ADDR		0
94 
95 /* Memory Poison Descriptor */
96 struct cper_fru_poison_desc {
97 	u64 timestamp;
98 	u32 hw_id_type;
99 	u64 hw_id;
100 	u32 addr_type;
101 	u64 addr;
102 } __packed;
103 
104 /* Collection of headers and sections for easy pointer use. */
105 struct fru_rec {
106 	struct cper_record_header	hdr;
107 	struct cper_section_descriptor	sec_desc;
108 	struct cper_sec_fru_mem_poison	fmp;
109 	struct cper_fru_poison_desc	entries[];
110 } __packed;
111 
112 /*
113  * Pointers to the complete CPER record of each FRU.
114  *
115  * Memory allocation will include padded space for descriptor entries.
116  */
117 static struct fru_rec **fru_records;
118 
119 /* system physical addresses array */
120 static u64 *spa_entries;
121 
122 static struct dentry *fmpm_dfs_dir;
123 static struct dentry *fmpm_dfs_entries;
124 
/* Creator ID stamped into the header of every record written by this module. */
#define CPER_CREATOR_FMP						\
	GUID_INIT(0xcd5c2993, 0xf4b2, 0x41b2, 0xb5, 0xd4, 0xf9, 0xc3,	\
		  0xa0, 0x33, 0x08, 0x75)

/* Section type GUID identifying a FRU Memory Poison section. */
#define CPER_SECTION_TYPE_FMP						\
	GUID_INIT(0x5e4706c1, 0x5356, 0x48c6, 0x93, 0x0b, 0x52, 0xf2,	\
		  0x12, 0x0a, 0x44, 0x58)
132 
133 /**
134  * DOC: max_nr_entries (byte)
135  * Maximum number of descriptor entries possible for each FRU.
136  *
137  * Values between '1' and '255' are valid.
138  * No input or '0' will default to FMPM_DEFAULT_MAX_NR_ENTRIES.
139  */
140 static u8 max_nr_entries;
141 module_param(max_nr_entries, byte, 0644);
142 MODULE_PARM_DESC(max_nr_entries,
143 		 "Maximum number of memory poison descriptor entries per FRU");
144 
145 #define FMPM_DEFAULT_MAX_NR_ENTRIES	8
146 
147 /* Maximum number of FRUs in the system. */
148 #define FMPM_MAX_NR_FRU			256
149 static unsigned int max_nr_fru;
150 
151 /* Total length of record including headers and list of descriptor entries. */
152 static size_t max_rec_len;
153 
154 #define FMPM_MAX_REC_LEN (sizeof(struct fru_rec) + (sizeof(struct cper_fru_poison_desc) * 255))
155 
156 /* Total number of SPA entries across all FRUs. */
157 static unsigned int spa_nr_entries;
158 
/*
 * Serializes run-time access to the cached records in fru_records and
 * keeps writes to persistent storage from overlapping. Nothing needs it
 * during init; it matters only once the notifier block is registered.
 *
 * Most of a record is set up at module init and never changes. Only the
 * descriptor entries grow as new errors arrive, so hold this mutex
 * whenever the entries are read or written at run time.
 */
static DEFINE_MUTEX(fmpm_update_mutex);
170 
/*
 * Walk every slot of fru_records[].
 *
 * @i:   unsigned/int loop counter, left equal to max_nr_fru after a full walk
 * @rec: struct fru_rec pointer, set to fru_records[i] for each pass
 *
 * The bound is tested before the array is indexed, so the element one past
 * the end of fru_records[] is never read. The comma expression always
 * yields 1, so a NULL slot does not end the walk early.
 */
#define for_each_fru(i, rec) \
	for (i = 0; i < max_nr_fru && ((rec) = fru_records[i], 1); i++)
173 
174 static inline u32 get_fmp_len(struct fru_rec *rec)
175 {
176 	return rec->sec_desc.section_length - sizeof(struct cper_section_descriptor);
177 }
178 
179 static struct fru_rec *get_fru_record(u64 fru_id)
180 {
181 	struct fru_rec *rec;
182 	unsigned int i;
183 
184 	for_each_fru(i, rec) {
185 		if (rec->fmp.fru_id == fru_id)
186 			return rec;
187 	}
188 
189 	pr_debug("Record not found for FRU 0x%016llx\n", fru_id);
190 
191 	return NULL;
192 }
193 
194 /*
195  * Sum up all bytes within the FRU Memory Poison Section including the Memory
196  * Poison Descriptor entries.
197  *
198  * Don't include the old checksum here. It's a u32 value, so summing each of its
199  * bytes will give the wrong total.
200  */
201 static u32 do_fmp_checksum(struct cper_sec_fru_mem_poison *fmp, u32 len)
202 {
203 	u32 checksum = 0;
204 	u8 *buf, *end;
205 
206 	/* Skip old checksum. */
207 	buf = (u8 *)fmp + sizeof(u32);
208 	end = buf + len;
209 
210 	while (buf < end)
211 		checksum += (u8)(*(buf++));
212 
213 	return checksum;
214 }
215 
216 static int update_record_on_storage(struct fru_rec *rec)
217 {
218 	u32 len, checksum;
219 	int ret;
220 
221 	/* Calculate a new checksum. */
222 	len = get_fmp_len(rec);
223 
224 	/* Get the current total. */
225 	checksum = do_fmp_checksum(&rec->fmp, len);
226 
227 	/* Use the complement value. */
228 	rec->fmp.checksum = -checksum;
229 
230 	pr_debug("Writing to storage\n");
231 
232 	ret = erst_write(&rec->hdr);
233 	if (ret) {
234 		pr_warn("Storage update failed for FRU 0x%016llx\n", rec->fmp.fru_id);
235 
236 		if (ret == -ENOSPC)
237 			pr_warn("Not enough space on storage\n");
238 	}
239 
240 	return ret;
241 }
242 
243 static bool rec_has_valid_entries(struct fru_rec *rec)
244 {
245 	if (!(rec->fmp.validation_bits & FMP_VALID_LIST_ENTRIES))
246 		return false;
247 
248 	if (!(rec->fmp.validation_bits & FMP_VALID_LIST))
249 		return false;
250 
251 	return true;
252 }
253 
/*
 * MI300 systems retire whole DRAM rows, so two addresses name the same
 * physical row even when they differ in bits that are 'don't care' for
 * that comparison: every column bit and the row[13] bit. Clear those
 * bits before comparing.
 */
#define MASK_ADDR(addr)	((addr) & ~(MI300_UMC_MCA_ROW13 | MI300_UMC_MCA_COL))
260 
261 static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_desc *new)
262 {
263 	/*
264 	 * Ignore timestamp field.
265 	 * The same physical error may be reported multiple times due to stuck bits, etc.
266 	 *
267 	 * Also, order the checks from most->least likely to fail to shortcut the code.
268 	 */
269 	if (MASK_ADDR(old->addr) != MASK_ADDR(new->addr))
270 		return false;
271 
272 	if (old->hw_id != new->hw_id)
273 		return false;
274 
275 	if (old->addr_type != new->addr_type)
276 		return false;
277 
278 	if (old->hw_id_type != new->hw_id_type)
279 		return false;
280 
281 	return true;
282 }
283 
284 static bool rec_has_fpd(struct fru_rec *rec, struct cper_fru_poison_desc *fpd)
285 {
286 	unsigned int i;
287 
288 	for (i = 0; i < rec->fmp.nr_entries; i++) {
289 		struct cper_fru_poison_desc *fpd_i = &rec->entries[i];
290 
291 		if (fpds_equal(fpd_i, fpd)) {
292 			pr_debug("Found duplicate record\n");
293 			return true;
294 		}
295 	}
296 
297 	return false;
298 }
299 
300 static void save_spa(struct fru_rec *rec, unsigned int entry,
301 		     u64 addr, u64 id, unsigned int cpu)
302 {
303 	unsigned int i, fru_idx, spa_entry;
304 	struct atl_err a_err;
305 	unsigned long spa;
306 
307 	if (entry >= max_nr_entries) {
308 		pr_warn_once("FRU descriptor entry %d out-of-bounds (max: %d)\n",
309 			     entry, max_nr_entries);
310 		return;
311 	}
312 
313 	/* spa_nr_entries is always multiple of max_nr_entries */
314 	for (i = 0; i < spa_nr_entries; i += max_nr_entries) {
315 		fru_idx = i / max_nr_entries;
316 		if (fru_records[fru_idx] == rec)
317 			break;
318 	}
319 
320 	if (i >= spa_nr_entries) {
321 		pr_warn_once("FRU record %d not found\n", i);
322 		return;
323 	}
324 
325 	spa_entry = i + entry;
326 	if (spa_entry >= spa_nr_entries) {
327 		pr_warn_once("spa_entries[] index out-of-bounds\n");
328 		return;
329 	}
330 
331 	memset(&a_err, 0, sizeof(struct atl_err));
332 
333 	a_err.addr = addr;
334 	a_err.ipid = id;
335 	a_err.cpu  = cpu;
336 
337 	spa = amd_convert_umc_mca_addr_to_sys_addr(&a_err);
338 	if (IS_ERR_VALUE(spa)) {
339 		pr_debug("Failed to get system address\n");
340 		return;
341 	}
342 
343 	spa_entries[spa_entry] = spa;
344 	pr_debug("fru_idx: %u, entry: %u, spa_entry: %u, spa: 0x%016llx\n",
345 		 fru_idx, entry, spa_entry, spa_entries[spa_entry]);
346 }
347 
348 static void update_fru_record(struct fru_rec *rec, struct mce *m)
349 {
350 	struct cper_sec_fru_mem_poison *fmp = &rec->fmp;
351 	struct cper_fru_poison_desc fpd, *fpd_dest;
352 	u32 entry = 0;
353 
354 	mutex_lock(&fmpm_update_mutex);
355 
356 	memset(&fpd, 0, sizeof(struct cper_fru_poison_desc));
357 
358 	fpd.timestamp	= m->time;
359 	fpd.hw_id_type = FPD_HW_ID_TYPE_MCA_IPID;
360 	fpd.hw_id	= m->ipid;
361 	fpd.addr_type	= FPD_ADDR_TYPE_MCA_ADDR;
362 	fpd.addr	= m->addr;
363 
364 	/* This is the first entry, so just save it. */
365 	if (!rec_has_valid_entries(rec))
366 		goto save_fpd;
367 
368 	/* Ignore already recorded errors. */
369 	if (rec_has_fpd(rec, &fpd))
370 		goto out_unlock;
371 
372 	if (rec->fmp.nr_entries >= max_nr_entries) {
373 		pr_warn("Exceeded number of entries for FRU 0x%016llx\n", rec->fmp.fru_id);
374 		goto out_unlock;
375 	}
376 
377 	entry  = fmp->nr_entries;
378 
379 save_fpd:
380 	save_spa(rec, entry, m->addr, m->ipid, m->extcpu);
381 	fpd_dest  = &rec->entries[entry];
382 	memcpy(fpd_dest, &fpd, sizeof(struct cper_fru_poison_desc));
383 
384 	fmp->nr_entries		 = entry + 1;
385 	fmp->validation_bits	|= FMP_VALID_LIST_ENTRIES;
386 	fmp->validation_bits	|= FMP_VALID_LIST;
387 
388 	pr_debug("Updated FRU 0x%016llx entry #%u\n", fmp->fru_id, entry);
389 
390 	update_record_on_storage(rec);
391 
392 out_unlock:
393 	mutex_unlock(&fmpm_update_mutex);
394 }
395 
396 static void retire_dram_row(u64 addr, u64 id, u32 cpu)
397 {
398 	struct atl_err a_err;
399 
400 	memset(&a_err, 0, sizeof(struct atl_err));
401 
402 	a_err.addr = addr;
403 	a_err.ipid = id;
404 	a_err.cpu  = cpu;
405 
406 	amd_retire_dram_row(&a_err);
407 }
408 
409 static int fru_handle_mem_poison(struct notifier_block *nb, unsigned long val, void *data)
410 {
411 	struct mce *m = (struct mce *)data;
412 	struct fru_rec *rec;
413 
414 	if (!mce_is_memory_error(m))
415 		return NOTIFY_DONE;
416 
417 	retire_dram_row(m->addr, m->ipid, m->extcpu);
418 
419 	/*
420 	 * An invalid FRU ID should not happen on real errors. But it
421 	 * could happen from software error injection, etc.
422 	 */
423 	rec = get_fru_record(m->ppin);
424 	if (!rec)
425 		return NOTIFY_DONE;
426 
427 	update_fru_record(rec, m);
428 
429 	return NOTIFY_OK;
430 }
431 
432 static struct notifier_block fru_mem_poison_nb = {
433 	.notifier_call  = fru_handle_mem_poison,
434 	.priority	= MCE_PRIO_LOWEST,
435 };
436 
437 static void retire_mem_fmp(struct fru_rec *rec)
438 {
439 	struct cper_sec_fru_mem_poison *fmp = &rec->fmp;
440 	unsigned int i, cpu;
441 
442 	for (i = 0; i < fmp->nr_entries; i++) {
443 		struct cper_fru_poison_desc *fpd = &rec->entries[i];
444 		unsigned int err_cpu = INVALID_CPU;
445 
446 		if (fpd->hw_id_type != FPD_HW_ID_TYPE_MCA_IPID)
447 			continue;
448 
449 		if (fpd->addr_type != FPD_ADDR_TYPE_MCA_ADDR)
450 			continue;
451 
452 		cpus_read_lock();
453 		for_each_online_cpu(cpu) {
454 			if (topology_ppin(cpu) == fmp->fru_id) {
455 				err_cpu = cpu;
456 				break;
457 			}
458 		}
459 		cpus_read_unlock();
460 
461 		if (err_cpu == INVALID_CPU)
462 			continue;
463 
464 		retire_dram_row(fpd->addr, fpd->hw_id, err_cpu);
465 		save_spa(rec, i, fpd->addr, fpd->hw_id, err_cpu);
466 	}
467 }
468 
469 static void retire_mem_records(void)
470 {
471 	struct fru_rec *rec;
472 	unsigned int i;
473 
474 	for_each_fru(i, rec) {
475 		if (!rec_has_valid_entries(rec))
476 			continue;
477 
478 		retire_mem_fmp(rec);
479 	}
480 }
481 
482 /* Set the CPER Record Header and CPER Section Descriptor fields. */
483 static void set_rec_fields(struct fru_rec *rec)
484 {
485 	struct cper_section_descriptor	*sec_desc = &rec->sec_desc;
486 	struct cper_record_header	*hdr	  = &rec->hdr;
487 
488 	/*
489 	 * This is a saved record created with fewer max_nr_entries.
490 	 * Update the record lengths and keep everything else as-is.
491 	 */
492 	if (hdr->record_length && hdr->record_length < max_rec_len) {
493 		pr_debug("Growing record 0x%016llx from %u to %zu bytes\n",
494 			 hdr->record_id, hdr->record_length, max_rec_len);
495 		goto update_lengths;
496 	}
497 
498 	memcpy(hdr->signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
499 	hdr->revision			= CPER_RECORD_REV;
500 	hdr->signature_end		= CPER_SIG_END;
501 
502 	/*
503 	 * Currently, it is assumed that there is one FRU Memory Poison
504 	 * section per CPER. But this may change for other implementations.
505 	 */
506 	hdr->section_count		= 1;
507 
508 	/* The logged errors are recoverable. Otherwise, they'd never make it here. */
509 	hdr->error_severity		= CPER_SEV_RECOVERABLE;
510 
511 	hdr->validation_bits		= 0;
512 	hdr->creator_id			= CPER_CREATOR_FMP;
513 	hdr->notification_type		= CPER_NOTIFY_MCE;
514 	hdr->record_id			= cper_next_record_id();
515 	hdr->flags			= CPER_HW_ERROR_FLAGS_PREVERR;
516 
517 	sec_desc->section_offset	= sizeof(struct cper_record_header);
518 	sec_desc->revision		= CPER_SEC_REV;
519 	sec_desc->validation_bits	= 0;
520 	sec_desc->flags			= CPER_SEC_PRIMARY;
521 	sec_desc->section_type		= CPER_SECTION_TYPE_FMP;
522 	sec_desc->section_severity	= CPER_SEV_RECOVERABLE;
523 
524 update_lengths:
525 	hdr->record_length		= max_rec_len;
526 	sec_desc->section_length	= max_rec_len - sizeof(struct cper_record_header);
527 }
528 
529 static int save_new_records(void)
530 {
531 	DECLARE_BITMAP(new_records, FMPM_MAX_NR_FRU);
532 	struct fru_rec *rec;
533 	unsigned int i;
534 	int ret = 0;
535 
536 	for_each_fru(i, rec) {
537 		/* No need to update saved records that match the current record size. */
538 		if (rec->hdr.record_length == max_rec_len)
539 			continue;
540 
541 		if (!rec->hdr.record_length)
542 			set_bit(i, new_records);
543 
544 		set_rec_fields(rec);
545 
546 		ret = update_record_on_storage(rec);
547 		if (ret)
548 			goto out_clear;
549 	}
550 
551 	return ret;
552 
553 out_clear:
554 	for_each_fru(i, rec) {
555 		if (!test_bit(i, new_records))
556 			continue;
557 
558 		erst_clear(rec->hdr.record_id);
559 	}
560 
561 	return ret;
562 }
563 
564 /* Check that the record matches expected types for the current system.*/
565 static bool fmp_is_usable(struct fru_rec *rec)
566 {
567 	struct cper_sec_fru_mem_poison *fmp = &rec->fmp;
568 	u64 cpuid;
569 
570 	pr_debug("Validation bits: 0x%016llx\n", fmp->validation_bits);
571 
572 	if (!(fmp->validation_bits & FMP_VALID_ARCH_TYPE)) {
573 		pr_debug("Arch type unknown\n");
574 		return false;
575 	}
576 
577 	if (fmp->fru_arch_type != FMP_ARCH_TYPE_X86_CPUID_1_EAX) {
578 		pr_debug("Arch type not 'x86 Family/Model/Stepping'\n");
579 		return false;
580 	}
581 
582 	if (!(fmp->validation_bits & FMP_VALID_ARCH)) {
583 		pr_debug("Arch value unknown\n");
584 		return false;
585 	}
586 
587 	cpuid = cpuid_eax(1);
588 	if (fmp->fru_arch != cpuid) {
589 		pr_debug("Arch value mismatch: record = 0x%016llx, system = 0x%016llx\n",
590 			 fmp->fru_arch, cpuid);
591 		return false;
592 	}
593 
594 	if (!(fmp->validation_bits & FMP_VALID_ID_TYPE)) {
595 		pr_debug("FRU ID type unknown\n");
596 		return false;
597 	}
598 
599 	if (fmp->fru_id_type != FMP_ID_TYPE_X86_PPIN) {
600 		pr_debug("FRU ID type is not 'x86 PPIN'\n");
601 		return false;
602 	}
603 
604 	if (!(fmp->validation_bits & FMP_VALID_ID)) {
605 		pr_debug("FRU ID value unknown\n");
606 		return false;
607 	}
608 
609 	return true;
610 }
611 
612 static bool fmp_is_valid(struct fru_rec *rec)
613 {
614 	struct cper_sec_fru_mem_poison *fmp = &rec->fmp;
615 	u32 checksum, len;
616 
617 	len = get_fmp_len(rec);
618 	if (len < sizeof(struct cper_sec_fru_mem_poison)) {
619 		pr_debug("fmp length is too small\n");
620 		return false;
621 	}
622 
623 	/* Checksum must sum to zero for the entire section. */
624 	checksum = do_fmp_checksum(fmp, len) + fmp->checksum;
625 	if (checksum) {
626 		pr_debug("fmp checksum failed: sum = 0x%x\n", checksum);
627 		print_hex_dump_debug("fmp record: ", DUMP_PREFIX_NONE, 16, 1, fmp, len, false);
628 		return false;
629 	}
630 
631 	if (!fmp_is_usable(rec))
632 		return false;
633 
634 	return true;
635 }
636 
637 static struct fru_rec *get_valid_record(struct fru_rec *old)
638 {
639 	struct fru_rec *new;
640 
641 	if (!fmp_is_valid(old)) {
642 		pr_debug("Ignoring invalid record\n");
643 		return NULL;
644 	}
645 
646 	new = get_fru_record(old->fmp.fru_id);
647 	if (!new)
648 		pr_debug("Ignoring record for absent FRU\n");
649 
650 	return new;
651 }
652 
653 /*
654  * Fetch saved records from persistent storage.
655  *
656  * For each found record:
657  * - If it was not created by this module, then ignore it.
658  * - If it is valid, then copy its data to the local cache.
659  * - If it is not valid, then erase it.
660  */
661 static int get_saved_records(void)
662 {
663 	struct fru_rec *old, *new;
664 	u64 record_id;
665 	int ret, pos;
666 	ssize_t len;
667 
668 	old = kmalloc(FMPM_MAX_REC_LEN, GFP_KERNEL);
669 	if (!old) {
670 		ret = -ENOMEM;
671 		goto out;
672 	}
673 
674 	ret = erst_get_record_id_begin(&pos);
675 	if (ret < 0)
676 		goto out_end;
677 
678 	while (!erst_get_record_id_next(&pos, &record_id)) {
679 		if (record_id == APEI_ERST_INVALID_RECORD_ID)
680 			goto out_end;
681 		/*
682 		 * Make sure to clear temporary buffer between reads to avoid
683 		 * leftover data from records of various sizes.
684 		 */
685 		memset(old, 0, FMPM_MAX_REC_LEN);
686 
687 		len = erst_read_record(record_id, &old->hdr, FMPM_MAX_REC_LEN,
688 				       sizeof(struct fru_rec), &CPER_CREATOR_FMP);
689 		if (len < 0)
690 			continue;
691 
692 		new = get_valid_record(old);
693 		if (!new) {
694 			erst_clear(record_id);
695 			continue;
696 		}
697 
698 		if (len > max_rec_len) {
699 			unsigned int saved_nr_entries;
700 
701 			saved_nr_entries  = len - sizeof(struct fru_rec);
702 			saved_nr_entries /= sizeof(struct cper_fru_poison_desc);
703 
704 			pr_warn("Saved record found with %u entries.\n", saved_nr_entries);
705 			pr_warn("Please increase max_nr_entries to %u.\n", saved_nr_entries);
706 
707 			ret = -EINVAL;
708 			goto out_end;
709 		}
710 
711 		/* Restore the record */
712 		memcpy(new, old, len);
713 	}
714 
715 out_end:
716 	erst_get_record_id_end();
717 	kfree(old);
718 out:
719 	return ret;
720 }
721 
722 static void set_fmp_fields(struct fru_rec *rec, unsigned int cpu)
723 {
724 	struct cper_sec_fru_mem_poison *fmp = &rec->fmp;
725 
726 	fmp->fru_arch_type    = FMP_ARCH_TYPE_X86_CPUID_1_EAX;
727 	fmp->validation_bits |= FMP_VALID_ARCH_TYPE;
728 
729 	/* Assume all CPUs in the system have the same value for now. */
730 	fmp->fru_arch	      = cpuid_eax(1);
731 	fmp->validation_bits |= FMP_VALID_ARCH;
732 
733 	fmp->fru_id_type      = FMP_ID_TYPE_X86_PPIN;
734 	fmp->validation_bits |= FMP_VALID_ID_TYPE;
735 
736 	fmp->fru_id	      = topology_ppin(cpu);
737 	fmp->validation_bits |= FMP_VALID_ID;
738 }
739 
740 static int init_fmps(void)
741 {
742 	struct fru_rec *rec;
743 	unsigned int i, cpu;
744 	int ret = 0;
745 
746 	for_each_fru(i, rec) {
747 		unsigned int fru_cpu = INVALID_CPU;
748 
749 		cpus_read_lock();
750 		for_each_online_cpu(cpu) {
751 			if (topology_physical_package_id(cpu) == i) {
752 				fru_cpu = cpu;
753 				break;
754 			}
755 		}
756 		cpus_read_unlock();
757 
758 		if (fru_cpu == INVALID_CPU) {
759 			pr_debug("Failed to find matching CPU for FRU #%u\n", i);
760 			ret = -ENODEV;
761 			break;
762 		}
763 
764 		set_fmp_fields(rec, fru_cpu);
765 	}
766 
767 	return ret;
768 }
769 
770 static int get_system_info(void)
771 {
772 	/* Only load on MI300A systems for now. */
773 	if (!(boot_cpu_data.x86_model >= 0x90 &&
774 	      boot_cpu_data.x86_model <= 0x9f))
775 		return -ENODEV;
776 
777 	if (!cpu_feature_enabled(X86_FEATURE_AMD_PPIN)) {
778 		pr_debug("PPIN feature not available\n");
779 		return -ENODEV;
780 	}
781 
782 	/* Use CPU socket as FRU for MI300 systems. */
783 	max_nr_fru = topology_max_packages();
784 	if (!max_nr_fru)
785 		return -ENODEV;
786 
787 	if (max_nr_fru > FMPM_MAX_NR_FRU) {
788 		pr_warn("Too many FRUs to manage: found: %u, max: %u\n",
789 			max_nr_fru, FMPM_MAX_NR_FRU);
790 		return -ENODEV;
791 	}
792 
793 	if (!max_nr_entries)
794 		max_nr_entries = FMPM_DEFAULT_MAX_NR_ENTRIES;
795 
796 	spa_nr_entries = max_nr_fru * max_nr_entries;
797 
798 	max_rec_len  = sizeof(struct fru_rec);
799 	max_rec_len += sizeof(struct cper_fru_poison_desc) * max_nr_entries;
800 
801 	pr_info("max FRUs: %u, max entries: %u, max record length: %lu\n",
802 		 max_nr_fru, max_nr_entries, max_rec_len);
803 
804 	return 0;
805 }
806 
807 static void free_records(void)
808 {
809 	struct fru_rec *rec;
810 	int i;
811 
812 	for_each_fru(i, rec)
813 		kfree(rec);
814 
815 	kfree(fru_records);
816 	kfree(spa_entries);
817 }
818 
819 static int allocate_records(void)
820 {
821 	int i, ret = 0;
822 
823 	fru_records = kzalloc_objs(struct fru_rec *, max_nr_fru);
824 	if (!fru_records) {
825 		ret = -ENOMEM;
826 		goto out;
827 	}
828 
829 	for (i = 0; i < max_nr_fru; i++) {
830 		fru_records[i] = kzalloc(max_rec_len, GFP_KERNEL);
831 		if (!fru_records[i]) {
832 			ret = -ENOMEM;
833 			goto out_free;
834 		}
835 	}
836 
837 	spa_entries = kcalloc(spa_nr_entries, sizeof(u64), GFP_KERNEL);
838 	if (!spa_entries) {
839 		ret = -ENOMEM;
840 		goto out_free;
841 	}
842 
843 	for (i = 0; i < spa_nr_entries; i++)
844 		spa_entries[i] = INVALID_SPA;
845 
846 	return ret;
847 
848 out_free:
849 	while (--i >= 0)
850 		kfree(fru_records[i]);
851 
852 	kfree(fru_records);
853 out:
854 	return ret;
855 }
856 
857 static void *fmpm_start(struct seq_file *f, loff_t *pos)
858 {
859 	if (*pos >= (spa_nr_entries + 1))
860 		return NULL;
861 	return pos;
862 }
863 
864 static void *fmpm_next(struct seq_file *f, void *data, loff_t *pos)
865 {
866 	if (++(*pos) >= (spa_nr_entries + 1))
867 		return NULL;
868 	return pos;
869 }
870 
/* seq_file stop: fmpm_start() acquires nothing, so there is nothing to release. */
static void fmpm_stop(struct seq_file *f, void *data)
{
}
874 
/* Column widths of the debugfs "entries" table. */
#define SHORT_WIDTH	8
#define U64_WIDTH	18
#define TIMESTAMP_WIDTH	19
#define LONG_WIDTH	24
/* Blank padding that widens a u64 or timestamp field to a long column. */
#define U64_PAD		(LONG_WIDTH - U64_WIDTH)
#define TS_PAD		(LONG_WIDTH - TIMESTAMP_WIDTH)
881 static int fmpm_show(struct seq_file *f, void *data)
882 {
883 	unsigned int fru_idx, entry, spa_entry, line;
884 	struct cper_fru_poison_desc *fpd;
885 	struct fru_rec *rec;
886 
887 	line = *(loff_t *)data;
888 	if (line == 0) {
889 		seq_printf(f, "%-*s", SHORT_WIDTH, "fru_idx");
890 		seq_printf(f, "%-*s", LONG_WIDTH,  "fru_id");
891 		seq_printf(f, "%-*s", SHORT_WIDTH, "entry");
892 		seq_printf(f, "%-*s", LONG_WIDTH,  "timestamp");
893 		seq_printf(f, "%-*s", LONG_WIDTH,  "hw_id");
894 		seq_printf(f, "%-*s", LONG_WIDTH,  "addr");
895 		seq_printf(f, "%-*s", LONG_WIDTH,  "spa");
896 		goto out_newline;
897 	}
898 
899 	spa_entry = line - 1;
900 	fru_idx	  = spa_entry / max_nr_entries;
901 	entry	  = spa_entry % max_nr_entries;
902 
903 	rec = fru_records[fru_idx];
904 	if (!rec)
905 		goto out;
906 
907 	seq_printf(f, "%-*u",		SHORT_WIDTH, fru_idx);
908 	seq_printf(f, "0x%016llx%-*s",	rec->fmp.fru_id, U64_PAD, "");
909 	seq_printf(f, "%-*u",		SHORT_WIDTH, entry);
910 
911 	mutex_lock(&fmpm_update_mutex);
912 
913 	if (entry >= rec->fmp.nr_entries) {
914 		seq_printf(f, "%-*s", LONG_WIDTH, "*");
915 		seq_printf(f, "%-*s", LONG_WIDTH, "*");
916 		seq_printf(f, "%-*s", LONG_WIDTH, "*");
917 		seq_printf(f, "%-*s", LONG_WIDTH, "*");
918 		goto out_unlock;
919 	}
920 
921 	fpd = &rec->entries[entry];
922 
923 	seq_printf(f, "%ptT%-*s",	&fpd->timestamp, TS_PAD,  "");
924 	seq_printf(f, "0x%016llx%-*s",	fpd->hw_id,	 U64_PAD, "");
925 	seq_printf(f, "0x%016llx%-*s",	fpd->addr,	 U64_PAD, "");
926 
927 	if (spa_entries[spa_entry] == INVALID_SPA)
928 		seq_printf(f, "%-*s", LONG_WIDTH, "*");
929 	else
930 		seq_printf(f, "0x%016llx%-*s", spa_entries[spa_entry], U64_PAD, "");
931 
932 out_unlock:
933 	mutex_unlock(&fmpm_update_mutex);
934 out_newline:
935 	seq_putc(f, '\n');
936 out:
937 	return 0;
938 }
939 
940 static const struct seq_operations fmpm_seq_ops = {
941 	.start	= fmpm_start,
942 	.next	= fmpm_next,
943 	.stop	= fmpm_stop,
944 	.show	= fmpm_show,
945 };
946 
947 static int fmpm_open(struct inode *inode, struct file *file)
948 {
949 	return seq_open(file, &fmpm_seq_ops);
950 }
951 
952 static const struct file_operations fmpm_fops = {
953 	.open		= fmpm_open,
954 	.release	= seq_release,
955 	.read		= seq_read,
956 	.llseek		= seq_lseek,
957 };
958 
959 static void setup_debugfs(void)
960 {
961 	struct dentry *dfs = ras_get_debugfs_root();
962 
963 	if (!dfs)
964 		return;
965 
966 	fmpm_dfs_dir = debugfs_create_dir("fmpm", dfs);
967 	if (!fmpm_dfs_dir)
968 		return;
969 
970 	fmpm_dfs_entries = debugfs_create_file("entries", 0400, fmpm_dfs_dir, NULL, &fmpm_fops);
971 	if (!fmpm_dfs_entries)
972 		debugfs_remove(fmpm_dfs_dir);
973 }
974 
975 static const struct x86_cpu_id fmpm_cpuids[] = {
976 	X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL),
977 	{ }
978 };
979 MODULE_DEVICE_TABLE(x86cpu, fmpm_cpuids);
980 
981 static int __init fru_mem_poison_init(void)
982 {
983 	int ret;
984 
985 	if (!x86_match_cpu(fmpm_cpuids)) {
986 		ret = -ENODEV;
987 		goto out;
988 	}
989 
990 	if (erst_disable) {
991 		pr_debug("ERST not available\n");
992 		ret = -ENODEV;
993 		goto out;
994 	}
995 
996 	ret = get_system_info();
997 	if (ret)
998 		goto out;
999 
1000 	ret = allocate_records();
1001 	if (ret)
1002 		goto out;
1003 
1004 	ret = init_fmps();
1005 	if (ret)
1006 		goto out_free;
1007 
1008 	ret = get_saved_records();
1009 	if (ret)
1010 		goto out_free;
1011 
1012 	ret = save_new_records();
1013 	if (ret)
1014 		goto out_free;
1015 
1016 	setup_debugfs();
1017 
1018 	retire_mem_records();
1019 
1020 	mce_register_decode_chain(&fru_mem_poison_nb);
1021 
1022 	pr_info("FRU Memory Poison Manager initialized\n");
1023 	return 0;
1024 
1025 out_free:
1026 	free_records();
1027 out:
1028 	return ret;
1029 }
1030 
1031 static void __exit fru_mem_poison_exit(void)
1032 {
1033 	mce_unregister_decode_chain(&fru_mem_poison_nb);
1034 	debugfs_remove(fmpm_dfs_dir);
1035 	free_records();
1036 }
1037 
1038 module_init(fru_mem_poison_init);
1039 module_exit(fru_mem_poison_exit);
1040 
1041 MODULE_LICENSE("GPL");
1042 MODULE_DESCRIPTION("FRU Memory Poison Manager");
1043