xref: /linux/drivers/acpi/apei/ghes.c (revision ae22a94997b8a03dcb3c922857c203246711f9d4)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * APEI Generic Hardware Error Source support
4  *
5  * Generic Hardware Error Source provides a way to report platform
6  * hardware errors (such as that from chipset). It works in so called
7  * "Firmware First" mode, that is, hardware errors are reported to
8  * firmware firstly, then reported to Linux by firmware. This way,
9  * some non-standard hardware error registers or non-standard hardware
10  * link can be checked by firmware to produce more hardware error
11  * information for Linux.
12  *
13  * For more information about Generic Hardware Error Source, please
14  * refer to ACPI Specification version 4.0, section 17.3.2.6
15  *
16  * Copyright 2010,2011 Intel Corp.
17  *   Author: Huang Ying <ying.huang@intel.com>
18  */
19 
20 #include <linux/arm_sdei.h>
21 #include <linux/kernel.h>
22 #include <linux/moduleparam.h>
23 #include <linux/init.h>
24 #include <linux/acpi.h>
25 #include <linux/io.h>
26 #include <linux/interrupt.h>
27 #include <linux/timer.h>
28 #include <linux/cper.h>
29 #include <linux/platform_device.h>
30 #include <linux/mutex.h>
31 #include <linux/ratelimit.h>
32 #include <linux/vmalloc.h>
33 #include <linux/irq_work.h>
34 #include <linux/llist.h>
35 #include <linux/genalloc.h>
36 #include <linux/pci.h>
37 #include <linux/pfn.h>
38 #include <linux/aer.h>
39 #include <linux/nmi.h>
40 #include <linux/sched/clock.h>
41 #include <linux/uuid.h>
42 #include <linux/ras.h>
43 #include <linux/task_work.h>
44 
45 #include <acpi/actbl1.h>
46 #include <acpi/ghes.h>
47 #include <acpi/apei.h>
48 #include <asm/fixmap.h>
49 #include <asm/tlbflush.h>
50 #include <ras/ras_event.h>
51 
52 #include "apei-internal.h"
53 
54 #define GHES_PFX	"GHES: "
55 
/* Upper bound applied to the per-source error status buffer in ghes_new() */
#define GHES_ESTATUS_MAX_SIZE		65536
/* Pool space reserved per error source in ghes_estatus_pool_init() */
#define GHES_ESOURCE_PREALLOC_MAX_SIZE	65536

/* Minimum allocation order handed to gen_pool_create() for the estatus pool */
#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3

/* This is just an estimation for memory pool allocation */
#define GHES_ESTATUS_CACHE_AVG_SIZE	512

/* Number of slots in the ghes_estatus_caches[] array */
#define GHES_ESTATUS_CACHES_SIZE	4

/* Age (in ns, i.e. 10 seconds) after which a cached estatus no longer counts */
#define GHES_ESTATUS_IN_CACHE_MAX_NSEC	10000000000ULL
/* Prevent too many caches from being allocated because of RCU */
#define GHES_ESTATUS_CACHE_ALLOCED_MAX	(GHES_ESTATUS_CACHES_SIZE * 3 / 2)

/*
 * A cache entry is a struct ghes_estatus_cache header immediately followed
 * by a copy of the error status block.
 */
#define GHES_ESTATUS_CACHE_LEN(estatus_len)			\
	(sizeof(struct ghes_estatus_cache) + (estatus_len))
#define GHES_ESTATUS_FROM_CACHE(estatus_cache)			\
	((struct acpi_hest_generic_status *)				\
	 ((struct ghes_estatus_cache *)(estatus_cache) + 1))

/*
 * A queued node is a struct ghes_estatus_node header immediately followed
 * by a copy of the error status block.
 */
#define GHES_ESTATUS_NODE_LEN(estatus_len)			\
	(sizeof(struct ghes_estatus_node) + (estatus_len))
#define GHES_ESTATUS_FROM_NODE(estatus_node)			\
	((struct acpi_hest_generic_status *)				\
	 ((struct ghes_estatus_node *)(estatus_node) + 1))

/*
 * A vendor record entry is a struct ghes_vendor_record_entry header
 * immediately followed by a copy of the generic data record.
 */
#define GHES_VENDOR_ENTRY_LEN(gdata_len)                               \
	(sizeof(struct ghes_vendor_record_entry) + (gdata_len))
#define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry)                     \
	((struct acpi_hest_generic_data *)                              \
	((struct ghes_vendor_record_entry *)(vendor_entry) + 1))
87 
88 /*
89  *  NMI-like notifications vary by architecture, before the compiler can prune
90  *  unused static functions it needs a value for these enums.
91  */
92 #ifndef CONFIG_ARM_SDE_INTERFACE
93 #define FIX_APEI_GHES_SDEI_NORMAL	__end_of_fixed_addresses
94 #define FIX_APEI_GHES_SDEI_CRITICAL	__end_of_fixed_addresses
95 #endif
96 
97 static ATOMIC_NOTIFIER_HEAD(ghes_report_chain);
98 
99 static inline bool is_hest_type_generic_v2(struct ghes *ghes)
100 {
101 	return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
102 }
103 
104 /*
105  * A platform may describe one error source for the handling of synchronous
106  * errors (e.g. MCE or SEA), or for handling asynchronous errors (e.g. SCI
107  * or External Interrupt). On x86, the HEST notifications are always
108  * asynchronous, so only SEA on ARM is delivered as a synchronous
109  * notification.
110  */
111 static inline bool is_hest_sync_notify(struct ghes *ghes)
112 {
113 	u8 notify_type = ghes->generic->notify.type;
114 
115 	return notify_type == ACPI_HEST_NOTIFY_SEA;
116 }
117 
118 /*
119  * This driver isn't really modular, however for the time being,
120  * continuing to use module_param is the easiest way to remain
121  * compatible with existing boot arg use cases.
122  */
123 bool ghes_disable;
124 module_param_named(disable, ghes_disable, bool, 0);
125 
126 /*
127  * "ghes.edac_force_enable" forcibly enables ghes_edac and skips the platform
128  * check.
129  */
130 static bool ghes_edac_force_enable;
131 module_param_named(edac_force_enable, ghes_edac_force_enable, bool, 0);
132 
133 /*
134  * All error sources notified with HED (Hardware Error Device) share a
135  * single notifier callback, so they need to be linked and checked one
136  * by one. This holds true for NMI too.
137  *
138  * RCU is used for these lists, so ghes_list_mutex is only used for
139  * list changing, not for traversing.
140  */
141 static LIST_HEAD(ghes_hed);
142 static DEFINE_MUTEX(ghes_list_mutex);
143 
144 /*
145  * A list of GHES devices which are given to the corresponding EDAC driver
146  * ghes_edac for further use.
147  */
148 static LIST_HEAD(ghes_devs);
149 static DEFINE_MUTEX(ghes_devs_mutex);
150 
/*
 * The memory area used to transfer hardware error information from the
 * BIOS to Linux can only be determined in an NMI, IRQ or timer handler.
 * General ioremap cannot be used in atomic context, so the fixmap is
 * used instead.
 *
 * This spinlock prevents the fixmap entry from being used by more than
 * one user at a time.
 */
160 static DEFINE_SPINLOCK(ghes_notify_lock_irq);
161 
/*
 * Deferred vendor (non-standard) record. Allocated from ghes_estatus_pool
 * by ghes_defer_non_standard_event() and released by
 * ghes_vendor_record_work_func() once the notifier chain has run.
 */
struct ghes_vendor_record_entry {
	struct work_struct work;	/* runs ghes_vendor_record_work_func() */
	int error_severity;		/* severity passed to the notifier chain */
	char vendor_record[];		/* copy of the generic data record */
};
167 
/* Pool backing estatus caches, vendor record entries and AER register copies */
static struct gen_pool *ghes_estatus_pool;

/* Recently printed error status blocks, looked up under rcu_read_lock() */
static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
/* Number of cache entries currently allocated (bounded by GHES_ESTATUS_CACHE_ALLOCED_MAX) */
static atomic_t ghes_estatus_cache_alloced;

/* Value given to panic_timeout by __ghes_panic() when panic_timeout is 0 */
static int ghes_panic_timeout __read_mostly = 30;
174 
175 static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx)
176 {
177 	phys_addr_t paddr;
178 	pgprot_t prot;
179 
180 	paddr = PFN_PHYS(pfn);
181 	prot = arch_apei_get_mem_attribute(paddr);
182 	__set_fixmap(fixmap_idx, paddr, prot);
183 
184 	return (void __iomem *) __fix_to_virt(fixmap_idx);
185 }
186 
187 static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx)
188 {
189 	int _idx = virt_to_fix((unsigned long)vaddr);
190 
191 	WARN_ON_ONCE(fixmap_idx != _idx);
192 	clear_fixmap(fixmap_idx);
193 }
194 
195 int ghes_estatus_pool_init(unsigned int num_ghes)
196 {
197 	unsigned long addr, len;
198 	int rc;
199 
200 	ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
201 	if (!ghes_estatus_pool)
202 		return -ENOMEM;
203 
204 	len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX;
205 	len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE);
206 
207 	addr = (unsigned long)vmalloc(PAGE_ALIGN(len));
208 	if (!addr)
209 		goto err_pool_alloc;
210 
211 	rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
212 	if (rc)
213 		goto err_pool_add;
214 
215 	return 0;
216 
217 err_pool_add:
218 	vfree((void *)addr);
219 
220 err_pool_alloc:
221 	gen_pool_destroy(ghes_estatus_pool);
222 
223 	return -ENOMEM;
224 }
225 
/**
 * ghes_estatus_pool_region_free - free previously allocated memory
 *				   from the ghes_estatus_pool.
 * @addr: address of memory to free.
 * @size: size of memory to free.
 *
 * Thin wrapper around gen_pool_free() on ghes_estatus_pool; exported so
 * that code outside this file can return pool memory.
 *
 * Returns none.
 */
void ghes_estatus_pool_region_free(unsigned long addr, u32 size)
{
	gen_pool_free(ghes_estatus_pool, addr, size);
}
EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free);
239 
/*
 * Map the GHESv2 Read Ack register of @ghes. Returns the result of
 * apei_map_generic_address(). Callers in this file only invoke it when
 * is_hest_type_generic_v2() is true.
 */
static int map_gen_v2(struct ghes *ghes)
{
	return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
}
244 
/* Undo map_gen_v2(): unmap the GHESv2 Read Ack register of @ghes. */
static void unmap_gen_v2(struct ghes *ghes)
{
	apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
}
249 
250 static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
251 {
252 	int rc;
253 	u64 val = 0;
254 
255 	rc = apei_read(&val, &gv2->read_ack_register);
256 	if (rc)
257 		return;
258 
259 	val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
260 	val |= gv2->read_ack_write    << gv2->read_ack_register.bit_offset;
261 
262 	apei_write(val, &gv2->read_ack_register);
263 }
264 
265 static struct ghes *ghes_new(struct acpi_hest_generic *generic)
266 {
267 	struct ghes *ghes;
268 	unsigned int error_block_length;
269 	int rc;
270 
271 	ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
272 	if (!ghes)
273 		return ERR_PTR(-ENOMEM);
274 
275 	ghes->generic = generic;
276 	if (is_hest_type_generic_v2(ghes)) {
277 		rc = map_gen_v2(ghes);
278 		if (rc)
279 			goto err_free;
280 	}
281 
282 	rc = apei_map_generic_address(&generic->error_status_address);
283 	if (rc)
284 		goto err_unmap_read_ack_addr;
285 	error_block_length = generic->error_block_length;
286 	if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
287 		pr_warn(FW_WARN GHES_PFX
288 			"Error status block length is too long: %u for "
289 			"generic hardware error source: %d.\n",
290 			error_block_length, generic->header.source_id);
291 		error_block_length = GHES_ESTATUS_MAX_SIZE;
292 	}
293 	ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
294 	if (!ghes->estatus) {
295 		rc = -ENOMEM;
296 		goto err_unmap_status_addr;
297 	}
298 
299 	return ghes;
300 
301 err_unmap_status_addr:
302 	apei_unmap_generic_address(&generic->error_status_address);
303 err_unmap_read_ack_addr:
304 	if (is_hest_type_generic_v2(ghes))
305 		unmap_gen_v2(ghes);
306 err_free:
307 	kfree(ghes);
308 	return ERR_PTR(rc);
309 }
310 
/*
 * Release what ghes_new() set up inside @ghes: the estatus buffer, the
 * error status address mapping and, for GHESv2, the Read Ack register
 * mapping. The struct ghes itself is not freed here.
 */
static void ghes_fini(struct ghes *ghes)
{
	kfree(ghes->estatus);
	apei_unmap_generic_address(&ghes->generic->error_status_address);
	if (is_hest_type_generic_v2(ghes))
		unmap_gen_v2(ghes);
}
318 
319 static inline int ghes_severity(int severity)
320 {
321 	switch (severity) {
322 	case CPER_SEV_INFORMATIONAL:
323 		return GHES_SEV_NO;
324 	case CPER_SEV_CORRECTED:
325 		return GHES_SEV_CORRECTED;
326 	case CPER_SEV_RECOVERABLE:
327 		return GHES_SEV_RECOVERABLE;
328 	case CPER_SEV_FATAL:
329 		return GHES_SEV_PANIC;
330 	default:
331 		/* Unknown, go panic */
332 		return GHES_SEV_PANIC;
333 	}
334 }
335 
336 static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
337 				  int from_phys,
338 				  enum fixed_addresses fixmap_idx)
339 {
340 	void __iomem *vaddr;
341 	u64 offset;
342 	u32 trunk;
343 
344 	while (len > 0) {
345 		offset = paddr - (paddr & PAGE_MASK);
346 		vaddr = ghes_map(PHYS_PFN(paddr), fixmap_idx);
347 		trunk = PAGE_SIZE - offset;
348 		trunk = min(trunk, len);
349 		if (from_phys)
350 			memcpy_fromio(buffer, vaddr + offset, trunk);
351 		else
352 			memcpy_toio(vaddr + offset, buffer, trunk);
353 		len -= trunk;
354 		paddr += trunk;
355 		buffer += trunk;
356 		ghes_unmap(vaddr, fixmap_idx);
357 	}
358 }
359 
360 /* Check the top-level record header has an appropriate size. */
361 static int __ghes_check_estatus(struct ghes *ghes,
362 				struct acpi_hest_generic_status *estatus)
363 {
364 	u32 len = cper_estatus_len(estatus);
365 
366 	if (len < sizeof(*estatus)) {
367 		pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n");
368 		return -EIO;
369 	}
370 
371 	if (len > ghes->generic->error_block_length) {
372 		pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n");
373 		return -EIO;
374 	}
375 
376 	if (cper_estatus_check_header(estatus)) {
377 		pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n");
378 		return -EIO;
379 	}
380 
381 	return 0;
382 }
383 
384 /* Read the CPER block, returning its address, and header in estatus. */
385 static int __ghes_peek_estatus(struct ghes *ghes,
386 			       struct acpi_hest_generic_status *estatus,
387 			       u64 *buf_paddr, enum fixed_addresses fixmap_idx)
388 {
389 	struct acpi_hest_generic *g = ghes->generic;
390 	int rc;
391 
392 	rc = apei_read(buf_paddr, &g->error_status_address);
393 	if (rc) {
394 		*buf_paddr = 0;
395 		pr_warn_ratelimited(FW_WARN GHES_PFX
396 "Failed to read error status block address for hardware error source: %d.\n",
397 				   g->header.source_id);
398 		return -EIO;
399 	}
400 	if (!*buf_paddr)
401 		return -ENOENT;
402 
403 	ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1,
404 			      fixmap_idx);
405 	if (!estatus->block_status) {
406 		*buf_paddr = 0;
407 		return -ENOENT;
408 	}
409 
410 	return 0;
411 }
412 
413 static int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
414 			       u64 buf_paddr, enum fixed_addresses fixmap_idx,
415 			       size_t buf_len)
416 {
417 	ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx);
418 	if (cper_estatus_check(estatus)) {
419 		pr_warn_ratelimited(FW_WARN GHES_PFX
420 				    "Failed to read error status block!\n");
421 		return -EIO;
422 	}
423 
424 	return 0;
425 }
426 
427 static int ghes_read_estatus(struct ghes *ghes,
428 			     struct acpi_hest_generic_status *estatus,
429 			     u64 *buf_paddr, enum fixed_addresses fixmap_idx)
430 {
431 	int rc;
432 
433 	rc = __ghes_peek_estatus(ghes, estatus, buf_paddr, fixmap_idx);
434 	if (rc)
435 		return rc;
436 
437 	rc = __ghes_check_estatus(ghes, estatus);
438 	if (rc)
439 		return rc;
440 
441 	return __ghes_read_estatus(estatus, *buf_paddr, fixmap_idx,
442 				   cper_estatus_len(estatus));
443 }
444 
445 static void ghes_clear_estatus(struct ghes *ghes,
446 			       struct acpi_hest_generic_status *estatus,
447 			       u64 buf_paddr, enum fixed_addresses fixmap_idx)
448 {
449 	estatus->block_status = 0;
450 
451 	if (!buf_paddr)
452 		return;
453 
454 	ghes_copy_tofrom_phys(estatus, buf_paddr,
455 			      sizeof(estatus->block_status), 0,
456 			      fixmap_idx);
457 
458 	/*
459 	 * GHESv2 type HEST entries introduce support for error acknowledgment,
460 	 * so only acknowledge the error if this support is present.
461 	 */
462 	if (is_hest_type_generic_v2(ghes))
463 		ghes_ack_error(ghes->generic_v2);
464 }
465 
466 /*
467  * Called as task_work before returning to user-space.
468  * Ensure any queued work has been done before we return to the context that
469  * triggered the notification.
470  */
471 static void ghes_kick_task_work(struct callback_head *head)
472 {
473 	struct acpi_hest_generic_status *estatus;
474 	struct ghes_estatus_node *estatus_node;
475 	u32 node_len;
476 
477 	estatus_node = container_of(head, struct ghes_estatus_node, task_work);
478 	if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
479 		memory_failure_queue_kick(estatus_node->task_work_cpu);
480 
481 	estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
482 	node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus));
483 	gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len);
484 }
485 
486 static bool ghes_do_memory_failure(u64 physical_addr, int flags)
487 {
488 	unsigned long pfn;
489 
490 	if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
491 		return false;
492 
493 	pfn = PHYS_PFN(physical_addr);
494 	if (!pfn_valid(pfn) && !arch_is_platform_page(physical_addr)) {
495 		pr_warn_ratelimited(FW_WARN GHES_PFX
496 		"Invalid address in generic error data: %#llx\n",
497 		physical_addr);
498 		return false;
499 	}
500 
501 	memory_failure_queue(pfn, flags);
502 	return true;
503 }
504 
505 static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
506 				       int sev, bool sync)
507 {
508 	int flags = -1;
509 	int sec_sev = ghes_severity(gdata->error_severity);
510 	struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
511 
512 	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
513 		return false;
514 
515 	/* iff following two events can be handled properly by now */
516 	if (sec_sev == GHES_SEV_CORRECTED &&
517 	    (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
518 		flags = MF_SOFT_OFFLINE;
519 	if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
520 		flags = sync ? MF_ACTION_REQUIRED : 0;
521 
522 	if (flags != -1)
523 		return ghes_do_memory_failure(mem_err->physical_addr, flags);
524 
525 	return false;
526 }
527 
528 static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
529 				       int sev, bool sync)
530 {
531 	struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
532 	int flags = sync ? MF_ACTION_REQUIRED : 0;
533 	bool queued = false;
534 	int sec_sev, i;
535 	char *p;
536 
537 	log_arm_hw_error(err);
538 
539 	sec_sev = ghes_severity(gdata->error_severity);
540 	if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
541 		return false;
542 
543 	p = (char *)(err + 1);
544 	for (i = 0; i < err->err_info_num; i++) {
545 		struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
546 		bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR);
547 		bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
548 		const char *error_type = "unknown error";
549 
550 		/*
551 		 * The field (err_info->error_info & BIT(26)) is fixed to set to
552 		 * 1 in some old firmware of HiSilicon Kunpeng920. We assume that
553 		 * firmware won't mix corrected errors in an uncorrected section,
554 		 * and don't filter out 'corrected' error here.
555 		 */
556 		if (is_cache && has_pa) {
557 			queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags);
558 			p += err_info->length;
559 			continue;
560 		}
561 
562 		if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs))
563 			error_type = cper_proc_error_type_strs[err_info->type];
564 
565 		pr_warn_ratelimited(FW_WARN GHES_PFX
566 				    "Unhandled processor error type: %s\n",
567 				    error_type);
568 		p += err_info->length;
569 	}
570 
571 	return queued;
572 }
573 
/*
 * PCIe AER errors are forwarded to the AER driver, which does the
 * reporting and the recovery. GHES severities translate to AER severities
 * as follows:
 *
 * corrected -> AER_CORRECTABLE
 *     Reported by the AER driver; no recovery is needed.
 * recoverable -> AER_NONFATAL
 * recoverable && CPER_SEC_RESET -> AER_FATAL
 *     Both are reported and recovered from by the AER driver.
 * GHES_SEV_PANIC never reaches this point because the kernel must panic.
 */
static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
{
#ifdef CONFIG_ACPI_APEI_PCIEAER
	struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
	unsigned int devfn;
	int aer_severity;
	u8 *aer_info;

	/* Both the device ID and the AER info must be valid. */
	if (!(pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID) ||
	    !(pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO))
		return;

	devfn = PCI_DEVFN(pcie_err->device_id.device,
			  pcie_err->device_id.function);
	aer_severity = cper_severity_to_aer(gdata->error_severity);

	/*
	 * A component that firmware reset in order to contain the error
	 * has to be reinitialized before use, so handle that as a fatal
	 * AER error.
	 */
	if (gdata->flags & CPER_SEC_RESET)
		aer_severity = AER_FATAL;

	/* Copy of the AER registers handed over to aer_recover_queue(). */
	aer_info = (void *)gen_pool_alloc(ghes_estatus_pool,
					  sizeof(struct aer_capability_regs));
	if (!aer_info)
		return;
	memcpy(aer_info, pcie_err->aer_info, sizeof(struct aer_capability_regs));

	aer_recover_queue(pcie_err->device_id.segment,
			  pcie_err->device_id.bus,
			  devfn, aer_severity,
			  (struct aer_capability_regs *)
			  aer_info);
#endif
}
625 
626 static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list);
627 
/**
 * ghes_register_vendor_record_notifier - subscribe to vendor records.
 * @nb: notifier block to add to vendor_record_notify_list.
 *
 * The chain is invoked from ghes_vendor_record_work_func() with the error
 * severity as the action and the generic data record as the data.
 *
 * Returns the result of blocking_notifier_chain_register().
 */
int ghes_register_vendor_record_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&vendor_record_notify_list, nb);
}
EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier);
633 
/**
 * ghes_unregister_vendor_record_notifier - unsubscribe from vendor records.
 * @nb: notifier block to remove from vendor_record_notify_list.
 */
void ghes_unregister_vendor_record_notifier(struct notifier_block *nb)
{
	blocking_notifier_chain_unregister(&vendor_record_notify_list, nb);
}
EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier);
639 
640 static void ghes_vendor_record_work_func(struct work_struct *work)
641 {
642 	struct ghes_vendor_record_entry *entry;
643 	struct acpi_hest_generic_data *gdata;
644 	u32 len;
645 
646 	entry = container_of(work, struct ghes_vendor_record_entry, work);
647 	gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
648 
649 	blocking_notifier_call_chain(&vendor_record_notify_list,
650 				     entry->error_severity, gdata);
651 
652 	len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
653 	gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len);
654 }
655 
656 static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
657 					  int sev)
658 {
659 	struct acpi_hest_generic_data *copied_gdata;
660 	struct ghes_vendor_record_entry *entry;
661 	u32 len;
662 
663 	len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
664 	entry = (void *)gen_pool_alloc(ghes_estatus_pool, len);
665 	if (!entry)
666 		return;
667 
668 	copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
669 	memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata));
670 	entry->error_severity = sev;
671 
672 	INIT_WORK(&entry->work, ghes_vendor_record_work_func);
673 	schedule_work(&entry->work);
674 }
675 
676 static bool ghes_do_proc(struct ghes *ghes,
677 			 const struct acpi_hest_generic_status *estatus)
678 {
679 	int sev, sec_sev;
680 	struct acpi_hest_generic_data *gdata;
681 	guid_t *sec_type;
682 	const guid_t *fru_id = &guid_null;
683 	char *fru_text = "";
684 	bool queued = false;
685 	bool sync = is_hest_sync_notify(ghes);
686 
687 	sev = ghes_severity(estatus->error_severity);
688 	apei_estatus_for_each_section(estatus, gdata) {
689 		sec_type = (guid_t *)gdata->section_type;
690 		sec_sev = ghes_severity(gdata->error_severity);
691 		if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
692 			fru_id = (guid_t *)gdata->fru_id;
693 
694 		if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
695 			fru_text = gdata->fru_text;
696 
697 		if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
698 			struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
699 
700 			atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err);
701 
702 			arch_apei_report_mem_error(sev, mem_err);
703 			queued = ghes_handle_memory_failure(gdata, sev, sync);
704 		}
705 		else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
706 			ghes_handle_aer(gdata);
707 		}
708 		else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
709 			queued = ghes_handle_arm_hw_error(gdata, sev, sync);
710 		} else {
711 			void *err = acpi_hest_get_payload(gdata);
712 
713 			ghes_defer_non_standard_event(gdata, sev);
714 			log_non_standard_event(sec_type, fru_id, fru_text,
715 					       sec_sev, err,
716 					       gdata->error_data_length);
717 		}
718 	}
719 
720 	return queued;
721 }
722 
723 static void __ghes_print_estatus(const char *pfx,
724 				 const struct acpi_hest_generic *generic,
725 				 const struct acpi_hest_generic_status *estatus)
726 {
727 	static atomic_t seqno;
728 	unsigned int curr_seqno;
729 	char pfx_seq[64];
730 
731 	if (pfx == NULL) {
732 		if (ghes_severity(estatus->error_severity) <=
733 		    GHES_SEV_CORRECTED)
734 			pfx = KERN_WARNING;
735 		else
736 			pfx = KERN_ERR;
737 	}
738 	curr_seqno = atomic_inc_return(&seqno);
739 	snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
740 	printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
741 	       pfx_seq, generic->header.source_id);
742 	cper_estatus_print(pfx_seq, estatus);
743 }
744 
745 static int ghes_print_estatus(const char *pfx,
746 			      const struct acpi_hest_generic *generic,
747 			      const struct acpi_hest_generic_status *estatus)
748 {
749 	/* Not more than 2 messages every 5 seconds */
750 	static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
751 	static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
752 	struct ratelimit_state *ratelimit;
753 
754 	if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
755 		ratelimit = &ratelimit_corrected;
756 	else
757 		ratelimit = &ratelimit_uncorrected;
758 	if (__ratelimit(ratelimit)) {
759 		__ghes_print_estatus(pfx, generic, estatus);
760 		return 1;
761 	}
762 	return 0;
763 }
764 
765 /*
766  * GHES error status reporting throttle, to report more kinds of
767  * errors, instead of just most frequently occurred errors.
768  */
769 static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
770 {
771 	u32 len;
772 	int i, cached = 0;
773 	unsigned long long now;
774 	struct ghes_estatus_cache *cache;
775 	struct acpi_hest_generic_status *cache_estatus;
776 
777 	len = cper_estatus_len(estatus);
778 	rcu_read_lock();
779 	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
780 		cache = rcu_dereference(ghes_estatus_caches[i]);
781 		if (cache == NULL)
782 			continue;
783 		if (len != cache->estatus_len)
784 			continue;
785 		cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
786 		if (memcmp(estatus, cache_estatus, len))
787 			continue;
788 		atomic_inc(&cache->count);
789 		now = sched_clock();
790 		if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
791 			cached = 1;
792 		break;
793 	}
794 	rcu_read_unlock();
795 	return cached;
796 }
797 
798 static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
799 	struct acpi_hest_generic *generic,
800 	struct acpi_hest_generic_status *estatus)
801 {
802 	int alloced;
803 	u32 len, cache_len;
804 	struct ghes_estatus_cache *cache;
805 	struct acpi_hest_generic_status *cache_estatus;
806 
807 	alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
808 	if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
809 		atomic_dec(&ghes_estatus_cache_alloced);
810 		return NULL;
811 	}
812 	len = cper_estatus_len(estatus);
813 	cache_len = GHES_ESTATUS_CACHE_LEN(len);
814 	cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
815 	if (!cache) {
816 		atomic_dec(&ghes_estatus_cache_alloced);
817 		return NULL;
818 	}
819 	cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
820 	memcpy(cache_estatus, estatus, len);
821 	cache->estatus_len = len;
822 	atomic_set(&cache->count, 0);
823 	cache->generic = generic;
824 	cache->time_in = sched_clock();
825 	return cache;
826 }
827 
828 static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
829 {
830 	struct ghes_estatus_cache *cache;
831 	u32 len;
832 
833 	cache = container_of(head, struct ghes_estatus_cache, rcu);
834 	len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
835 	len = GHES_ESTATUS_CACHE_LEN(len);
836 	gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
837 	atomic_dec(&ghes_estatus_cache_alloced);
838 }
839 
840 static void
841 ghes_estatus_cache_add(struct acpi_hest_generic *generic,
842 		       struct acpi_hest_generic_status *estatus)
843 {
844 	unsigned long long now, duration, period, max_period = 0;
845 	struct ghes_estatus_cache *cache, *new_cache;
846 	struct ghes_estatus_cache __rcu *victim;
847 	int i, slot = -1, count;
848 
849 	new_cache = ghes_estatus_cache_alloc(generic, estatus);
850 	if (!new_cache)
851 		return;
852 
853 	rcu_read_lock();
854 	now = sched_clock();
855 	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
856 		cache = rcu_dereference(ghes_estatus_caches[i]);
857 		if (cache == NULL) {
858 			slot = i;
859 			break;
860 		}
861 		duration = now - cache->time_in;
862 		if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
863 			slot = i;
864 			break;
865 		}
866 		count = atomic_read(&cache->count);
867 		period = duration;
868 		do_div(period, (count + 1));
869 		if (period > max_period) {
870 			max_period = period;
871 			slot = i;
872 		}
873 	}
874 	rcu_read_unlock();
875 
876 	if (slot != -1) {
877 		/*
878 		 * Use release semantics to ensure that ghes_estatus_cached()
879 		 * running on another CPU will see the updated cache fields if
880 		 * it can see the new value of the pointer.
881 		 */
882 		victim = xchg_release(&ghes_estatus_caches[slot],
883 				      RCU_INITIALIZER(new_cache));
884 
885 		/*
886 		 * At this point, victim may point to a cached item different
887 		 * from the one based on which we selected the slot. Instead of
888 		 * going to the loop again to pick another slot, let's just
889 		 * drop the other item anyway: this may cause a false cache
890 		 * miss later on, but that won't cause any problems.
891 		 */
892 		if (victim)
893 			call_rcu(&unrcu_pointer(victim)->rcu,
894 				 ghes_estatus_cache_rcu_free);
895 	}
896 }
897 
/*
 * Handle a fatal error: print the status at KERN_EMERG, clear (and on
 * GHESv2 acknowledge) the error status block, then panic.
 */
static void __ghes_panic(struct ghes *ghes,
			 struct acpi_hest_generic_status *estatus,
			 u64 buf_paddr, enum fixed_addresses fixmap_idx)
{
	__ghes_print_estatus(KERN_EMERG, ghes->generic, estatus);

	/* Clear the block before panicking; nothing after panic() will do it. */
	ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx);

	/* reboot to log the error! */
	/* Only applied when panic_timeout is 0; a non-zero value is left alone. */
	if (!panic_timeout)
		panic_timeout = ghes_panic_timeout;
	panic("Fatal hardware error!");
}
911 
912 static int ghes_proc(struct ghes *ghes)
913 {
914 	struct acpi_hest_generic_status *estatus = ghes->estatus;
915 	u64 buf_paddr;
916 	int rc;
917 
918 	rc = ghes_read_estatus(ghes, estatus, &buf_paddr, FIX_APEI_GHES_IRQ);
919 	if (rc)
920 		goto out;
921 
922 	if (ghes_severity(estatus->error_severity) >= GHES_SEV_PANIC)
923 		__ghes_panic(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ);
924 
925 	if (!ghes_estatus_cached(estatus)) {
926 		if (ghes_print_estatus(NULL, ghes->generic, estatus))
927 			ghes_estatus_cache_add(ghes->generic, estatus);
928 	}
929 	ghes_do_proc(ghes, estatus);
930 
931 out:
932 	ghes_clear_estatus(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ);
933 
934 	return rc;
935 }
936 
937 static void ghes_add_timer(struct ghes *ghes)
938 {
939 	struct acpi_hest_generic *g = ghes->generic;
940 	unsigned long expire;
941 
942 	if (!g->notify.poll_interval) {
943 		pr_warn(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
944 			g->header.source_id);
945 		return;
946 	}
947 	expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
948 	ghes->timer.expires = round_jiffies_relative(expire);
949 	add_timer(&ghes->timer);
950 }
951 
/*
 * Timer callback for polled error sources: process the source under
 * ghes_notify_lock_irq, then re-arm the timer unless the source is being
 * torn down (GHES_EXITING).
 */
static void ghes_poll_func(struct timer_list *t)
{
	struct ghes *ghes = from_timer(ghes, t, timer);
	unsigned long flags;

	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
	ghes_proc(ghes);
	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
	if (!(ghes->flags & GHES_EXITING))
		ghes_add_timer(ghes);
}
963 
964 static irqreturn_t ghes_irq_func(int irq, void *data)
965 {
966 	struct ghes *ghes = data;
967 	unsigned long flags;
968 	int rc;
969 
970 	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
971 	rc = ghes_proc(ghes);
972 	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
973 	if (rc)
974 		return IRQ_NONE;
975 
976 	return IRQ_HANDLED;
977 }
978 
979 static int ghes_notify_hed(struct notifier_block *this, unsigned long event,
980 			   void *data)
981 {
982 	struct ghes *ghes;
983 	unsigned long flags;
984 	int ret = NOTIFY_DONE;
985 
986 	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
987 	rcu_read_lock();
988 	list_for_each_entry_rcu(ghes, &ghes_hed, list) {
989 		if (!ghes_proc(ghes))
990 			ret = NOTIFY_OK;
991 	}
992 	rcu_read_unlock();
993 	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
994 
995 	return ret;
996 }
997 
/* Notifier block whose callback, ghes_notify_hed(), walks the ghes_hed list */
static struct notifier_block ghes_notifier_hed = {
	.notifier_call = ghes_notify_hed,
};
1001 
1002 /*
1003  * Handlers for CPER records may not be NMI safe. For example,
1004  * memory_failure_queue() takes spinlocks and calls schedule_work_on().
1005  * In any NMI-like handler, memory from ghes_estatus_pool is used to save
1006  * estatus, and added to the ghes_estatus_llist. irq_work_queue() causes
1007  * ghes_proc_in_irq() to run in IRQ context where each estatus in
1008  * ghes_estatus_llist is processed.
1009  *
1010  * Memory from the ghes_estatus_pool is also used with the ghes_estatus_cache
1011  * to suppress frequent messages.
1012  */
1013 static struct llist_head ghes_estatus_llist;
1014 static struct irq_work ghes_proc_irq_work;
1015 
1016 static void ghes_proc_in_irq(struct irq_work *irq_work)
1017 {
1018 	struct llist_node *llnode, *next;
1019 	struct ghes_estatus_node *estatus_node;
1020 	struct acpi_hest_generic *generic;
1021 	struct acpi_hest_generic_status *estatus;
1022 	bool task_work_pending;
1023 	u32 len, node_len;
1024 	int ret;
1025 
1026 	llnode = llist_del_all(&ghes_estatus_llist);
1027 	/*
1028 	 * Because the time order of estatus in list is reversed,
1029 	 * revert it back to proper order.
1030 	 */
1031 	llnode = llist_reverse_order(llnode);
1032 	while (llnode) {
1033 		next = llnode->next;
1034 		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
1035 					   llnode);
1036 		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
1037 		len = cper_estatus_len(estatus);
1038 		node_len = GHES_ESTATUS_NODE_LEN(len);
1039 		task_work_pending = ghes_do_proc(estatus_node->ghes, estatus);
1040 		if (!ghes_estatus_cached(estatus)) {
1041 			generic = estatus_node->generic;
1042 			if (ghes_print_estatus(NULL, generic, estatus))
1043 				ghes_estatus_cache_add(generic, estatus);
1044 		}
1045 
1046 		if (task_work_pending && current->mm) {
1047 			estatus_node->task_work.func = ghes_kick_task_work;
1048 			estatus_node->task_work_cpu = smp_processor_id();
1049 			ret = task_work_add(current, &estatus_node->task_work,
1050 					    TWA_RESUME);
1051 			if (ret)
1052 				estatus_node->task_work.func = NULL;
1053 		}
1054 
1055 		if (!estatus_node->task_work.func)
1056 			gen_pool_free(ghes_estatus_pool,
1057 				      (unsigned long)estatus_node, node_len);
1058 
1059 		llnode = next;
1060 	}
1061 }
1062 
1063 static void ghes_print_queued_estatus(void)
1064 {
1065 	struct llist_node *llnode;
1066 	struct ghes_estatus_node *estatus_node;
1067 	struct acpi_hest_generic *generic;
1068 	struct acpi_hest_generic_status *estatus;
1069 
1070 	llnode = llist_del_all(&ghes_estatus_llist);
1071 	/*
1072 	 * Because the time order of estatus in list is reversed,
1073 	 * revert it back to proper order.
1074 	 */
1075 	llnode = llist_reverse_order(llnode);
1076 	while (llnode) {
1077 		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
1078 					   llnode);
1079 		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
1080 		generic = estatus_node->generic;
1081 		ghes_print_estatus(NULL, generic, estatus);
1082 		llnode = llnode->next;
1083 	}
1084 }
1085 
1086 static int ghes_in_nmi_queue_one_entry(struct ghes *ghes,
1087 				       enum fixed_addresses fixmap_idx)
1088 {
1089 	struct acpi_hest_generic_status *estatus, tmp_header;
1090 	struct ghes_estatus_node *estatus_node;
1091 	u32 len, node_len;
1092 	u64 buf_paddr;
1093 	int sev, rc;
1094 
1095 	if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG))
1096 		return -EOPNOTSUPP;
1097 
1098 	rc = __ghes_peek_estatus(ghes, &tmp_header, &buf_paddr, fixmap_idx);
1099 	if (rc) {
1100 		ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
1101 		return rc;
1102 	}
1103 
1104 	rc = __ghes_check_estatus(ghes, &tmp_header);
1105 	if (rc) {
1106 		ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
1107 		return rc;
1108 	}
1109 
1110 	len = cper_estatus_len(&tmp_header);
1111 	node_len = GHES_ESTATUS_NODE_LEN(len);
1112 	estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len);
1113 	if (!estatus_node)
1114 		return -ENOMEM;
1115 
1116 	estatus_node->ghes = ghes;
1117 	estatus_node->generic = ghes->generic;
1118 	estatus_node->task_work.func = NULL;
1119 	estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
1120 
1121 	if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) {
1122 		ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx);
1123 		rc = -ENOENT;
1124 		goto no_work;
1125 	}
1126 
1127 	sev = ghes_severity(estatus->error_severity);
1128 	if (sev >= GHES_SEV_PANIC) {
1129 		ghes_print_queued_estatus();
1130 		__ghes_panic(ghes, estatus, buf_paddr, fixmap_idx);
1131 	}
1132 
1133 	ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
1134 
1135 	/* This error has been reported before, don't process it again. */
1136 	if (ghes_estatus_cached(estatus))
1137 		goto no_work;
1138 
1139 	llist_add(&estatus_node->llnode, &ghes_estatus_llist);
1140 
1141 	return rc;
1142 
1143 no_work:
1144 	gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
1145 		      node_len);
1146 
1147 	return rc;
1148 }
1149 
1150 static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list,
1151 				       enum fixed_addresses fixmap_idx)
1152 {
1153 	int ret = -ENOENT;
1154 	struct ghes *ghes;
1155 
1156 	rcu_read_lock();
1157 	list_for_each_entry_rcu(ghes, rcu_list, list) {
1158 		if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx))
1159 			ret = 0;
1160 	}
1161 	rcu_read_unlock();
1162 
1163 	if (IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && !ret)
1164 		irq_work_queue(&ghes_proc_irq_work);
1165 
1166 	return ret;
1167 }
1168 
1169 #ifdef CONFIG_ACPI_APEI_SEA
1170 static LIST_HEAD(ghes_sea);
1171 
1172 /*
1173  * Return 0 only if one of the SEA error sources successfully reported an error
1174  * record sent from the firmware.
1175  */
1176 int ghes_notify_sea(void)
1177 {
1178 	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sea);
1179 	int rv;
1180 
1181 	raw_spin_lock(&ghes_notify_lock_sea);
1182 	rv = ghes_in_nmi_spool_from_list(&ghes_sea, FIX_APEI_GHES_SEA);
1183 	raw_spin_unlock(&ghes_notify_lock_sea);
1184 
1185 	return rv;
1186 }
1187 
1188 static void ghes_sea_add(struct ghes *ghes)
1189 {
1190 	mutex_lock(&ghes_list_mutex);
1191 	list_add_rcu(&ghes->list, &ghes_sea);
1192 	mutex_unlock(&ghes_list_mutex);
1193 }
1194 
1195 static void ghes_sea_remove(struct ghes *ghes)
1196 {
1197 	mutex_lock(&ghes_list_mutex);
1198 	list_del_rcu(&ghes->list);
1199 	mutex_unlock(&ghes_list_mutex);
1200 	synchronize_rcu();
1201 }
1202 #else /* CONFIG_ACPI_APEI_SEA */
/* Without CONFIG_ACPI_APEI_SEA these are empty stubs. */
static inline void ghes_sea_add(struct ghes *ghes)
{
}

static inline void ghes_sea_remove(struct ghes *ghes)
{
}
1205 #endif /* CONFIG_ACPI_APEI_SEA */
1206 
1207 #ifdef CONFIG_HAVE_ACPI_APEI_NMI
1208 /*
1209  * NMI may be triggered on any CPU, so ghes_in_nmi is used for
1210  * having only one concurrent reader.
1211  */
1212 static atomic_t ghes_in_nmi = ATOMIC_INIT(0);
1213 
1214 static LIST_HEAD(ghes_nmi);
1215 
1216 static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
1217 {
1218 	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_nmi);
1219 	int ret = NMI_DONE;
1220 
1221 	if (!atomic_add_unless(&ghes_in_nmi, 1, 1))
1222 		return ret;
1223 
1224 	raw_spin_lock(&ghes_notify_lock_nmi);
1225 	if (!ghes_in_nmi_spool_from_list(&ghes_nmi, FIX_APEI_GHES_NMI))
1226 		ret = NMI_HANDLED;
1227 	raw_spin_unlock(&ghes_notify_lock_nmi);
1228 
1229 	atomic_dec(&ghes_in_nmi);
1230 	return ret;
1231 }
1232 
1233 static void ghes_nmi_add(struct ghes *ghes)
1234 {
1235 	mutex_lock(&ghes_list_mutex);
1236 	if (list_empty(&ghes_nmi))
1237 		register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
1238 	list_add_rcu(&ghes->list, &ghes_nmi);
1239 	mutex_unlock(&ghes_list_mutex);
1240 }
1241 
1242 static void ghes_nmi_remove(struct ghes *ghes)
1243 {
1244 	mutex_lock(&ghes_list_mutex);
1245 	list_del_rcu(&ghes->list);
1246 	if (list_empty(&ghes_nmi))
1247 		unregister_nmi_handler(NMI_LOCAL, "ghes");
1248 	mutex_unlock(&ghes_list_mutex);
1249 	/*
1250 	 * To synchronize with NMI handler, ghes can only be
1251 	 * freed after NMI handler finishes.
1252 	 */
1253 	synchronize_rcu();
1254 }
1255 #else /* CONFIG_HAVE_ACPI_APEI_NMI */
/* Without CONFIG_HAVE_ACPI_APEI_NMI these are empty stubs. */
static inline void ghes_nmi_add(struct ghes *ghes)
{
}

static inline void ghes_nmi_remove(struct ghes *ghes)
{
}
1258 #endif /* CONFIG_HAVE_ACPI_APEI_NMI */
1259 
1260 static void ghes_nmi_init_cxt(void)
1261 {
1262 	init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
1263 }
1264 
1265 static int __ghes_sdei_callback(struct ghes *ghes,
1266 				enum fixed_addresses fixmap_idx)
1267 {
1268 	if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) {
1269 		irq_work_queue(&ghes_proc_irq_work);
1270 
1271 		return 0;
1272 	}
1273 
1274 	return -ENOENT;
1275 }
1276 
1277 static int ghes_sdei_normal_callback(u32 event_num, struct pt_regs *regs,
1278 				      void *arg)
1279 {
1280 	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_normal);
1281 	struct ghes *ghes = arg;
1282 	int err;
1283 
1284 	raw_spin_lock(&ghes_notify_lock_sdei_normal);
1285 	err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_NORMAL);
1286 	raw_spin_unlock(&ghes_notify_lock_sdei_normal);
1287 
1288 	return err;
1289 }
1290 
1291 static int ghes_sdei_critical_callback(u32 event_num, struct pt_regs *regs,
1292 				       void *arg)
1293 {
1294 	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_critical);
1295 	struct ghes *ghes = arg;
1296 	int err;
1297 
1298 	raw_spin_lock(&ghes_notify_lock_sdei_critical);
1299 	err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_CRITICAL);
1300 	raw_spin_unlock(&ghes_notify_lock_sdei_critical);
1301 
1302 	return err;
1303 }
1304 
1305 static int apei_sdei_register_ghes(struct ghes *ghes)
1306 {
1307 	if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
1308 		return -EOPNOTSUPP;
1309 
1310 	return sdei_register_ghes(ghes, ghes_sdei_normal_callback,
1311 				 ghes_sdei_critical_callback);
1312 }
1313 
1314 static int apei_sdei_unregister_ghes(struct ghes *ghes)
1315 {
1316 	if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
1317 		return -EOPNOTSUPP;
1318 
1319 	return sdei_unregister_ghes(ghes);
1320 }
1321 
1322 static int ghes_probe(struct platform_device *ghes_dev)
1323 {
1324 	struct acpi_hest_generic *generic;
1325 	struct ghes *ghes = NULL;
1326 	unsigned long flags;
1327 
1328 	int rc = -EINVAL;
1329 
1330 	generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
1331 	if (!generic->enabled)
1332 		return -ENODEV;
1333 
1334 	switch (generic->notify.type) {
1335 	case ACPI_HEST_NOTIFY_POLLED:
1336 	case ACPI_HEST_NOTIFY_EXTERNAL:
1337 	case ACPI_HEST_NOTIFY_SCI:
1338 	case ACPI_HEST_NOTIFY_GSIV:
1339 	case ACPI_HEST_NOTIFY_GPIO:
1340 		break;
1341 
1342 	case ACPI_HEST_NOTIFY_SEA:
1343 		if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
1344 			pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n",
1345 				generic->header.source_id);
1346 			rc = -ENOTSUPP;
1347 			goto err;
1348 		}
1349 		break;
1350 	case ACPI_HEST_NOTIFY_NMI:
1351 		if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
1352 			pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
1353 				generic->header.source_id);
1354 			goto err;
1355 		}
1356 		break;
1357 	case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
1358 		if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) {
1359 			pr_warn(GHES_PFX "Generic hardware error source: %d notified via SDE Interface is not supported!\n",
1360 				generic->header.source_id);
1361 			goto err;
1362 		}
1363 		break;
1364 	case ACPI_HEST_NOTIFY_LOCAL:
1365 		pr_warn(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
1366 			generic->header.source_id);
1367 		goto err;
1368 	default:
1369 		pr_warn(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
1370 			generic->notify.type, generic->header.source_id);
1371 		goto err;
1372 	}
1373 
1374 	rc = -EIO;
1375 	if (generic->error_block_length <
1376 	    sizeof(struct acpi_hest_generic_status)) {
1377 		pr_warn(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
1378 			generic->error_block_length, generic->header.source_id);
1379 		goto err;
1380 	}
1381 	ghes = ghes_new(generic);
1382 	if (IS_ERR(ghes)) {
1383 		rc = PTR_ERR(ghes);
1384 		ghes = NULL;
1385 		goto err;
1386 	}
1387 
1388 	switch (generic->notify.type) {
1389 	case ACPI_HEST_NOTIFY_POLLED:
1390 		timer_setup(&ghes->timer, ghes_poll_func, 0);
1391 		ghes_add_timer(ghes);
1392 		break;
1393 	case ACPI_HEST_NOTIFY_EXTERNAL:
1394 		/* External interrupt vector is GSI */
1395 		rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq);
1396 		if (rc) {
1397 			pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
1398 			       generic->header.source_id);
1399 			goto err;
1400 		}
1401 		rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED,
1402 				 "GHES IRQ", ghes);
1403 		if (rc) {
1404 			pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
1405 			       generic->header.source_id);
1406 			goto err;
1407 		}
1408 		break;
1409 
1410 	case ACPI_HEST_NOTIFY_SCI:
1411 	case ACPI_HEST_NOTIFY_GSIV:
1412 	case ACPI_HEST_NOTIFY_GPIO:
1413 		mutex_lock(&ghes_list_mutex);
1414 		if (list_empty(&ghes_hed))
1415 			register_acpi_hed_notifier(&ghes_notifier_hed);
1416 		list_add_rcu(&ghes->list, &ghes_hed);
1417 		mutex_unlock(&ghes_list_mutex);
1418 		break;
1419 
1420 	case ACPI_HEST_NOTIFY_SEA:
1421 		ghes_sea_add(ghes);
1422 		break;
1423 	case ACPI_HEST_NOTIFY_NMI:
1424 		ghes_nmi_add(ghes);
1425 		break;
1426 	case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
1427 		rc = apei_sdei_register_ghes(ghes);
1428 		if (rc)
1429 			goto err;
1430 		break;
1431 	default:
1432 		BUG();
1433 	}
1434 
1435 	platform_set_drvdata(ghes_dev, ghes);
1436 
1437 	ghes->dev = &ghes_dev->dev;
1438 
1439 	mutex_lock(&ghes_devs_mutex);
1440 	list_add_tail(&ghes->elist, &ghes_devs);
1441 	mutex_unlock(&ghes_devs_mutex);
1442 
1443 	/* Handle any pending errors right away */
1444 	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
1445 	ghes_proc(ghes);
1446 	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
1447 
1448 	return 0;
1449 
1450 err:
1451 	if (ghes) {
1452 		ghes_fini(ghes);
1453 		kfree(ghes);
1454 	}
1455 	return rc;
1456 }
1457 
1458 static void ghes_remove(struct platform_device *ghes_dev)
1459 {
1460 	int rc;
1461 	struct ghes *ghes;
1462 	struct acpi_hest_generic *generic;
1463 
1464 	ghes = platform_get_drvdata(ghes_dev);
1465 	generic = ghes->generic;
1466 
1467 	ghes->flags |= GHES_EXITING;
1468 	switch (generic->notify.type) {
1469 	case ACPI_HEST_NOTIFY_POLLED:
1470 		timer_shutdown_sync(&ghes->timer);
1471 		break;
1472 	case ACPI_HEST_NOTIFY_EXTERNAL:
1473 		free_irq(ghes->irq, ghes);
1474 		break;
1475 
1476 	case ACPI_HEST_NOTIFY_SCI:
1477 	case ACPI_HEST_NOTIFY_GSIV:
1478 	case ACPI_HEST_NOTIFY_GPIO:
1479 		mutex_lock(&ghes_list_mutex);
1480 		list_del_rcu(&ghes->list);
1481 		if (list_empty(&ghes_hed))
1482 			unregister_acpi_hed_notifier(&ghes_notifier_hed);
1483 		mutex_unlock(&ghes_list_mutex);
1484 		synchronize_rcu();
1485 		break;
1486 
1487 	case ACPI_HEST_NOTIFY_SEA:
1488 		ghes_sea_remove(ghes);
1489 		break;
1490 	case ACPI_HEST_NOTIFY_NMI:
1491 		ghes_nmi_remove(ghes);
1492 		break;
1493 	case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
1494 		rc = apei_sdei_unregister_ghes(ghes);
1495 		if (rc) {
1496 			/*
1497 			 * Returning early results in a resource leak, but we're
1498 			 * only here if stopping the hardware failed.
1499 			 */
1500 			dev_err(&ghes_dev->dev, "Failed to unregister ghes (%pe)\n",
1501 				ERR_PTR(rc));
1502 			return;
1503 		}
1504 		break;
1505 	default:
1506 		BUG();
1507 		break;
1508 	}
1509 
1510 	ghes_fini(ghes);
1511 
1512 	mutex_lock(&ghes_devs_mutex);
1513 	list_del(&ghes->elist);
1514 	mutex_unlock(&ghes_devs_mutex);
1515 
1516 	kfree(ghes);
1517 }
1518 
1519 static struct platform_driver ghes_platform_driver = {
1520 	.driver		= {
1521 		.name	= "GHES",
1522 	},
1523 	.probe		= ghes_probe,
1524 	.remove_new	= ghes_remove,
1525 };
1526 
1527 void __init acpi_ghes_init(void)
1528 {
1529 	int rc;
1530 
1531 	sdei_init();
1532 
1533 	if (acpi_disabled)
1534 		return;
1535 
1536 	switch (hest_disable) {
1537 	case HEST_NOT_FOUND:
1538 		return;
1539 	case HEST_DISABLED:
1540 		pr_info(GHES_PFX "HEST is not enabled!\n");
1541 		return;
1542 	default:
1543 		break;
1544 	}
1545 
1546 	if (ghes_disable) {
1547 		pr_info(GHES_PFX "GHES is not enabled!\n");
1548 		return;
1549 	}
1550 
1551 	ghes_nmi_init_cxt();
1552 
1553 	rc = platform_driver_register(&ghes_platform_driver);
1554 	if (rc)
1555 		return;
1556 
1557 	rc = apei_osc_setup();
1558 	if (rc == 0 && osc_sb_apei_support_acked)
1559 		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
1560 	else if (rc == 0 && !osc_sb_apei_support_acked)
1561 		pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
1562 	else if (rc && osc_sb_apei_support_acked)
1563 		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
1564 	else
1565 		pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
1566 }
1567 
1568 /*
1569  * Known x86 systems that prefer GHES error reporting:
1570  */
1571 static struct acpi_platform_list plat_list[] = {
1572 	{"HPE   ", "Server  ", 0, ACPI_SIG_FADT, all_versions},
1573 	{ } /* End */
1574 };
1575 
1576 struct list_head *ghes_get_devices(void)
1577 {
1578 	int idx = -1;
1579 
1580 	if (IS_ENABLED(CONFIG_X86)) {
1581 		idx = acpi_match_platform_list(plat_list);
1582 		if (idx < 0) {
1583 			if (!ghes_edac_force_enable)
1584 				return NULL;
1585 
1586 			pr_warn_once("Force-loading ghes_edac on an unsupported platform. You're on your own!\n");
1587 		}
1588 	} else if (list_empty(&ghes_devs)) {
1589 		return NULL;
1590 	}
1591 
1592 	return &ghes_devs;
1593 }
1594 EXPORT_SYMBOL_GPL(ghes_get_devices);
1595 
1596 void ghes_register_report_chain(struct notifier_block *nb)
1597 {
1598 	atomic_notifier_chain_register(&ghes_report_chain, nb);
1599 }
1600 EXPORT_SYMBOL_GPL(ghes_register_report_chain);
1601 
1602 void ghes_unregister_report_chain(struct notifier_block *nb)
1603 {
1604 	atomic_notifier_chain_unregister(&ghes_report_chain, nb);
1605 }
1606 EXPORT_SYMBOL_GPL(ghes_unregister_report_chain);
1607