xref: /linux/drivers/acpi/apei/ghes.c (revision ebf68996de0ab250c5d520eb2291ab65643e9a1e)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * APEI Generic Hardware Error Source support
4  *
5  * Generic Hardware Error Source provides a way to report platform
6  * hardware errors (such as that from chipset). It works in so called
7  * "Firmware First" mode, that is, hardware errors are reported to
8  * firmware firstly, then reported to Linux by firmware. This way,
9  * some non-standard hardware error registers or non-standard hardware
10  * link can be checked by firmware to produce more hardware error
11  * information for Linux.
12  *
13  * For more information about Generic Hardware Error Source, please
14  * refer to ACPI Specification version 4.0, section 17.3.2.6
15  *
16  * Copyright 2010,2011 Intel Corp.
17  *   Author: Huang Ying <ying.huang@intel.com>
18  */
19 
20 #include <linux/arm_sdei.h>
21 #include <linux/kernel.h>
22 #include <linux/moduleparam.h>
23 #include <linux/init.h>
24 #include <linux/acpi.h>
25 #include <linux/io.h>
26 #include <linux/interrupt.h>
27 #include <linux/timer.h>
28 #include <linux/cper.h>
29 #include <linux/platform_device.h>
30 #include <linux/mutex.h>
31 #include <linux/ratelimit.h>
32 #include <linux/vmalloc.h>
33 #include <linux/irq_work.h>
34 #include <linux/llist.h>
35 #include <linux/genalloc.h>
36 #include <linux/pci.h>
37 #include <linux/pfn.h>
38 #include <linux/aer.h>
39 #include <linux/nmi.h>
40 #include <linux/sched/clock.h>
41 #include <linux/uuid.h>
42 #include <linux/ras.h>
43 
44 #include <acpi/actbl1.h>
45 #include <acpi/ghes.h>
46 #include <acpi/apei.h>
47 #include <asm/fixmap.h>
48 #include <asm/tlbflush.h>
49 #include <ras/ras_event.h>
50 
51 #include "apei-internal.h"
52 
/* Prefix prepended to the log messages emitted by this file. */
#define GHES_PFX	"GHES: "

/* Upper bound applied to the per-source estatus buffer in ghes_new(). */
#define GHES_ESTATUS_MAX_SIZE		65536
/* Bytes reserved in the estatus pool for each error source. */
#define GHES_ESOURCE_PREALLOC_MAX_SIZE	65536

/* min_alloc_order handed to gen_pool_create() for the estatus pool. */
#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3

/* This is just an estimation for memory pool allocation */
#define GHES_ESTATUS_CACHE_AVG_SIZE	512

/* Number of slots in ghes_estatus_caches[]. */
#define GHES_ESTATUS_CACHES_SIZE	4

/*
 * Age (compared against sched_clock() deltas) after which a cached estatus
 * no longer suppresses printing and may be replaced.
 */
#define GHES_ESTATUS_IN_CACHE_MAX_NSEC	10000000000ULL
/* Prevent too many caches from being allocated because of RCU */
#define GHES_ESTATUS_CACHE_ALLOCED_MAX	(GHES_ESTATUS_CACHES_SIZE * 3 / 2)

/*
 * A cache entry is a struct ghes_estatus_cache header immediately followed
 * by a copy of the error status block.
 */
#define GHES_ESTATUS_CACHE_LEN(estatus_len)			\
	(sizeof(struct ghes_estatus_cache) + (estatus_len))
#define GHES_ESTATUS_FROM_CACHE(estatus_cache)			\
	((struct acpi_hest_generic_status *)				\
	 ((struct ghes_estatus_cache *)(estatus_cache) + 1))

/*
 * A queued node is a struct ghes_estatus_node header immediately followed
 * by a copy of the error status block.
 */
#define GHES_ESTATUS_NODE_LEN(estatus_len)			\
	(sizeof(struct ghes_estatus_node) + (estatus_len))
#define GHES_ESTATUS_FROM_NODE(estatus_node)			\
	((struct acpi_hest_generic_status *)				\
	 ((struct ghes_estatus_node *)(estatus_node) + 1))
80 
/*
 * NMI-like notifications vary by architecture. The SDEI callbacks in this
 * file reference these fixmap indexes unconditionally, so when
 * CONFIG_ARM_SDE_INTERFACE is not set they still need a value in order to
 * compile; the compiler can only prune the unused static functions after
 * that.
 */
#ifndef CONFIG_ARM_SDE_INTERFACE
#define FIX_APEI_GHES_SDEI_NORMAL	__end_of_fixed_addresses
#define FIX_APEI_GHES_SDEI_CRITICAL	__end_of_fixed_addresses
#endif
89 
90 static inline bool is_hest_type_generic_v2(struct ghes *ghes)
91 {
92 	return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
93 }
94 
/*
 * This driver isn't really modular, however for the time being,
 * continuing to use module_param is the easiest way to remain
 * compatible with existing boot arg use cases.
 */
bool ghes_disable;
module_param_named(disable, ghes_disable, bool, 0);

/*
 * All error sources notified with HED (Hardware Error Device) share a
 * single notifier callback, so they need to be linked and checked one
 * by one. This holds true for NMI too.
 *
 * RCU is used for these lists, so ghes_list_mutex is only used for
 * list changing, not for traversing.
 */
static LIST_HEAD(ghes_hed);
static DEFINE_MUTEX(ghes_list_mutex);

/*
 * Because the memory area used to transfer hardware error information
 * from BIOS to Linux can be determined only in NMI, IRQ or timer
 * handler, but general ioremap can not be used in atomic context, so
 * the fixmap is used instead.
 *
 * This spinlock is used to prevent the fixmap entry from being used
 * simultaneously.
 */
static DEFINE_SPINLOCK(ghes_notify_lock_irq);

/*
 * Pool backing both the estatus nodes queued by NMI-like handlers and the
 * estatus throttle cache entries; created by ghes_estatus_pool_init().
 */
static struct gen_pool *ghes_estatus_pool;
/* Page-aligned size requested for the pool by ghes_estatus_pool_init(). */
static unsigned long ghes_estatus_pool_size_request;

/* Throttle cache slots; read under RCU and replaced with cmpxchg(). */
static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
/* Number of cache entries currently allocated from the pool. */
static atomic_t ghes_estatus_cache_alloced;

/* Value installed in panic_timeout by __ghes_panic() when none is set. */
static int ghes_panic_timeout __read_mostly = 30;
132 
133 static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx)
134 {
135 	phys_addr_t paddr;
136 	pgprot_t prot;
137 
138 	paddr = PFN_PHYS(pfn);
139 	prot = arch_apei_get_mem_attribute(paddr);
140 	__set_fixmap(fixmap_idx, paddr, prot);
141 
142 	return (void __iomem *) __fix_to_virt(fixmap_idx);
143 }
144 
145 static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx)
146 {
147 	int _idx = virt_to_fix((unsigned long)vaddr);
148 
149 	WARN_ON_ONCE(fixmap_idx != _idx);
150 	clear_fixmap(fixmap_idx);
151 }
152 
153 int ghes_estatus_pool_init(int num_ghes)
154 {
155 	unsigned long addr, len;
156 
157 	ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
158 	if (!ghes_estatus_pool)
159 		return -ENOMEM;
160 
161 	len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX;
162 	len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE);
163 
164 	ghes_estatus_pool_size_request = PAGE_ALIGN(len);
165 	addr = (unsigned long)vmalloc(PAGE_ALIGN(len));
166 	if (!addr)
167 		return -ENOMEM;
168 
169 	/*
170 	 * New allocation must be visible in all pgd before it can be found by
171 	 * an NMI allocating from the pool.
172 	 */
173 	vmalloc_sync_all();
174 
175 	return gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
176 }
177 
178 static int map_gen_v2(struct ghes *ghes)
179 {
180 	return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
181 }
182 
183 static void unmap_gen_v2(struct ghes *ghes)
184 {
185 	apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
186 }
187 
188 static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
189 {
190 	int rc;
191 	u64 val = 0;
192 
193 	rc = apei_read(&val, &gv2->read_ack_register);
194 	if (rc)
195 		return;
196 
197 	val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
198 	val |= gv2->read_ack_write    << gv2->read_ack_register.bit_offset;
199 
200 	apei_write(val, &gv2->read_ack_register);
201 }
202 
203 static struct ghes *ghes_new(struct acpi_hest_generic *generic)
204 {
205 	struct ghes *ghes;
206 	unsigned int error_block_length;
207 	int rc;
208 
209 	ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
210 	if (!ghes)
211 		return ERR_PTR(-ENOMEM);
212 
213 	ghes->generic = generic;
214 	if (is_hest_type_generic_v2(ghes)) {
215 		rc = map_gen_v2(ghes);
216 		if (rc)
217 			goto err_free;
218 	}
219 
220 	rc = apei_map_generic_address(&generic->error_status_address);
221 	if (rc)
222 		goto err_unmap_read_ack_addr;
223 	error_block_length = generic->error_block_length;
224 	if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
225 		pr_warning(FW_WARN GHES_PFX
226 			   "Error status block length is too long: %u for "
227 			   "generic hardware error source: %d.\n",
228 			   error_block_length, generic->header.source_id);
229 		error_block_length = GHES_ESTATUS_MAX_SIZE;
230 	}
231 	ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
232 	if (!ghes->estatus) {
233 		rc = -ENOMEM;
234 		goto err_unmap_status_addr;
235 	}
236 
237 	return ghes;
238 
239 err_unmap_status_addr:
240 	apei_unmap_generic_address(&generic->error_status_address);
241 err_unmap_read_ack_addr:
242 	if (is_hest_type_generic_v2(ghes))
243 		unmap_gen_v2(ghes);
244 err_free:
245 	kfree(ghes);
246 	return ERR_PTR(rc);
247 }
248 
249 static void ghes_fini(struct ghes *ghes)
250 {
251 	kfree(ghes->estatus);
252 	apei_unmap_generic_address(&ghes->generic->error_status_address);
253 	if (is_hest_type_generic_v2(ghes))
254 		unmap_gen_v2(ghes);
255 }
256 
257 static inline int ghes_severity(int severity)
258 {
259 	switch (severity) {
260 	case CPER_SEV_INFORMATIONAL:
261 		return GHES_SEV_NO;
262 	case CPER_SEV_CORRECTED:
263 		return GHES_SEV_CORRECTED;
264 	case CPER_SEV_RECOVERABLE:
265 		return GHES_SEV_RECOVERABLE;
266 	case CPER_SEV_FATAL:
267 		return GHES_SEV_PANIC;
268 	default:
269 		/* Unknown, go panic */
270 		return GHES_SEV_PANIC;
271 	}
272 }
273 
274 static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
275 				  int from_phys,
276 				  enum fixed_addresses fixmap_idx)
277 {
278 	void __iomem *vaddr;
279 	u64 offset;
280 	u32 trunk;
281 
282 	while (len > 0) {
283 		offset = paddr - (paddr & PAGE_MASK);
284 		vaddr = ghes_map(PHYS_PFN(paddr), fixmap_idx);
285 		trunk = PAGE_SIZE - offset;
286 		trunk = min(trunk, len);
287 		if (from_phys)
288 			memcpy_fromio(buffer, vaddr + offset, trunk);
289 		else
290 			memcpy_toio(vaddr + offset, buffer, trunk);
291 		len -= trunk;
292 		paddr += trunk;
293 		buffer += trunk;
294 		ghes_unmap(vaddr, fixmap_idx);
295 	}
296 }
297 
298 /* Check the top-level record header has an appropriate size. */
299 static int __ghes_check_estatus(struct ghes *ghes,
300 				struct acpi_hest_generic_status *estatus)
301 {
302 	u32 len = cper_estatus_len(estatus);
303 
304 	if (len < sizeof(*estatus)) {
305 		pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n");
306 		return -EIO;
307 	}
308 
309 	if (len > ghes->generic->error_block_length) {
310 		pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n");
311 		return -EIO;
312 	}
313 
314 	if (cper_estatus_check_header(estatus)) {
315 		pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n");
316 		return -EIO;
317 	}
318 
319 	return 0;
320 }
321 
322 /* Read the CPER block, returning its address, and header in estatus. */
323 static int __ghes_peek_estatus(struct ghes *ghes,
324 			       struct acpi_hest_generic_status *estatus,
325 			       u64 *buf_paddr, enum fixed_addresses fixmap_idx)
326 {
327 	struct acpi_hest_generic *g = ghes->generic;
328 	int rc;
329 
330 	rc = apei_read(buf_paddr, &g->error_status_address);
331 	if (rc) {
332 		*buf_paddr = 0;
333 		pr_warn_ratelimited(FW_WARN GHES_PFX
334 "Failed to read error status block address for hardware error source: %d.\n",
335 				   g->header.source_id);
336 		return -EIO;
337 	}
338 	if (!*buf_paddr)
339 		return -ENOENT;
340 
341 	ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1,
342 			      fixmap_idx);
343 	if (!estatus->block_status) {
344 		*buf_paddr = 0;
345 		return -ENOENT;
346 	}
347 
348 	return __ghes_check_estatus(ghes, estatus);
349 }
350 
351 static int __ghes_read_estatus(struct acpi_hest_generic_status *estatus,
352 			       u64 buf_paddr, enum fixed_addresses fixmap_idx,
353 			       size_t buf_len)
354 {
355 	ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx);
356 	if (cper_estatus_check(estatus)) {
357 		pr_warn_ratelimited(FW_WARN GHES_PFX
358 				    "Failed to read error status block!\n");
359 		return -EIO;
360 	}
361 
362 	return 0;
363 }
364 
365 static int ghes_read_estatus(struct ghes *ghes,
366 			     struct acpi_hest_generic_status *estatus,
367 			     u64 *buf_paddr, enum fixed_addresses fixmap_idx)
368 {
369 	int rc;
370 
371 	rc = __ghes_peek_estatus(ghes, estatus, buf_paddr, fixmap_idx);
372 	if (rc)
373 		return rc;
374 
375 	rc = __ghes_check_estatus(ghes, estatus);
376 	if (rc)
377 		return rc;
378 
379 	return __ghes_read_estatus(estatus, *buf_paddr, fixmap_idx,
380 				   cper_estatus_len(estatus));
381 }
382 
383 static void ghes_clear_estatus(struct ghes *ghes,
384 			       struct acpi_hest_generic_status *estatus,
385 			       u64 buf_paddr, enum fixed_addresses fixmap_idx)
386 {
387 	estatus->block_status = 0;
388 
389 	if (!buf_paddr)
390 		return;
391 
392 	ghes_copy_tofrom_phys(estatus, buf_paddr,
393 			      sizeof(estatus->block_status), 0,
394 			      fixmap_idx);
395 
396 	/*
397 	 * GHESv2 type HEST entries introduce support for error acknowledgment,
398 	 * so only acknowledge the error if this support is present.
399 	 */
400 	if (is_hest_type_generic_v2(ghes))
401 		ghes_ack_error(ghes->generic_v2);
402 }
403 
/*
 * Queue memory-failure handling for a platform memory error section.
 *
 * Nothing is done unless the section carries a valid physical address that
 * maps to a valid pfn. A page is queued in two cases only:
 *  - the section is corrected and flags the error threshold as exceeded:
 *    queued with MF_SOFT_OFFLINE;
 *  - both the record severity @sev and the section severity are
 *    recoverable: queued with flags 0.
 *
 * Compiles to an empty function without CONFIG_ACPI_APEI_MEMORY_FAILURE.
 */
static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
{
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
	struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
	int sec_sev = ghes_severity(gdata->error_severity);
	unsigned long pfn;

	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
		return;

	pfn = mem_err->physical_addr >> PAGE_SHIFT;
	if (!pfn_valid(pfn)) {
		pr_warn_ratelimited(FW_WARN GHES_PFX
		"Invalid address in generic error data: %#llx\n",
		mem_err->physical_addr);
		return;
	}

	/* iff following two events can be handled properly by now */
	if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE) {
		memory_failure_queue(pfn, 0);
		return;
	}

	if (sec_sev == GHES_SEV_CORRECTED &&
	    (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
		memory_failure_queue(pfn, MF_SOFT_OFFLINE);
#endif
}
434 
/*
 * PCIe AER errors need to be sent to the AER driver for reporting and
 * recovery. The GHES severities map to the following AER severities and
 * require the following handling:
 *
 * GHES_SEV_CORRECTED -> AER_CORRECTABLE
 *     These need to be reported by the AER driver but no recovery is
 *     necessary.
 * GHES_SEV_RECOVERABLE -> AER_NONFATAL
 * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL
 *     These both need to be reported and recovered from by the AER driver.
 * GHES_SEV_PANIC does not make it to this handling since the kernel must
 *     panic.
 *
 * Sections lacking a valid device ID or valid AER info are ignored. The
 * function is empty without CONFIG_ACPI_APEI_PCIEAER.
 */
static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
{
#ifdef CONFIG_ACPI_APEI_PCIEAER
	struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
	unsigned int devfn;
	int aer_severity;

	if (!(pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID) ||
	    !(pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO))
		return;

	devfn = PCI_DEVFN(pcie_err->device_id.device,
			  pcie_err->device_id.function);
	aer_severity = cper_severity_to_aer(gdata->error_severity);

	/*
	 * If firmware reset the component to contain the error, we must
	 * reinitialize it before use, so treat it as a fatal AER error.
	 */
	if (gdata->flags & CPER_SEC_RESET)
		aer_severity = AER_FATAL;

	aer_recover_queue(pcie_err->device_id.segment,
			  pcie_err->device_id.bus,
			  devfn, aer_severity,
			  (struct aer_capability_regs *)
			  pcie_err->aer_info);
#endif
}
479 
480 static void ghes_do_proc(struct ghes *ghes,
481 			 const struct acpi_hest_generic_status *estatus)
482 {
483 	int sev, sec_sev;
484 	struct acpi_hest_generic_data *gdata;
485 	guid_t *sec_type;
486 	guid_t *fru_id = &NULL_UUID_LE;
487 	char *fru_text = "";
488 
489 	sev = ghes_severity(estatus->error_severity);
490 	apei_estatus_for_each_section(estatus, gdata) {
491 		sec_type = (guid_t *)gdata->section_type;
492 		sec_sev = ghes_severity(gdata->error_severity);
493 		if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
494 			fru_id = (guid_t *)gdata->fru_id;
495 
496 		if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
497 			fru_text = gdata->fru_text;
498 
499 		if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
500 			struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
501 
502 			ghes_edac_report_mem_error(sev, mem_err);
503 
504 			arch_apei_report_mem_error(sev, mem_err);
505 			ghes_handle_memory_failure(gdata, sev);
506 		}
507 		else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
508 			ghes_handle_aer(gdata);
509 		}
510 		else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
511 			struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
512 
513 			log_arm_hw_error(err);
514 		} else {
515 			void *err = acpi_hest_get_payload(gdata);
516 
517 			log_non_standard_event(sec_type, fru_id, fru_text,
518 					       sec_sev, err,
519 					       gdata->error_data_length);
520 		}
521 	}
522 }
523 
524 static void __ghes_print_estatus(const char *pfx,
525 				 const struct acpi_hest_generic *generic,
526 				 const struct acpi_hest_generic_status *estatus)
527 {
528 	static atomic_t seqno;
529 	unsigned int curr_seqno;
530 	char pfx_seq[64];
531 
532 	if (pfx == NULL) {
533 		if (ghes_severity(estatus->error_severity) <=
534 		    GHES_SEV_CORRECTED)
535 			pfx = KERN_WARNING;
536 		else
537 			pfx = KERN_ERR;
538 	}
539 	curr_seqno = atomic_inc_return(&seqno);
540 	snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
541 	printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
542 	       pfx_seq, generic->header.source_id);
543 	cper_estatus_print(pfx_seq, estatus);
544 }
545 
546 static int ghes_print_estatus(const char *pfx,
547 			      const struct acpi_hest_generic *generic,
548 			      const struct acpi_hest_generic_status *estatus)
549 {
550 	/* Not more than 2 messages every 5 seconds */
551 	static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
552 	static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
553 	struct ratelimit_state *ratelimit;
554 
555 	if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
556 		ratelimit = &ratelimit_corrected;
557 	else
558 		ratelimit = &ratelimit_uncorrected;
559 	if (__ratelimit(ratelimit)) {
560 		__ghes_print_estatus(pfx, generic, estatus);
561 		return 1;
562 	}
563 	return 0;
564 }
565 
566 /*
567  * GHES error status reporting throttle, to report more kinds of
568  * errors, instead of just most frequently occurred errors.
569  */
570 static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
571 {
572 	u32 len;
573 	int i, cached = 0;
574 	unsigned long long now;
575 	struct ghes_estatus_cache *cache;
576 	struct acpi_hest_generic_status *cache_estatus;
577 
578 	len = cper_estatus_len(estatus);
579 	rcu_read_lock();
580 	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
581 		cache = rcu_dereference(ghes_estatus_caches[i]);
582 		if (cache == NULL)
583 			continue;
584 		if (len != cache->estatus_len)
585 			continue;
586 		cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
587 		if (memcmp(estatus, cache_estatus, len))
588 			continue;
589 		atomic_inc(&cache->count);
590 		now = sched_clock();
591 		if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
592 			cached = 1;
593 		break;
594 	}
595 	rcu_read_unlock();
596 	return cached;
597 }
598 
599 static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
600 	struct acpi_hest_generic *generic,
601 	struct acpi_hest_generic_status *estatus)
602 {
603 	int alloced;
604 	u32 len, cache_len;
605 	struct ghes_estatus_cache *cache;
606 	struct acpi_hest_generic_status *cache_estatus;
607 
608 	alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
609 	if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
610 		atomic_dec(&ghes_estatus_cache_alloced);
611 		return NULL;
612 	}
613 	len = cper_estatus_len(estatus);
614 	cache_len = GHES_ESTATUS_CACHE_LEN(len);
615 	cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
616 	if (!cache) {
617 		atomic_dec(&ghes_estatus_cache_alloced);
618 		return NULL;
619 	}
620 	cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
621 	memcpy(cache_estatus, estatus, len);
622 	cache->estatus_len = len;
623 	atomic_set(&cache->count, 0);
624 	cache->generic = generic;
625 	cache->time_in = sched_clock();
626 	return cache;
627 }
628 
629 static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
630 {
631 	u32 len;
632 
633 	len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
634 	len = GHES_ESTATUS_CACHE_LEN(len);
635 	gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
636 	atomic_dec(&ghes_estatus_cache_alloced);
637 }
638 
639 static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
640 {
641 	struct ghes_estatus_cache *cache;
642 
643 	cache = container_of(head, struct ghes_estatus_cache, rcu);
644 	ghes_estatus_cache_free(cache);
645 }
646 
/*
 * Try to insert a copy of @estatus into the throttle cache.
 *
 * A new entry is allocated first; if that fails nothing happens. The slot
 * to use is then chosen, in order of preference: the first empty slot, the
 * first slot whose entry is older than GHES_ESTATUS_IN_CACHE_MAX_NSEC,
 * otherwise the slot whose entry has the largest average time between
 * hits (age divided by hit count + 1).
 *
 * The slot is swapped with cmpxchg(); if it no longer holds the entry seen
 * during the scan, the new entry is simply freed again. A replaced entry
 * is freed through call_rcu().
 */
static void ghes_estatus_cache_add(
	struct acpi_hest_generic *generic,
	struct acpi_hest_generic_status *estatus)
{
	int i, slot = -1, count;
	unsigned long long now, duration, period, max_period = 0;
	struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;

	new_cache = ghes_estatus_cache_alloc(generic, estatus);
	if (new_cache == NULL)
		return;
	rcu_read_lock();
	now = sched_clock();
	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
		cache = rcu_dereference(ghes_estatus_caches[i]);
		/* Empty slot: use it and stop looking. */
		if (cache == NULL) {
			slot = i;
			slot_cache = NULL;
			break;
		}
		duration = now - cache->time_in;
		/* Expired entry: replace it and stop looking. */
		if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
			slot = i;
			slot_cache = cache;
			break;
		}
		/* Otherwise remember the entry that is hit least often. */
		count = atomic_read(&cache->count);
		period = duration;
		do_div(period, (count + 1));
		if (period > max_period) {
			max_period = period;
			slot = i;
			slot_cache = cache;
		}
	}
	/* new_cache must be put into array after its contents are written */
	smp_wmb();
	if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
				  slot_cache, new_cache) == slot_cache) {
		/* Readers may still hold the old entry: defer freeing it. */
		if (slot_cache)
			call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
	} else
		/* No slot chosen, or the slot changed under us: give up. */
		ghes_estatus_cache_free(new_cache);
	rcu_read_unlock();
}
692 
693 static void __ghes_panic(struct ghes *ghes,
694 			 struct acpi_hest_generic_status *estatus,
695 			 u64 buf_paddr, enum fixed_addresses fixmap_idx)
696 {
697 	__ghes_print_estatus(KERN_EMERG, ghes->generic, estatus);
698 
699 	ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx);
700 
701 	/* reboot to log the error! */
702 	if (!panic_timeout)
703 		panic_timeout = ghes_panic_timeout;
704 	panic("Fatal hardware error!");
705 }
706 
707 static int ghes_proc(struct ghes *ghes)
708 {
709 	struct acpi_hest_generic_status *estatus = ghes->estatus;
710 	u64 buf_paddr;
711 	int rc;
712 
713 	rc = ghes_read_estatus(ghes, estatus, &buf_paddr, FIX_APEI_GHES_IRQ);
714 	if (rc)
715 		goto out;
716 
717 	if (ghes_severity(estatus->error_severity) >= GHES_SEV_PANIC)
718 		__ghes_panic(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ);
719 
720 	if (!ghes_estatus_cached(estatus)) {
721 		if (ghes_print_estatus(NULL, ghes->generic, estatus))
722 			ghes_estatus_cache_add(ghes->generic, estatus);
723 	}
724 	ghes_do_proc(ghes, estatus);
725 
726 out:
727 	ghes_clear_estatus(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ);
728 
729 	return rc;
730 }
731 
732 static void ghes_add_timer(struct ghes *ghes)
733 {
734 	struct acpi_hest_generic *g = ghes->generic;
735 	unsigned long expire;
736 
737 	if (!g->notify.poll_interval) {
738 		pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
739 			   g->header.source_id);
740 		return;
741 	}
742 	expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
743 	ghes->timer.expires = round_jiffies_relative(expire);
744 	add_timer(&ghes->timer);
745 }
746 
747 static void ghes_poll_func(struct timer_list *t)
748 {
749 	struct ghes *ghes = from_timer(ghes, t, timer);
750 	unsigned long flags;
751 
752 	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
753 	ghes_proc(ghes);
754 	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
755 	if (!(ghes->flags & GHES_EXITING))
756 		ghes_add_timer(ghes);
757 }
758 
759 static irqreturn_t ghes_irq_func(int irq, void *data)
760 {
761 	struct ghes *ghes = data;
762 	unsigned long flags;
763 	int rc;
764 
765 	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
766 	rc = ghes_proc(ghes);
767 	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
768 	if (rc)
769 		return IRQ_NONE;
770 
771 	return IRQ_HANDLED;
772 }
773 
774 static int ghes_notify_hed(struct notifier_block *this, unsigned long event,
775 			   void *data)
776 {
777 	struct ghes *ghes;
778 	unsigned long flags;
779 	int ret = NOTIFY_DONE;
780 
781 	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
782 	rcu_read_lock();
783 	list_for_each_entry_rcu(ghes, &ghes_hed, list) {
784 		if (!ghes_proc(ghes))
785 			ret = NOTIFY_OK;
786 	}
787 	rcu_read_unlock();
788 	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
789 
790 	return ret;
791 }
792 
/* Notifier block whose callback is ghes_notify_hed(). */
static struct notifier_block ghes_notifier_hed = {
	.notifier_call = ghes_notify_hed,
};
796 
797 /*
798  * Handlers for CPER records may not be NMI safe. For example,
799  * memory_failure_queue() takes spinlocks and calls schedule_work_on().
800  * In any NMI-like handler, memory from ghes_estatus_pool is used to save
801  * estatus, and added to the ghes_estatus_llist. irq_work_queue() causes
802  * ghes_proc_in_irq() to run in IRQ context where each estatus in
803  * ghes_estatus_llist is processed.
804  *
805  * Memory from the ghes_estatus_pool is also used with the ghes_estatus_cache
806  * to suppress frequent messages.
807  */
/* Lock-less list of estatus nodes queued by the NMI-like handlers. */
static struct llist_head ghes_estatus_llist;
/* irq_work that runs ghes_proc_in_irq() to drain ghes_estatus_llist. */
static struct irq_work ghes_proc_irq_work;
810 
811 static void ghes_proc_in_irq(struct irq_work *irq_work)
812 {
813 	struct llist_node *llnode, *next;
814 	struct ghes_estatus_node *estatus_node;
815 	struct acpi_hest_generic *generic;
816 	struct acpi_hest_generic_status *estatus;
817 	u32 len, node_len;
818 
819 	llnode = llist_del_all(&ghes_estatus_llist);
820 	/*
821 	 * Because the time order of estatus in list is reversed,
822 	 * revert it back to proper order.
823 	 */
824 	llnode = llist_reverse_order(llnode);
825 	while (llnode) {
826 		next = llnode->next;
827 		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
828 					   llnode);
829 		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
830 		len = cper_estatus_len(estatus);
831 		node_len = GHES_ESTATUS_NODE_LEN(len);
832 		ghes_do_proc(estatus_node->ghes, estatus);
833 		if (!ghes_estatus_cached(estatus)) {
834 			generic = estatus_node->generic;
835 			if (ghes_print_estatus(NULL, generic, estatus))
836 				ghes_estatus_cache_add(generic, estatus);
837 		}
838 		gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
839 			      node_len);
840 		llnode = next;
841 	}
842 }
843 
844 static void ghes_print_queued_estatus(void)
845 {
846 	struct llist_node *llnode;
847 	struct ghes_estatus_node *estatus_node;
848 	struct acpi_hest_generic *generic;
849 	struct acpi_hest_generic_status *estatus;
850 
851 	llnode = llist_del_all(&ghes_estatus_llist);
852 	/*
853 	 * Because the time order of estatus in list is reversed,
854 	 * revert it back to proper order.
855 	 */
856 	llnode = llist_reverse_order(llnode);
857 	while (llnode) {
858 		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
859 					   llnode);
860 		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
861 		generic = estatus_node->generic;
862 		ghes_print_estatus(NULL, generic, estatus);
863 		llnode = llnode->next;
864 	}
865 }
866 
/*
 * Copy the pending record of @ghes into a node allocated from the estatus
 * pool and queue it on ghes_estatus_llist for ghes_proc_in_irq().
 *
 * Returns 0 if a record was consumed (queued, or dropped because the
 * throttle cache already holds it), -EOPNOTSUPP without
 * CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, -ENOMEM if the pool is exhausted,
 * -ENOENT if the full record fails its check, or the error from peeking
 * at / validating the header. A record of panic severity ends in
 * __ghes_panic().
 */
static int ghes_in_nmi_queue_one_entry(struct ghes *ghes,
				       enum fixed_addresses fixmap_idx)
{
	struct acpi_hest_generic_status *estatus, tmp_header;
	struct ghes_estatus_node *estatus_node;
	u32 len, node_len;
	u64 buf_paddr;
	int sev, rc;

	if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG))
		return -EOPNOTSUPP;

	/* Fetch only the header first; it tells how much memory is needed. */
	rc = __ghes_peek_estatus(ghes, &tmp_header, &buf_paddr, fixmap_idx);
	if (rc) {
		ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
		return rc;
	}

	rc = __ghes_check_estatus(ghes, &tmp_header);
	if (rc) {
		ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);
		return rc;
	}

	len = cper_estatus_len(&tmp_header);
	node_len = GHES_ESTATUS_NODE_LEN(len);
	estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len);
	/* On allocation failure the firmware record is left untouched. */
	if (!estatus_node)
		return -ENOMEM;

	estatus_node->ghes = ghes;
	estatus_node->generic = ghes->generic;
	estatus = GHES_ESTATUS_FROM_NODE(estatus_node);

	if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) {
		ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx);
		rc = -ENOENT;
		goto no_work;
	}

	sev = ghes_severity(estatus->error_severity);
	if (sev >= GHES_SEV_PANIC) {
		/* Flush what is already queued before going down. */
		ghes_print_queued_estatus();
		__ghes_panic(ghes, estatus, buf_paddr, fixmap_idx);
	}

	/*
	 * Clear via the temporary header so that block_status in the queued
	 * copy is not zeroed.
	 */
	ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx);

	/* This error has been reported before, don't process it again. */
	if (ghes_estatus_cached(estatus))
		goto no_work;

	llist_add(&estatus_node->llnode, &ghes_estatus_llist);

	return rc;

no_work:
	/* The node was not queued: hand it back to the pool. */
	gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
		      node_len);

	return rc;
}
929 
930 static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list,
931 				       enum fixed_addresses fixmap_idx)
932 {
933 	int ret = -ENOENT;
934 	struct ghes *ghes;
935 
936 	rcu_read_lock();
937 	list_for_each_entry_rcu(ghes, rcu_list, list) {
938 		if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx))
939 			ret = 0;
940 	}
941 	rcu_read_unlock();
942 
943 	if (IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && !ret)
944 		irq_work_queue(&ghes_proc_irq_work);
945 
946 	return ret;
947 }
948 
949 #ifdef CONFIG_ACPI_APEI_SEA
/* RCU list of the error sources scanned by ghes_notify_sea(). */
static LIST_HEAD(ghes_sea);
951 
952 /*
953  * Return 0 only if one of the SEA error sources successfully reported an error
954  * record sent from the firmware.
955  */
956 int ghes_notify_sea(void)
957 {
958 	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sea);
959 	int rv;
960 
961 	raw_spin_lock(&ghes_notify_lock_sea);
962 	rv = ghes_in_nmi_spool_from_list(&ghes_sea, FIX_APEI_GHES_SEA);
963 	raw_spin_unlock(&ghes_notify_lock_sea);
964 
965 	return rv;
966 }
967 
968 static void ghes_sea_add(struct ghes *ghes)
969 {
970 	mutex_lock(&ghes_list_mutex);
971 	list_add_rcu(&ghes->list, &ghes_sea);
972 	mutex_unlock(&ghes_list_mutex);
973 }
974 
975 static void ghes_sea_remove(struct ghes *ghes)
976 {
977 	mutex_lock(&ghes_list_mutex);
978 	list_del_rcu(&ghes->list);
979 	mutex_unlock(&ghes_list_mutex);
980 	synchronize_rcu();
981 }
982 #else /* CONFIG_ACPI_APEI_SEA */
/* SEA notification is not configured: adding a source is a no-op. */
static inline void ghes_sea_add(struct ghes *ghes)
{
}

/* SEA notification is not configured: removing a source is a no-op. */
static inline void ghes_sea_remove(struct ghes *ghes)
{
}
985 #endif /* CONFIG_ACPI_APEI_SEA */
986 
987 #ifdef CONFIG_HAVE_ACPI_APEI_NMI
/*
 * NMI may be triggered on any CPU, so ghes_in_nmi is used for
 * having only one concurrent reader: ghes_notify_nmi() only proceeds if it
 * can move this counter from 0 to 1.
 */
static atomic_t ghes_in_nmi = ATOMIC_INIT(0);

/* RCU list of the error sources scanned by ghes_notify_nmi(). */
static LIST_HEAD(ghes_nmi);
995 
996 static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
997 {
998 	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_nmi);
999 	int ret = NMI_DONE;
1000 
1001 	if (!atomic_add_unless(&ghes_in_nmi, 1, 1))
1002 		return ret;
1003 
1004 	raw_spin_lock(&ghes_notify_lock_nmi);
1005 	if (!ghes_in_nmi_spool_from_list(&ghes_nmi, FIX_APEI_GHES_NMI))
1006 		ret = NMI_HANDLED;
1007 	raw_spin_unlock(&ghes_notify_lock_nmi);
1008 
1009 	atomic_dec(&ghes_in_nmi);
1010 	return ret;
1011 }
1012 
1013 static void ghes_nmi_add(struct ghes *ghes)
1014 {
1015 	mutex_lock(&ghes_list_mutex);
1016 	if (list_empty(&ghes_nmi))
1017 		register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
1018 	list_add_rcu(&ghes->list, &ghes_nmi);
1019 	mutex_unlock(&ghes_list_mutex);
1020 }
1021 
1022 static void ghes_nmi_remove(struct ghes *ghes)
1023 {
1024 	mutex_lock(&ghes_list_mutex);
1025 	list_del_rcu(&ghes->list);
1026 	if (list_empty(&ghes_nmi))
1027 		unregister_nmi_handler(NMI_LOCAL, "ghes");
1028 	mutex_unlock(&ghes_list_mutex);
1029 	/*
1030 	 * To synchronize with NMI handler, ghes can only be
1031 	 * freed after NMI handler finishes.
1032 	 */
1033 	synchronize_rcu();
1034 }
1035 #else /* CONFIG_HAVE_ACPI_APEI_NMI */
/* CONFIG_HAVE_ACPI_APEI_NMI is off: adding or removing an NMI source is a no-op. */
static inline void ghes_nmi_add(struct ghes *ghes)
{
}

static inline void ghes_nmi_remove(struct ghes *ghes)
{
}
1038 #endif /* CONFIG_HAVE_ACPI_APEI_NMI */
1039 
1040 static void ghes_nmi_init_cxt(void)
1041 {
1042 	init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
1043 }
1044 
1045 static int __ghes_sdei_callback(struct ghes *ghes,
1046 				enum fixed_addresses fixmap_idx)
1047 {
1048 	if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) {
1049 		irq_work_queue(&ghes_proc_irq_work);
1050 
1051 		return 0;
1052 	}
1053 
1054 	return -ENOENT;
1055 }
1056 
1057 static int ghes_sdei_normal_callback(u32 event_num, struct pt_regs *regs,
1058 				      void *arg)
1059 {
1060 	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_normal);
1061 	struct ghes *ghes = arg;
1062 	int err;
1063 
1064 	raw_spin_lock(&ghes_notify_lock_sdei_normal);
1065 	err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_NORMAL);
1066 	raw_spin_unlock(&ghes_notify_lock_sdei_normal);
1067 
1068 	return err;
1069 }
1070 
1071 static int ghes_sdei_critical_callback(u32 event_num, struct pt_regs *regs,
1072 				       void *arg)
1073 {
1074 	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_critical);
1075 	struct ghes *ghes = arg;
1076 	int err;
1077 
1078 	raw_spin_lock(&ghes_notify_lock_sdei_critical);
1079 	err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_CRITICAL);
1080 	raw_spin_unlock(&ghes_notify_lock_sdei_critical);
1081 
1082 	return err;
1083 }
1084 
1085 static int apei_sdei_register_ghes(struct ghes *ghes)
1086 {
1087 	if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
1088 		return -EOPNOTSUPP;
1089 
1090 	return sdei_register_ghes(ghes, ghes_sdei_normal_callback,
1091 				 ghes_sdei_critical_callback);
1092 }
1093 
1094 static int apei_sdei_unregister_ghes(struct ghes *ghes)
1095 {
1096 	if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
1097 		return -EOPNOTSUPP;
1098 
1099 	return sdei_unregister_ghes(ghes);
1100 }
1101 
1102 static int ghes_probe(struct platform_device *ghes_dev)
1103 {
1104 	struct acpi_hest_generic *generic;
1105 	struct ghes *ghes = NULL;
1106 	unsigned long flags;
1107 
1108 	int rc = -EINVAL;
1109 
1110 	generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
1111 	if (!generic->enabled)
1112 		return -ENODEV;
1113 
1114 	switch (generic->notify.type) {
1115 	case ACPI_HEST_NOTIFY_POLLED:
1116 	case ACPI_HEST_NOTIFY_EXTERNAL:
1117 	case ACPI_HEST_NOTIFY_SCI:
1118 	case ACPI_HEST_NOTIFY_GSIV:
1119 	case ACPI_HEST_NOTIFY_GPIO:
1120 		break;
1121 
1122 	case ACPI_HEST_NOTIFY_SEA:
1123 		if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
1124 			pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n",
1125 				generic->header.source_id);
1126 			rc = -ENOTSUPP;
1127 			goto err;
1128 		}
1129 		break;
1130 	case ACPI_HEST_NOTIFY_NMI:
1131 		if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
1132 			pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
1133 				generic->header.source_id);
1134 			goto err;
1135 		}
1136 		break;
1137 	case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
1138 		if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) {
1139 			pr_warn(GHES_PFX "Generic hardware error source: %d notified via SDE Interface is not supported!\n",
1140 				generic->header.source_id);
1141 			goto err;
1142 		}
1143 		break;
1144 	case ACPI_HEST_NOTIFY_LOCAL:
1145 		pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
1146 			   generic->header.source_id);
1147 		goto err;
1148 	default:
1149 		pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
1150 			   generic->notify.type, generic->header.source_id);
1151 		goto err;
1152 	}
1153 
1154 	rc = -EIO;
1155 	if (generic->error_block_length <
1156 	    sizeof(struct acpi_hest_generic_status)) {
1157 		pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
1158 			   generic->error_block_length,
1159 			   generic->header.source_id);
1160 		goto err;
1161 	}
1162 	ghes = ghes_new(generic);
1163 	if (IS_ERR(ghes)) {
1164 		rc = PTR_ERR(ghes);
1165 		ghes = NULL;
1166 		goto err;
1167 	}
1168 
1169 	switch (generic->notify.type) {
1170 	case ACPI_HEST_NOTIFY_POLLED:
1171 		timer_setup(&ghes->timer, ghes_poll_func, TIMER_DEFERRABLE);
1172 		ghes_add_timer(ghes);
1173 		break;
1174 	case ACPI_HEST_NOTIFY_EXTERNAL:
1175 		/* External interrupt vector is GSI */
1176 		rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq);
1177 		if (rc) {
1178 			pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
1179 			       generic->header.source_id);
1180 			goto err;
1181 		}
1182 		rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED,
1183 				 "GHES IRQ", ghes);
1184 		if (rc) {
1185 			pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
1186 			       generic->header.source_id);
1187 			goto err;
1188 		}
1189 		break;
1190 
1191 	case ACPI_HEST_NOTIFY_SCI:
1192 	case ACPI_HEST_NOTIFY_GSIV:
1193 	case ACPI_HEST_NOTIFY_GPIO:
1194 		mutex_lock(&ghes_list_mutex);
1195 		if (list_empty(&ghes_hed))
1196 			register_acpi_hed_notifier(&ghes_notifier_hed);
1197 		list_add_rcu(&ghes->list, &ghes_hed);
1198 		mutex_unlock(&ghes_list_mutex);
1199 		break;
1200 
1201 	case ACPI_HEST_NOTIFY_SEA:
1202 		ghes_sea_add(ghes);
1203 		break;
1204 	case ACPI_HEST_NOTIFY_NMI:
1205 		ghes_nmi_add(ghes);
1206 		break;
1207 	case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
1208 		rc = apei_sdei_register_ghes(ghes);
1209 		if (rc)
1210 			goto err;
1211 		break;
1212 	default:
1213 		BUG();
1214 	}
1215 
1216 	platform_set_drvdata(ghes_dev, ghes);
1217 
1218 	ghes_edac_register(ghes, &ghes_dev->dev);
1219 
1220 	/* Handle any pending errors right away */
1221 	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
1222 	ghes_proc(ghes);
1223 	spin_unlock_irqrestore(&ghes_notify_lock_irq, flags);
1224 
1225 	return 0;
1226 
1227 err:
1228 	if (ghes) {
1229 		ghes_fini(ghes);
1230 		kfree(ghes);
1231 	}
1232 	return rc;
1233 }
1234 
1235 static int ghes_remove(struct platform_device *ghes_dev)
1236 {
1237 	int rc;
1238 	struct ghes *ghes;
1239 	struct acpi_hest_generic *generic;
1240 
1241 	ghes = platform_get_drvdata(ghes_dev);
1242 	generic = ghes->generic;
1243 
1244 	ghes->flags |= GHES_EXITING;
1245 	switch (generic->notify.type) {
1246 	case ACPI_HEST_NOTIFY_POLLED:
1247 		del_timer_sync(&ghes->timer);
1248 		break;
1249 	case ACPI_HEST_NOTIFY_EXTERNAL:
1250 		free_irq(ghes->irq, ghes);
1251 		break;
1252 
1253 	case ACPI_HEST_NOTIFY_SCI:
1254 	case ACPI_HEST_NOTIFY_GSIV:
1255 	case ACPI_HEST_NOTIFY_GPIO:
1256 		mutex_lock(&ghes_list_mutex);
1257 		list_del_rcu(&ghes->list);
1258 		if (list_empty(&ghes_hed))
1259 			unregister_acpi_hed_notifier(&ghes_notifier_hed);
1260 		mutex_unlock(&ghes_list_mutex);
1261 		synchronize_rcu();
1262 		break;
1263 
1264 	case ACPI_HEST_NOTIFY_SEA:
1265 		ghes_sea_remove(ghes);
1266 		break;
1267 	case ACPI_HEST_NOTIFY_NMI:
1268 		ghes_nmi_remove(ghes);
1269 		break;
1270 	case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
1271 		rc = apei_sdei_unregister_ghes(ghes);
1272 		if (rc)
1273 			return rc;
1274 		break;
1275 	default:
1276 		BUG();
1277 		break;
1278 	}
1279 
1280 	ghes_fini(ghes);
1281 
1282 	ghes_edac_unregister(ghes);
1283 
1284 	kfree(ghes);
1285 
1286 	platform_set_drvdata(ghes_dev, NULL);
1287 
1288 	return 0;
1289 }
1290 
1291 static struct platform_driver ghes_platform_driver = {
1292 	.driver		= {
1293 		.name	= "GHES",
1294 	},
1295 	.probe		= ghes_probe,
1296 	.remove		= ghes_remove,
1297 };
1298 
1299 static int __init ghes_init(void)
1300 {
1301 	int rc;
1302 
1303 	if (acpi_disabled)
1304 		return -ENODEV;
1305 
1306 	switch (hest_disable) {
1307 	case HEST_NOT_FOUND:
1308 		return -ENODEV;
1309 	case HEST_DISABLED:
1310 		pr_info(GHES_PFX "HEST is not enabled!\n");
1311 		return -EINVAL;
1312 	default:
1313 		break;
1314 	}
1315 
1316 	if (ghes_disable) {
1317 		pr_info(GHES_PFX "GHES is not enabled!\n");
1318 		return -EINVAL;
1319 	}
1320 
1321 	ghes_nmi_init_cxt();
1322 
1323 	rc = platform_driver_register(&ghes_platform_driver);
1324 	if (rc)
1325 		goto err;
1326 
1327 	rc = apei_osc_setup();
1328 	if (rc == 0 && osc_sb_apei_support_acked)
1329 		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
1330 	else if (rc == 0 && !osc_sb_apei_support_acked)
1331 		pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
1332 	else if (rc && osc_sb_apei_support_acked)
1333 		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
1334 	else
1335 		pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
1336 
1337 	return 0;
1338 err:
1339 	return rc;
1340 }
1341 device_initcall(ghes_init);
1342