xref: /linux/drivers/acpi/nfit/mce.c (revision 4f2c0a4acffbec01079c28f839422e64ddeff004)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * NFIT - Machine Check Handler
4  *
5  * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
6  */
7 #include <linux/notifier.h>
8 #include <linux/acpi.h>
9 #include <linux/nd.h>
10 #include <asm/mce.h>
11 #include "nfit.h"
12 
nfit_handle_mce(struct notifier_block * nb,unsigned long val,void * data)13 static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
14 			void *data)
15 {
16 	struct mce *mce = (struct mce *)data;
17 	struct acpi_nfit_desc *acpi_desc;
18 	struct nfit_spa *nfit_spa;
19 
20 	/* We only care about uncorrectable memory errors */
21 	if (!mce_is_memory_error(mce) || mce_is_correctable(mce))
22 		return NOTIFY_DONE;
23 
24 	/* Verify the address reported in the MCE is valid. */
25 	if (!mce_usable_address(mce))
26 		return NOTIFY_DONE;
27 
28 	/*
29 	 * mce->addr contains the physical addr accessed that caused the
30 	 * machine check. We need to walk through the list of NFITs, and see
31 	 * if any of them matches that address, and only then start a scrub.
32 	 */
33 	mutex_lock(&acpi_desc_lock);
34 	list_for_each_entry(acpi_desc, &acpi_descs, list) {
35 		unsigned int align = 1UL << MCI_MISC_ADDR_LSB(mce->misc);
36 		struct device *dev = acpi_desc->dev;
37 		int found_match = 0;
38 
39 		mutex_lock(&acpi_desc->init_mutex);
40 		list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
41 			struct acpi_nfit_system_address *spa = nfit_spa->spa;
42 
43 			if (nfit_spa_type(spa) != NFIT_SPA_PM)
44 				continue;
45 			/* find the spa that covers the mce addr */
46 			if (spa->address > mce->addr)
47 				continue;
48 			if ((spa->address + spa->length - 1) < mce->addr)
49 				continue;
50 			found_match = 1;
51 			dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n",
52 				spa->range_index, spa->address, spa->length);
53 			/*
54 			 * We can break at the first match because we're going
55 			 * to rescan all the SPA ranges. There shouldn't be any
56 			 * aliasing anyway.
57 			 */
58 			break;
59 		}
60 		mutex_unlock(&acpi_desc->init_mutex);
61 
62 		if (!found_match)
63 			continue;
64 
65 		/* If this fails due to an -ENOMEM, there is little we can do */
66 		nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus,
67 				ALIGN_DOWN(mce->addr, align), align);
68 		nvdimm_region_notify(nfit_spa->nd_region,
69 				NVDIMM_REVALIDATE_POISON);
70 
71 		if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) {
72 			/*
73 			 * We can ignore an -EBUSY here because if an ARS is
74 			 * already in progress, just let that be the last
75 			 * authoritative one
76 			 */
77 			acpi_nfit_ars_rescan(acpi_desc, 0);
78 		}
79 		mce->kflags |= MCE_HANDLED_NFIT;
80 		break;
81 	}
82 
83 	mutex_unlock(&acpi_desc_lock);
84 	return NOTIFY_DONE;
85 }
86 
87 static struct notifier_block nfit_mce_dec = {
88 	.notifier_call	= nfit_handle_mce,
89 	.priority	= MCE_PRIO_NFIT,
90 };
91 
nfit_mce_register(void)92 void nfit_mce_register(void)
93 {
94 	mce_register_decode_chain(&nfit_mce_dec);
95 }
96 
nfit_mce_unregister(void)97 void nfit_mce_unregister(void)
98 {
99 	mce_unregister_decode_chain(&nfit_mce_dec);
100 }
101