xref: /linux/drivers/firmware/efi/cper.c (revision c4ee0af3fa0dc65f690fc908f02b8355f9576ea0)
1 /*
2  * UEFI Common Platform Error Record (CPER) support
3  *
4  * Copyright (C) 2010, Intel Corp.
5  *	Author: Huang Ying <ying.huang@intel.com>
6  *
7  * CPER is the format used to describe platform hardware error by
8  * various tables, such as ERST, BERT and HEST etc.
9  *
10  * For more information about CPER, please refer to Appendix N of UEFI
11  * Specification version 2.4.
12  *
13  * This program is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU General Public License version
15  * 2 as published by the Free Software Foundation.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25  */
26 
27 #include <linux/kernel.h>
28 #include <linux/module.h>
29 #include <linux/time.h>
30 #include <linux/cper.h>
31 #include <linux/dmi.h>
32 #include <linux/acpi.h>
33 #include <linux/pci.h>
34 #include <linux/aer.h>
35 
36 #define INDENT_SP	" "
37 /*
38  * CPER record ID need to be unique even after reboot, because record
39  * ID is used as index for ERST storage, while CPER records from
40  * multiple boot may co-exist in ERST.
41  */
42 u64 cper_next_record_id(void)
43 {
44 	static atomic64_t seq;
45 
46 	if (!atomic64_read(&seq))
47 		atomic64_set(&seq, ((u64)get_seconds()) << 32);
48 
49 	return atomic64_inc_return(&seq);
50 }
51 EXPORT_SYMBOL_GPL(cper_next_record_id);
52 
/*
 * Human-readable severity names, indexed by severity value (see
 * cper_severity_str()).  Both the strings and the table itself are
 * read-only, matching the other string tables in this file.
 */
static const char * const cper_severity_strs[] = {
	"recoverable",
	"fatal",
	"corrected",
	"info",
};
59 
60 static const char *cper_severity_str(unsigned int severity)
61 {
62 	return severity < ARRAY_SIZE(cper_severity_strs) ?
63 		cper_severity_strs[severity] : "unknown";
64 }
65 
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: size of the string array: @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs.
 * Bits whose entry in @strs is NULL, and bits at or beyond @strs_size,
 * are skipped.  The strings are joined with ", ".  If appending another
 * string would make the line longer than 80 characters, the current line
 * is printed first and a new one is started, with @pfx printed at the
 * beginning of each line.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char * const strs[], unsigned int strs_size)
{
	unsigned int i, limit = strs_size;
	int len = 0;
	const char *str;
	char buf[84];

	/* Shifting 1U by the width of @bits or more would be undefined. */
	if (limit > 8 * sizeof(bits))
		limit = 8 * sizeof(bits);

	for (i = 0; i < limit; i++) {
		if (!(bits & (1U << i)))
			continue;
		str = strs[i];
		if (!str)
			continue;
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		/*
		 * scnprintf() returns the number of characters actually
		 * stored, so len stays below sizeof(buf) even when the
		 * output is truncated and sizeof(buf) - len cannot wrap.
		 */
		if (!len)
			len = scnprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += scnprintf(buf + len, sizeof(buf) - len,
					 ", %s", str);
	}
	if (len)
		printk("%s\n", buf);
}
102 
/* Names for the proc_type field of a generic processor error section. */
static const char * const cper_proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
};
107 
/* Names for the proc_isa field of a generic processor error section. */
static const char * const cper_proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
};
113 
/*
 * Names for the bits of the proc_error_type mask, indexed by bit
 * position (consumed by cper_print_bits()).
 */
static const char * const cper_proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};
120 
/* Names for the operation field of a generic processor error section. */
static const char * const cper_proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};
127 
/*
 * Names for the bits of the flags mask of a generic processor error
 * section, indexed by bit position (consumed by cper_print_bits()).
 */
static const char * const cper_proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};
134 
135 static void cper_print_proc_generic(const char *pfx,
136 				    const struct cper_sec_proc_generic *proc)
137 {
138 	if (proc->validation_bits & CPER_PROC_VALID_TYPE)
139 		printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
140 		       proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ?
141 		       cper_proc_type_strs[proc->proc_type] : "unknown");
142 	if (proc->validation_bits & CPER_PROC_VALID_ISA)
143 		printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
144 		       proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ?
145 		       cper_proc_isa_strs[proc->proc_isa] : "unknown");
146 	if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
147 		printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
148 		cper_print_bits(pfx, proc->proc_error_type,
149 				cper_proc_error_type_strs,
150 				ARRAY_SIZE(cper_proc_error_type_strs));
151 	}
152 	if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
153 		printk("%s""operation: %d, %s\n", pfx, proc->operation,
154 		       proc->operation < ARRAY_SIZE(cper_proc_op_strs) ?
155 		       cper_proc_op_strs[proc->operation] : "unknown");
156 	if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
157 		printk("%s""flags: 0x%02x\n", pfx, proc->flags);
158 		cper_print_bits(pfx, proc->flags, cper_proc_flag_strs,
159 				ARRAY_SIZE(cper_proc_flag_strs));
160 	}
161 	if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
162 		printk("%s""level: %d\n", pfx, proc->level);
163 	if (proc->validation_bits & CPER_PROC_VALID_VERSION)
164 		printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
165 	if (proc->validation_bits & CPER_PROC_VALID_ID)
166 		printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
167 	if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
168 		printk("%s""target_address: 0x%016llx\n",
169 		       pfx, proc->target_addr);
170 	if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
171 		printk("%s""requestor_id: 0x%016llx\n",
172 		       pfx, proc->requestor_id);
173 	if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
174 		printk("%s""responder_id: 0x%016llx\n",
175 		       pfx, proc->responder_id);
176 	if (proc->validation_bits & CPER_PROC_VALID_IP)
177 		printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
178 }
179 
/*
 * Names for the error_type field of a memory error section, indexed by
 * the field's value (see cper_print_mem()).  Both the strings and the
 * table itself are read-only, matching the other string tables in this
 * file.
 */
static const char * const cper_mem_err_type_strs[] = {
	"unknown",
	"no error",
	"single-bit ECC",
	"multi-bit ECC",
	"single-symbol chipkill ECC",
	"multi-symbol chipkill ECC",
	"master abort",
	"target abort",
	"parity error",
	"watchdog timeout",
	"invalid address",
	"mirror Broken",
	"memory sparing",
	"scrub corrected error",
	"scrub uncorrected error",
	"physical memory map-out event",
};
198 
199 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
200 {
201 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
202 		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
203 	if (mem->validation_bits & CPER_MEM_VALID_PA)
204 		printk("%s""physical_address: 0x%016llx\n",
205 		       pfx, mem->physical_addr);
206 	if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
207 		printk("%s""physical_address_mask: 0x%016llx\n",
208 		       pfx, mem->physical_addr_mask);
209 	if (mem->validation_bits & CPER_MEM_VALID_NODE)
210 		pr_debug("node: %d\n", mem->node);
211 	if (mem->validation_bits & CPER_MEM_VALID_CARD)
212 		pr_debug("card: %d\n", mem->card);
213 	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
214 		pr_debug("module: %d\n", mem->module);
215 	if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
216 		pr_debug("rank: %d\n", mem->rank);
217 	if (mem->validation_bits & CPER_MEM_VALID_BANK)
218 		pr_debug("bank: %d\n", mem->bank);
219 	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
220 		pr_debug("device: %d\n", mem->device);
221 	if (mem->validation_bits & CPER_MEM_VALID_ROW)
222 		pr_debug("row: %d\n", mem->row);
223 	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
224 		pr_debug("column: %d\n", mem->column);
225 	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
226 		pr_debug("bit_position: %d\n", mem->bit_pos);
227 	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
228 		pr_debug("requestor_id: 0x%016llx\n", mem->requestor_id);
229 	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
230 		pr_debug("responder_id: 0x%016llx\n", mem->responder_id);
231 	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
232 		pr_debug("target_id: 0x%016llx\n", mem->target_id);
233 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
234 		u8 etype = mem->error_type;
235 		printk("%s""error_type: %d, %s\n", pfx, etype,
236 		       etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
237 		       cper_mem_err_type_strs[etype] : "unknown");
238 	}
239 	if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
240 		const char *bank = NULL, *device = NULL;
241 		dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
242 		if (bank != NULL && device != NULL)
243 			printk("%s""DIMM location: %s %s", pfx, bank, device);
244 		else
245 			printk("%s""DIMM DMI handle: 0x%.4x",
246 			       pfx, mem->mem_dev_handle);
247 	}
248 }
249 
/*
 * Names for the port_type field of a PCIe error section, indexed by the
 * field's value (see cper_print_pcie()).  Both the strings and the table
 * itself are read-only, matching the other string tables in this file.
 */
static const char * const cper_pcie_port_type_strs[] = {
	"PCIe end point",
	"legacy PCI end point",
	"unknown",
	"unknown",
	"root port",
	"upstream switch port",
	"downstream switch port",
	"PCIe to PCI/PCI-X bridge",
	"PCI/PCI-X to PCIe bridge",
	"root complex integrated endpoint device",
	"root complex event collector",
};
263 
264 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
265 			    const struct acpi_generic_data *gdata)
266 {
267 	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
268 		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
269 		       pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
270 		       cper_pcie_port_type_strs[pcie->port_type] : "unknown");
271 	if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
272 		printk("%s""version: %d.%d\n", pfx,
273 		       pcie->version.major, pcie->version.minor);
274 	if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
275 		printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
276 		       pcie->command, pcie->status);
277 	if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
278 		const __u8 *p;
279 		printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
280 		       pcie->device_id.segment, pcie->device_id.bus,
281 		       pcie->device_id.device, pcie->device_id.function);
282 		printk("%s""slot: %d\n", pfx,
283 		       pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
284 		printk("%s""secondary_bus: 0x%02x\n", pfx,
285 		       pcie->device_id.secondary_bus);
286 		printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
287 		       pcie->device_id.vendor_id, pcie->device_id.device_id);
288 		p = pcie->device_id.class_code;
289 		printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
290 	}
291 	if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
292 		printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
293 		       pcie->serial_number.lower, pcie->serial_number.upper);
294 	if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
295 		printk(
296 	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
297 	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
298 }
299 
300 static void cper_estatus_print_section(
301 	const char *pfx, const struct acpi_generic_data *gdata, int sec_no)
302 {
303 	uuid_le *sec_type = (uuid_le *)gdata->section_type;
304 	__u16 severity;
305 	char newpfx[64];
306 
307 	severity = gdata->error_severity;
308 	printk("%s""Error %d, type: %s\n", pfx, sec_no,
309 	       cper_severity_str(severity));
310 	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
311 		printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
312 	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
313 		printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
314 
315 	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
316 	if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
317 		struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
318 		printk("%s""section_type: general processor error\n", newpfx);
319 		if (gdata->error_data_length >= sizeof(*proc_err))
320 			cper_print_proc_generic(newpfx, proc_err);
321 		else
322 			goto err_section_too_small;
323 	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
324 		struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
325 		printk("%s""section_type: memory error\n", newpfx);
326 		if (gdata->error_data_length >= sizeof(*mem_err))
327 			cper_print_mem(newpfx, mem_err);
328 		else
329 			goto err_section_too_small;
330 	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
331 		struct cper_sec_pcie *pcie = (void *)(gdata + 1);
332 		printk("%s""section_type: PCIe error\n", newpfx);
333 		if (gdata->error_data_length >= sizeof(*pcie))
334 			cper_print_pcie(newpfx, pcie, gdata);
335 		else
336 			goto err_section_too_small;
337 	} else
338 		printk("%s""section type: unknown, %pUl\n", newpfx, sec_type);
339 
340 	return;
341 
342 err_section_too_small:
343 	pr_err(FW_WARN "error section length is too small\n");
344 }
345 
346 void cper_estatus_print(const char *pfx,
347 			const struct acpi_generic_status *estatus)
348 {
349 	struct acpi_generic_data *gdata;
350 	unsigned int data_len, gedata_len;
351 	int sec_no = 0;
352 	char newpfx[64];
353 	__u16 severity;
354 
355 	severity = estatus->error_severity;
356 	if (severity == CPER_SEV_CORRECTED)
357 		printk("%s%s\n", pfx,
358 		       "It has been corrected by h/w "
359 		       "and requires no further action");
360 	printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
361 	data_len = estatus->data_length;
362 	gdata = (struct acpi_generic_data *)(estatus + 1);
363 	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
364 	while (data_len >= sizeof(*gdata)) {
365 		gedata_len = gdata->error_data_length;
366 		cper_estatus_print_section(newpfx, gdata, sec_no);
367 		data_len -= gedata_len + sizeof(*gdata);
368 		gdata = (void *)(gdata + 1) + gedata_len;
369 		sec_no++;
370 	}
371 }
372 EXPORT_SYMBOL_GPL(cper_estatus_print);
373 
374 int cper_estatus_check_header(const struct acpi_generic_status *estatus)
375 {
376 	if (estatus->data_length &&
377 	    estatus->data_length < sizeof(struct acpi_generic_data))
378 		return -EINVAL;
379 	if (estatus->raw_data_length &&
380 	    estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
381 		return -EINVAL;
382 
383 	return 0;
384 }
385 EXPORT_SYMBOL_GPL(cper_estatus_check_header);
386 
387 int cper_estatus_check(const struct acpi_generic_status *estatus)
388 {
389 	struct acpi_generic_data *gdata;
390 	unsigned int data_len, gedata_len;
391 	int rc;
392 
393 	rc = cper_estatus_check_header(estatus);
394 	if (rc)
395 		return rc;
396 	data_len = estatus->data_length;
397 	gdata = (struct acpi_generic_data *)(estatus + 1);
398 	while (data_len >= sizeof(*gdata)) {
399 		gedata_len = gdata->error_data_length;
400 		if (gedata_len > data_len - sizeof(*gdata))
401 			return -EINVAL;
402 		data_len -= gedata_len + sizeof(*gdata);
403 		gdata = (void *)(gdata + 1) + gedata_len;
404 	}
405 	if (data_len)
406 		return -EINVAL;
407 
408 	return 0;
409 }
410 EXPORT_SYMBOL_GPL(cper_estatus_check);
411