xref: /linux/drivers/acpi/apei/erst.c (revision ca55b2fef3a9373fcfc30f82fd26bc7fccbda732)
1 /*
2  * APEI Error Record Serialization Table support
3  *
4  * ERST is a way provided by APEI to save and retrieve hardware error
5  * information to and from a persistent store.
6  *
7  * For more information about ERST, please refer to ACPI Specification
8  * version 4.0, section 17.4.
9  *
10  * Copyright 2010 Intel Corp.
11  *   Author: Huang Ying <ying.huang@intel.com>
12  *
13  * This program is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU General Public License version
15  * 2 as published by the Free Software Foundation.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  */
22 
23 #include <linux/kernel.h>
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/delay.h>
27 #include <linux/io.h>
28 #include <linux/acpi.h>
29 #include <linux/uaccess.h>
30 #include <linux/cper.h>
31 #include <linux/nmi.h>
32 #include <linux/hardirq.h>
33 #include <linux/pstore.h>
34 #include <linux/vmalloc.h>
35 #include <acpi/apei.h>
36 
37 #include "apei-internal.h"
38 
/* Prefix every pr_*() message emitted from this file with "ERST: ". */
#undef pr_fmt
#define pr_fmt(fmt) "ERST: " fmt

/*
 * ERST command status values, as obtained through the
 * ACPI_ERST_GET_COMMAND_STATUS action; erst_errno() translates them
 * into errno values.
 */
#define ERST_STATUS_SUCCESS			0x0
#define ERST_STATUS_NOT_ENOUGH_SPACE		0x1
#define ERST_STATUS_HARDWARE_NOT_AVAILABLE	0x2
#define ERST_STATUS_FAILED			0x3
#define ERST_STATUS_RECORD_STORE_EMPTY		0x4
#define ERST_STATUS_RECORD_NOT_FOUND		0x5

/*
 * Address of the first byte following the fixed-size
 * struct acpi_table_erst at the start of the table @tab, typed as the
 * first instruction entry.
 */
#define ERST_TAB_ENTRY(tab)						\
	((struct acpi_whea_header *)((char *)(tab) +			\
				     sizeof(struct acpi_table_erst)))

/* Polling step, in nanoseconds, used while waiting for a busy firmware */
#define SPIN_UNIT		100			/* 100ns */
/* Firmware should respond within 1 millisecond */
#define FIRMWARE_TIMEOUT	(1 * NSEC_PER_MSEC)
/* Upper bound applied to stall times requested by ERST instructions */
#define FIRMWARE_MAX_STALL	50			/* 50us */
58 
/*
 * Non-zero when ERST support is unavailable: set by the "erst_disable"
 * boot parameter and by erst_init() on every failure path.  The exported
 * entry points in this file return -ENODEV while it is set.
 */
int erst_disable;
EXPORT_SYMBOL_GPL(erst_disable);

/* The ERST ACPI table, looked up in erst_init() */
static struct acpi_table_erst *erst_tab;

/* ERST Error Log Address Range attributes (bits of erst_erange.attr) */
#define ERST_RANGE_RESERVED	0x0001
#define ERST_RANGE_NVRAM	0x0002
#define ERST_RANGE_SLOW		0x0004
68 
/*
 * ERST Error Log Address Range, used as buffer for reading/writing
 * error records.
 *
 * base, size and attr are filled in by erst_get_erange(); vaddr is the
 * mapping of [base, base + size) created in erst_init().
 */
static struct erst_erange {
	u64 base;		/* physical start address of the range */
	u64 size;		/* length of the range in bytes */
	void __iomem *vaddr;	/* kernel mapping of the range */
	u32 attr;		/* ERST_RANGE_* attribute bits */
} erst_erange;
79 
/*
 * Prevent the ERST interpreter from running simultaneously, because the
 * corresponding firmware implementation may not work properly when
 * invoked simultaneously.
 *
 * It is also used to provide exclusive access to the ERST Error Log
 * Address Range.
 */
static DEFINE_RAW_SPINLOCK(erst_lock);
89 
90 static inline int erst_errno(int command_status)
91 {
92 	switch (command_status) {
93 	case ERST_STATUS_SUCCESS:
94 		return 0;
95 	case ERST_STATUS_HARDWARE_NOT_AVAILABLE:
96 		return -ENODEV;
97 	case ERST_STATUS_NOT_ENOUGH_SPACE:
98 		return -ENOSPC;
99 	case ERST_STATUS_RECORD_STORE_EMPTY:
100 	case ERST_STATUS_RECORD_NOT_FOUND:
101 		return -ENOENT;
102 	default:
103 		return -EINVAL;
104 	}
105 }
106 
107 static int erst_timedout(u64 *t, u64 spin_unit)
108 {
109 	if ((s64)*t < spin_unit) {
110 		pr_warn(FW_WARN "Firmware does not respond in time.\n");
111 		return 1;
112 	}
113 	*t -= spin_unit;
114 	ndelay(spin_unit);
115 	touch_nmi_watchdog();
116 	return 0;
117 }
118 
119 static int erst_exec_load_var1(struct apei_exec_context *ctx,
120 			       struct acpi_whea_header *entry)
121 {
122 	return __apei_exec_read_register(entry, &ctx->var1);
123 }
124 
125 static int erst_exec_load_var2(struct apei_exec_context *ctx,
126 			       struct acpi_whea_header *entry)
127 {
128 	return __apei_exec_read_register(entry, &ctx->var2);
129 }
130 
131 static int erst_exec_store_var1(struct apei_exec_context *ctx,
132 				struct acpi_whea_header *entry)
133 {
134 	return __apei_exec_write_register(entry, ctx->var1);
135 }
136 
137 static int erst_exec_add(struct apei_exec_context *ctx,
138 			 struct acpi_whea_header *entry)
139 {
140 	ctx->var1 += ctx->var2;
141 	return 0;
142 }
143 
144 static int erst_exec_subtract(struct apei_exec_context *ctx,
145 			      struct acpi_whea_header *entry)
146 {
147 	ctx->var1 -= ctx->var2;
148 	return 0;
149 }
150 
151 static int erst_exec_add_value(struct apei_exec_context *ctx,
152 			       struct acpi_whea_header *entry)
153 {
154 	int rc;
155 	u64 val;
156 
157 	rc = __apei_exec_read_register(entry, &val);
158 	if (rc)
159 		return rc;
160 	val += ctx->value;
161 	rc = __apei_exec_write_register(entry, val);
162 	return rc;
163 }
164 
165 static int erst_exec_subtract_value(struct apei_exec_context *ctx,
166 				    struct acpi_whea_header *entry)
167 {
168 	int rc;
169 	u64 val;
170 
171 	rc = __apei_exec_read_register(entry, &val);
172 	if (rc)
173 		return rc;
174 	val -= ctx->value;
175 	rc = __apei_exec_write_register(entry, val);
176 	return rc;
177 }
178 
179 static int erst_exec_stall(struct apei_exec_context *ctx,
180 			   struct acpi_whea_header *entry)
181 {
182 	u64 stall_time;
183 
184 	if (ctx->value > FIRMWARE_MAX_STALL) {
185 		if (!in_nmi())
186 			pr_warn(FW_WARN
187 			"Too long stall time for stall instruction: 0x%llx.\n",
188 				   ctx->value);
189 		stall_time = FIRMWARE_MAX_STALL;
190 	} else
191 		stall_time = ctx->value;
192 	udelay(stall_time);
193 	return 0;
194 }
195 
196 static int erst_exec_stall_while_true(struct apei_exec_context *ctx,
197 				      struct acpi_whea_header *entry)
198 {
199 	int rc;
200 	u64 val;
201 	u64 timeout = FIRMWARE_TIMEOUT;
202 	u64 stall_time;
203 
204 	if (ctx->var1 > FIRMWARE_MAX_STALL) {
205 		if (!in_nmi())
206 			pr_warn(FW_WARN
207 		"Too long stall time for stall while true instruction: 0x%llx.\n",
208 				   ctx->var1);
209 		stall_time = FIRMWARE_MAX_STALL;
210 	} else
211 		stall_time = ctx->var1;
212 
213 	for (;;) {
214 		rc = __apei_exec_read_register(entry, &val);
215 		if (rc)
216 			return rc;
217 		if (val != ctx->value)
218 			break;
219 		if (erst_timedout(&timeout, stall_time * NSEC_PER_USEC))
220 			return -EIO;
221 	}
222 	return 0;
223 }
224 
225 static int erst_exec_skip_next_instruction_if_true(
226 	struct apei_exec_context *ctx,
227 	struct acpi_whea_header *entry)
228 {
229 	int rc;
230 	u64 val;
231 
232 	rc = __apei_exec_read_register(entry, &val);
233 	if (rc)
234 		return rc;
235 	if (val == ctx->value) {
236 		ctx->ip += 2;
237 		return APEI_EXEC_SET_IP;
238 	}
239 
240 	return 0;
241 }
242 
243 static int erst_exec_goto(struct apei_exec_context *ctx,
244 			  struct acpi_whea_header *entry)
245 {
246 	ctx->ip = ctx->value;
247 	return APEI_EXEC_SET_IP;
248 }
249 
250 static int erst_exec_set_src_address_base(struct apei_exec_context *ctx,
251 					  struct acpi_whea_header *entry)
252 {
253 	return __apei_exec_read_register(entry, &ctx->src_base);
254 }
255 
256 static int erst_exec_set_dst_address_base(struct apei_exec_context *ctx,
257 					  struct acpi_whea_header *entry)
258 {
259 	return __apei_exec_read_register(entry, &ctx->dst_base);
260 }
261 
262 static int erst_exec_move_data(struct apei_exec_context *ctx,
263 			       struct acpi_whea_header *entry)
264 {
265 	int rc;
266 	u64 offset;
267 	void *src, *dst;
268 
269 	/* ioremap does not work in interrupt context */
270 	if (in_interrupt()) {
271 		pr_warn("MOVE_DATA can not be used in interrupt context.\n");
272 		return -EBUSY;
273 	}
274 
275 	rc = __apei_exec_read_register(entry, &offset);
276 	if (rc)
277 		return rc;
278 
279 	src = ioremap(ctx->src_base + offset, ctx->var2);
280 	if (!src)
281 		return -ENOMEM;
282 	dst = ioremap(ctx->dst_base + offset, ctx->var2);
283 	if (!dst) {
284 		iounmap(src);
285 		return -ENOMEM;
286 	}
287 
288 	memmove(dst, src, ctx->var2);
289 
290 	iounmap(src);
291 	iounmap(dst);
292 
293 	return 0;
294 }
295 
/*
 * ERST instruction table, indexed by ACPI_ERST_* instruction code.  Each
 * slot names the handler that executes the instruction and its flags;
 * the table is handed to the APEI interpreter by erst_exec_ctx_init().
 *
 * NOTE(review): APEI_EXEC_INS_ACCESS_REGISTER presumably marks
 * instructions whose handler accesses the register described by the
 * instruction entry -- confirm against apei-internal.h.
 */
static struct apei_exec_ins_type erst_ins_type[] = {
	[ACPI_ERST_READ_REGISTER] = {
		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
		.run = apei_exec_read_register,
	},
	[ACPI_ERST_READ_REGISTER_VALUE] = {
		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
		.run = apei_exec_read_register_value,
	},
	[ACPI_ERST_WRITE_REGISTER] = {
		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
		.run = apei_exec_write_register,
	},
	[ACPI_ERST_WRITE_REGISTER_VALUE] = {
		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
		.run = apei_exec_write_register_value,
	},
	[ACPI_ERST_NOOP] = {
		.flags = 0,
		.run = apei_exec_noop,
	},
	[ACPI_ERST_LOAD_VAR1] = {
		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
		.run = erst_exec_load_var1,
	},
	[ACPI_ERST_LOAD_VAR2] = {
		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
		.run = erst_exec_load_var2,
	},
	[ACPI_ERST_STORE_VAR1] = {
		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
		.run = erst_exec_store_var1,
	},
	[ACPI_ERST_ADD] = {
		.flags = 0,
		.run = erst_exec_add,
	},
	[ACPI_ERST_SUBTRACT] = {
		.flags = 0,
		.run = erst_exec_subtract,
	},
	[ACPI_ERST_ADD_VALUE] = {
		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
		.run = erst_exec_add_value,
	},
	[ACPI_ERST_SUBTRACT_VALUE] = {
		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
		.run = erst_exec_subtract_value,
	},
	[ACPI_ERST_STALL] = {
		.flags = 0,
		.run = erst_exec_stall,
	},
	[ACPI_ERST_STALL_WHILE_TRUE] = {
		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
		.run = erst_exec_stall_while_true,
	},
	[ACPI_ERST_SKIP_NEXT_IF_TRUE] = {
		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
		.run = erst_exec_skip_next_instruction_if_true,
	},
	[ACPI_ERST_GOTO] = {
		.flags = 0,
		.run = erst_exec_goto,
	},
	[ACPI_ERST_SET_SRC_ADDRESS_BASE] = {
		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
		.run = erst_exec_set_src_address_base,
	},
	[ACPI_ERST_SET_DST_ADDRESS_BASE] = {
		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
		.run = erst_exec_set_dst_address_base,
	},
	[ACPI_ERST_MOVE_DATA] = {
		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
		.run = erst_exec_move_data,
	},
};
374 
375 static inline void erst_exec_ctx_init(struct apei_exec_context *ctx)
376 {
377 	apei_exec_ctx_init(ctx, erst_ins_type, ARRAY_SIZE(erst_ins_type),
378 			   ERST_TAB_ENTRY(erst_tab), erst_tab->entries);
379 }
380 
381 static int erst_get_erange(struct erst_erange *range)
382 {
383 	struct apei_exec_context ctx;
384 	int rc;
385 
386 	erst_exec_ctx_init(&ctx);
387 	rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_RANGE);
388 	if (rc)
389 		return rc;
390 	range->base = apei_exec_ctx_get_output(&ctx);
391 	rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_LENGTH);
392 	if (rc)
393 		return rc;
394 	range->size = apei_exec_ctx_get_output(&ctx);
395 	rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_ATTRIBUTES);
396 	if (rc)
397 		return rc;
398 	range->attr = apei_exec_ctx_get_output(&ctx);
399 
400 	return 0;
401 }
402 
403 static ssize_t __erst_get_record_count(void)
404 {
405 	struct apei_exec_context ctx;
406 	int rc;
407 
408 	erst_exec_ctx_init(&ctx);
409 	rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_COUNT);
410 	if (rc)
411 		return rc;
412 	return apei_exec_ctx_get_output(&ctx);
413 }
414 
415 ssize_t erst_get_record_count(void)
416 {
417 	ssize_t count;
418 	unsigned long flags;
419 
420 	if (erst_disable)
421 		return -ENODEV;
422 
423 	raw_spin_lock_irqsave(&erst_lock, flags);
424 	count = __erst_get_record_count();
425 	raw_spin_unlock_irqrestore(&erst_lock, flags);
426 
427 	return count;
428 }
429 EXPORT_SYMBOL_GPL(erst_get_record_count);
430 
/* Bounds, in entries, applied when the record ID cache is grown */
#define ERST_RECORD_ID_CACHE_SIZE_MIN	16
#define ERST_RECORD_ID_CACHE_SIZE_MAX	1024

/*
 * Cache of record IDs obtained from the firmware.  Slots whose record
 * has been cleared hold APEI_ERST_INVALID_RECORD_ID until the cache is
 * compacted, which only happens while no enumeration is in progress
 * (refcount == 0).
 */
struct erst_record_id_cache {
	struct mutex lock;	/* protects all fields below */
	u64 *entries;		/* array of cached record IDs */
	int len;		/* number of slots in use */
	int size;		/* number of slots allocated */
	int refcount;		/* open erst_get_record_id_begin() calls */
};

static struct erst_record_id_cache erst_record_id_cache = {
	.lock = __MUTEX_INITIALIZER(erst_record_id_cache.lock),
	.refcount = 0,
};
446 
447 static int __erst_get_next_record_id(u64 *record_id)
448 {
449 	struct apei_exec_context ctx;
450 	int rc;
451 
452 	erst_exec_ctx_init(&ctx);
453 	rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_ID);
454 	if (rc)
455 		return rc;
456 	*record_id = apei_exec_ctx_get_output(&ctx);
457 
458 	return 0;
459 }
460 
461 int erst_get_record_id_begin(int *pos)
462 {
463 	int rc;
464 
465 	if (erst_disable)
466 		return -ENODEV;
467 
468 	rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
469 	if (rc)
470 		return rc;
471 	erst_record_id_cache.refcount++;
472 	mutex_unlock(&erst_record_id_cache.lock);
473 
474 	*pos = 0;
475 
476 	return 0;
477 }
478 EXPORT_SYMBOL_GPL(erst_get_record_id_begin);
479 
480 /* erst_record_id_cache.lock must be held by caller */
481 static int __erst_record_id_cache_add_one(void)
482 {
483 	u64 id, prev_id, first_id;
484 	int i, rc;
485 	u64 *entries;
486 	unsigned long flags;
487 
488 	id = prev_id = first_id = APEI_ERST_INVALID_RECORD_ID;
489 retry:
490 	raw_spin_lock_irqsave(&erst_lock, flags);
491 	rc = __erst_get_next_record_id(&id);
492 	raw_spin_unlock_irqrestore(&erst_lock, flags);
493 	if (rc == -ENOENT)
494 		return 0;
495 	if (rc)
496 		return rc;
497 	if (id == APEI_ERST_INVALID_RECORD_ID)
498 		return 0;
499 	/* can not skip current ID, or loop back to first ID */
500 	if (id == prev_id || id == first_id)
501 		return 0;
502 	if (first_id == APEI_ERST_INVALID_RECORD_ID)
503 		first_id = id;
504 	prev_id = id;
505 
506 	entries = erst_record_id_cache.entries;
507 	for (i = 0; i < erst_record_id_cache.len; i++) {
508 		if (entries[i] == id)
509 			break;
510 	}
511 	/* record id already in cache, try next */
512 	if (i < erst_record_id_cache.len)
513 		goto retry;
514 	if (erst_record_id_cache.len >= erst_record_id_cache.size) {
515 		int new_size, alloc_size;
516 		u64 *new_entries;
517 
518 		new_size = erst_record_id_cache.size * 2;
519 		new_size = clamp_val(new_size, ERST_RECORD_ID_CACHE_SIZE_MIN,
520 				     ERST_RECORD_ID_CACHE_SIZE_MAX);
521 		if (new_size <= erst_record_id_cache.size) {
522 			if (printk_ratelimit())
523 				pr_warn(FW_WARN "too many record IDs!\n");
524 			return 0;
525 		}
526 		alloc_size = new_size * sizeof(entries[0]);
527 		if (alloc_size < PAGE_SIZE)
528 			new_entries = kmalloc(alloc_size, GFP_KERNEL);
529 		else
530 			new_entries = vmalloc(alloc_size);
531 		if (!new_entries)
532 			return -ENOMEM;
533 		memcpy(new_entries, entries,
534 		       erst_record_id_cache.len * sizeof(entries[0]));
535 		if (erst_record_id_cache.size < PAGE_SIZE)
536 			kfree(entries);
537 		else
538 			vfree(entries);
539 		erst_record_id_cache.entries = entries = new_entries;
540 		erst_record_id_cache.size = new_size;
541 	}
542 	entries[i] = id;
543 	erst_record_id_cache.len++;
544 
545 	return 1;
546 }
547 
548 /*
549  * Get the record ID of an existing error record on the persistent
550  * storage. If there is no error record on the persistent storage, the
551  * returned record_id is APEI_ERST_INVALID_RECORD_ID.
552  */
553 int erst_get_record_id_next(int *pos, u64 *record_id)
554 {
555 	int rc = 0;
556 	u64 *entries;
557 
558 	if (erst_disable)
559 		return -ENODEV;
560 
561 	/* must be enclosed by erst_get_record_id_begin/end */
562 	BUG_ON(!erst_record_id_cache.refcount);
563 	BUG_ON(*pos < 0 || *pos > erst_record_id_cache.len);
564 
565 	mutex_lock(&erst_record_id_cache.lock);
566 	entries = erst_record_id_cache.entries;
567 	for (; *pos < erst_record_id_cache.len; (*pos)++)
568 		if (entries[*pos] != APEI_ERST_INVALID_RECORD_ID)
569 			break;
570 	/* found next record id in cache */
571 	if (*pos < erst_record_id_cache.len) {
572 		*record_id = entries[*pos];
573 		(*pos)++;
574 		goto out_unlock;
575 	}
576 
577 	/* Try to add one more record ID to cache */
578 	rc = __erst_record_id_cache_add_one();
579 	if (rc < 0)
580 		goto out_unlock;
581 	/* successfully add one new ID */
582 	if (rc == 1) {
583 		*record_id = erst_record_id_cache.entries[*pos];
584 		(*pos)++;
585 		rc = 0;
586 	} else {
587 		*pos = -1;
588 		*record_id = APEI_ERST_INVALID_RECORD_ID;
589 	}
590 out_unlock:
591 	mutex_unlock(&erst_record_id_cache.lock);
592 
593 	return rc;
594 }
595 EXPORT_SYMBOL_GPL(erst_get_record_id_next);
596 
597 /* erst_record_id_cache.lock must be held by caller */
598 static void __erst_record_id_cache_compact(void)
599 {
600 	int i, wpos = 0;
601 	u64 *entries;
602 
603 	if (erst_record_id_cache.refcount)
604 		return;
605 
606 	entries = erst_record_id_cache.entries;
607 	for (i = 0; i < erst_record_id_cache.len; i++) {
608 		if (entries[i] == APEI_ERST_INVALID_RECORD_ID)
609 			continue;
610 		if (wpos != i)
611 			entries[wpos] = entries[i];
612 		wpos++;
613 	}
614 	erst_record_id_cache.len = wpos;
615 }
616 
617 void erst_get_record_id_end(void)
618 {
619 	/*
620 	 * erst_disable != 0 should be detected by invoker via the
621 	 * return value of erst_get_record_id_begin/next, so this
622 	 * function should not be called for erst_disable != 0.
623 	 */
624 	BUG_ON(erst_disable);
625 
626 	mutex_lock(&erst_record_id_cache.lock);
627 	erst_record_id_cache.refcount--;
628 	BUG_ON(erst_record_id_cache.refcount < 0);
629 	__erst_record_id_cache_compact();
630 	mutex_unlock(&erst_record_id_cache.lock);
631 }
632 EXPORT_SYMBOL_GPL(erst_get_record_id_end);
633 
634 static int __erst_write_to_storage(u64 offset)
635 {
636 	struct apei_exec_context ctx;
637 	u64 timeout = FIRMWARE_TIMEOUT;
638 	u64 val;
639 	int rc;
640 
641 	erst_exec_ctx_init(&ctx);
642 	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE);
643 	if (rc)
644 		return rc;
645 	apei_exec_ctx_set_input(&ctx, offset);
646 	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET);
647 	if (rc)
648 		return rc;
649 	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
650 	if (rc)
651 		return rc;
652 	for (;;) {
653 		rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
654 		if (rc)
655 			return rc;
656 		val = apei_exec_ctx_get_output(&ctx);
657 		if (!val)
658 			break;
659 		if (erst_timedout(&timeout, SPIN_UNIT))
660 			return -EIO;
661 	}
662 	rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
663 	if (rc)
664 		return rc;
665 	val = apei_exec_ctx_get_output(&ctx);
666 	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
667 	if (rc)
668 		return rc;
669 
670 	return erst_errno(val);
671 }
672 
673 static int __erst_read_from_storage(u64 record_id, u64 offset)
674 {
675 	struct apei_exec_context ctx;
676 	u64 timeout = FIRMWARE_TIMEOUT;
677 	u64 val;
678 	int rc;
679 
680 	erst_exec_ctx_init(&ctx);
681 	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ);
682 	if (rc)
683 		return rc;
684 	apei_exec_ctx_set_input(&ctx, offset);
685 	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET);
686 	if (rc)
687 		return rc;
688 	apei_exec_ctx_set_input(&ctx, record_id);
689 	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID);
690 	if (rc)
691 		return rc;
692 	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
693 	if (rc)
694 		return rc;
695 	for (;;) {
696 		rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
697 		if (rc)
698 			return rc;
699 		val = apei_exec_ctx_get_output(&ctx);
700 		if (!val)
701 			break;
702 		if (erst_timedout(&timeout, SPIN_UNIT))
703 			return -EIO;
704 	};
705 	rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
706 	if (rc)
707 		return rc;
708 	val = apei_exec_ctx_get_output(&ctx);
709 	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
710 	if (rc)
711 		return rc;
712 
713 	return erst_errno(val);
714 }
715 
716 static int __erst_clear_from_storage(u64 record_id)
717 {
718 	struct apei_exec_context ctx;
719 	u64 timeout = FIRMWARE_TIMEOUT;
720 	u64 val;
721 	int rc;
722 
723 	erst_exec_ctx_init(&ctx);
724 	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR);
725 	if (rc)
726 		return rc;
727 	apei_exec_ctx_set_input(&ctx, record_id);
728 	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID);
729 	if (rc)
730 		return rc;
731 	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
732 	if (rc)
733 		return rc;
734 	for (;;) {
735 		rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
736 		if (rc)
737 			return rc;
738 		val = apei_exec_ctx_get_output(&ctx);
739 		if (!val)
740 			break;
741 		if (erst_timedout(&timeout, SPIN_UNIT))
742 			return -EIO;
743 	}
744 	rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
745 	if (rc)
746 		return rc;
747 	val = apei_exec_ctx_get_output(&ctx);
748 	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
749 	if (rc)
750 		return rc;
751 
752 	return erst_errno(val);
753 }
754 
/*
 * Emit a rate-limited warning that an NVRAM-type ERST Error Log Address
 * Range is not supported yet.
 */
static void pr_unimpl_nvram(void)
{
	if (!printk_ratelimit())
		return;

	pr_warn("NVRAM ERST Log Address Range not implemented yet.\n");
}
761 
762 static int __erst_write_to_nvram(const struct cper_record_header *record)
763 {
764 	/* do not print message, because printk is not safe for NMI */
765 	return -ENOSYS;
766 }
767 
768 static int __erst_read_to_erange_from_nvram(u64 record_id, u64 *offset)
769 {
770 	pr_unimpl_nvram();
771 	return -ENOSYS;
772 }
773 
774 static int __erst_clear_from_nvram(u64 record_id)
775 {
776 	pr_unimpl_nvram();
777 	return -ENOSYS;
778 }
779 
780 int erst_write(const struct cper_record_header *record)
781 {
782 	int rc;
783 	unsigned long flags;
784 	struct cper_record_header *rcd_erange;
785 
786 	if (erst_disable)
787 		return -ENODEV;
788 
789 	if (memcmp(record->signature, CPER_SIG_RECORD, CPER_SIG_SIZE))
790 		return -EINVAL;
791 
792 	if (erst_erange.attr & ERST_RANGE_NVRAM) {
793 		if (!raw_spin_trylock_irqsave(&erst_lock, flags))
794 			return -EBUSY;
795 		rc = __erst_write_to_nvram(record);
796 		raw_spin_unlock_irqrestore(&erst_lock, flags);
797 		return rc;
798 	}
799 
800 	if (record->record_length > erst_erange.size)
801 		return -EINVAL;
802 
803 	if (!raw_spin_trylock_irqsave(&erst_lock, flags))
804 		return -EBUSY;
805 	memcpy(erst_erange.vaddr, record, record->record_length);
806 	rcd_erange = erst_erange.vaddr;
807 	/* signature for serialization system */
808 	memcpy(&rcd_erange->persistence_information, "ER", 2);
809 
810 	rc = __erst_write_to_storage(0);
811 	raw_spin_unlock_irqrestore(&erst_lock, flags);
812 
813 	return rc;
814 }
815 EXPORT_SYMBOL_GPL(erst_write);
816 
817 static int __erst_read_to_erange(u64 record_id, u64 *offset)
818 {
819 	int rc;
820 
821 	if (erst_erange.attr & ERST_RANGE_NVRAM)
822 		return __erst_read_to_erange_from_nvram(
823 			record_id, offset);
824 
825 	rc = __erst_read_from_storage(record_id, 0);
826 	if (rc)
827 		return rc;
828 	*offset = 0;
829 
830 	return 0;
831 }
832 
833 static ssize_t __erst_read(u64 record_id, struct cper_record_header *record,
834 			   size_t buflen)
835 {
836 	int rc;
837 	u64 offset, len = 0;
838 	struct cper_record_header *rcd_tmp;
839 
840 	rc = __erst_read_to_erange(record_id, &offset);
841 	if (rc)
842 		return rc;
843 	rcd_tmp = erst_erange.vaddr + offset;
844 	len = rcd_tmp->record_length;
845 	if (len <= buflen)
846 		memcpy(record, rcd_tmp, len);
847 
848 	return len;
849 }
850 
851 /*
852  * If return value > buflen, the buffer size is not big enough,
853  * else if return value < 0, something goes wrong,
854  * else everything is OK, and return value is record length
855  */
856 ssize_t erst_read(u64 record_id, struct cper_record_header *record,
857 		  size_t buflen)
858 {
859 	ssize_t len;
860 	unsigned long flags;
861 
862 	if (erst_disable)
863 		return -ENODEV;
864 
865 	raw_spin_lock_irqsave(&erst_lock, flags);
866 	len = __erst_read(record_id, record, buflen);
867 	raw_spin_unlock_irqrestore(&erst_lock, flags);
868 	return len;
869 }
870 EXPORT_SYMBOL_GPL(erst_read);
871 
872 int erst_clear(u64 record_id)
873 {
874 	int rc, i;
875 	unsigned long flags;
876 	u64 *entries;
877 
878 	if (erst_disable)
879 		return -ENODEV;
880 
881 	rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
882 	if (rc)
883 		return rc;
884 	raw_spin_lock_irqsave(&erst_lock, flags);
885 	if (erst_erange.attr & ERST_RANGE_NVRAM)
886 		rc = __erst_clear_from_nvram(record_id);
887 	else
888 		rc = __erst_clear_from_storage(record_id);
889 	raw_spin_unlock_irqrestore(&erst_lock, flags);
890 	if (rc)
891 		goto out;
892 	entries = erst_record_id_cache.entries;
893 	for (i = 0; i < erst_record_id_cache.len; i++) {
894 		if (entries[i] == record_id)
895 			entries[i] = APEI_ERST_INVALID_RECORD_ID;
896 	}
897 	__erst_record_id_cache_compact();
898 out:
899 	mutex_unlock(&erst_record_id_cache.lock);
900 	return rc;
901 }
902 EXPORT_SYMBOL_GPL(erst_clear);
903 
904 static int __init setup_erst_disable(char *str)
905 {
906 	erst_disable = 1;
907 	return 0;
908 }
909 
910 __setup("erst_disable", setup_erst_disable);
911 
912 static int erst_check_table(struct acpi_table_erst *erst_tab)
913 {
914 	if ((erst_tab->header_length !=
915 	     (sizeof(struct acpi_table_erst) - sizeof(erst_tab->header)))
916 	    && (erst_tab->header_length != sizeof(struct acpi_table_erst)))
917 		return -EINVAL;
918 	if (erst_tab->header.length < sizeof(struct acpi_table_erst))
919 		return -EINVAL;
920 	if (erst_tab->entries !=
921 	    (erst_tab->header.length - sizeof(struct acpi_table_erst)) /
922 	    sizeof(struct acpi_erst_entry))
923 		return -EINVAL;
924 
925 	return 0;
926 }
927 
/* pstore backend callbacks, defined further down in this file */
static int erst_open_pstore(struct pstore_info *psi);
static int erst_close_pstore(struct pstore_info *psi);
static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, int *count,
			   struct timespec *time, char **buf,
			   bool *compressed, struct pstore_info *psi);
static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason,
		       u64 *id, unsigned int part, int count, bool compressed,
		       size_t size, struct pstore_info *psi);
static int erst_clearer(enum pstore_type_id type, u64 id, int count,
			struct timespec time, struct pstore_info *psi);

/*
 * pstore backend description for ERST.  The buf/bufsize members are
 * filled in by erst_init() before the backend is registered.
 */
static struct pstore_info erst_info = {
	.owner		= THIS_MODULE,
	.name		= "erst",
	.flags		= PSTORE_FLAGS_FRAGILE,
	.open		= erst_open_pstore,
	.close		= erst_close_pstore,
	.read		= erst_reader,
	.write		= erst_writer,
	.erase		= erst_clearer
};
949 
/*
 * GUIDs used in records written through pstore.
 *
 * CPER_CREATOR_PSTORE is stored as the record creator_id by
 * erst_writer(); erst_reader() skips records with any other creator.
 * The CPER_SECTION_TYPE_* values are stored as the section type and are
 * mapped to and from pstore record types (DMESG, compressed DMESG, MCE).
 */
#define CPER_CREATOR_PSTORE						\
	UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c,	\
		0x64, 0x90, 0xb8, 0x9d)
#define CPER_SECTION_TYPE_DMESG						\
	UUID_LE(0xc197e04e, 0xd545, 0x4a70, 0x9c, 0x17, 0xa5, 0x54,	\
		0x94, 0x19, 0xeb, 0x12)
#define CPER_SECTION_TYPE_DMESG_Z					\
	UUID_LE(0x4f118707, 0x04dd, 0x4055, 0xb5, 0xdd, 0x95, 0x6d,	\
		0x34, 0xdd, 0xfa, 0xc6)
#define CPER_SECTION_TYPE_MCE						\
	UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96,	\
		0x04, 0x4a, 0x38, 0xfc)
962 
/*
 * Layout of a record written through pstore: a CPER record header, one
 * section descriptor, then the payload.
 */
struct cper_pstore_record {
	struct cper_record_header hdr;
	struct cper_section_descriptor sec_hdr;
	char data[];	/* payload, sec_hdr.section_length bytes */
} __packed;

/* Enumeration cursor shared by erst_open_pstore() and erst_reader() */
static int reader_pos;
970 
971 static int erst_open_pstore(struct pstore_info *psi)
972 {
973 	int rc;
974 
975 	if (erst_disable)
976 		return -ENODEV;
977 
978 	rc = erst_get_record_id_begin(&reader_pos);
979 
980 	return rc;
981 }
982 
/*
 * pstore ->close callback: finish the record ID enumeration started by
 * erst_open_pstore().  @psi is unused.  Always returns 0.
 */
static int erst_close_pstore(struct pstore_info *psi)
{
	const int rc = 0;

	erst_get_record_id_end();

	return rc;
}
989 
990 static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, int *count,
991 			   struct timespec *time, char **buf,
992 			   bool *compressed, struct pstore_info *psi)
993 {
994 	int rc;
995 	ssize_t len = 0;
996 	u64 record_id;
997 	struct cper_pstore_record *rcd;
998 	size_t rcd_len = sizeof(*rcd) + erst_info.bufsize;
999 
1000 	if (erst_disable)
1001 		return -ENODEV;
1002 
1003 	rcd = kmalloc(rcd_len, GFP_KERNEL);
1004 	if (!rcd) {
1005 		rc = -ENOMEM;
1006 		goto out;
1007 	}
1008 skip:
1009 	rc = erst_get_record_id_next(&reader_pos, &record_id);
1010 	if (rc)
1011 		goto out;
1012 
1013 	/* no more record */
1014 	if (record_id == APEI_ERST_INVALID_RECORD_ID) {
1015 		rc = -EINVAL;
1016 		goto out;
1017 	}
1018 
1019 	len = erst_read(record_id, &rcd->hdr, rcd_len);
1020 	/* The record may be cleared by others, try read next record */
1021 	if (len == -ENOENT)
1022 		goto skip;
1023 	else if (len < sizeof(*rcd)) {
1024 		rc = -EIO;
1025 		goto out;
1026 	}
1027 	if (uuid_le_cmp(rcd->hdr.creator_id, CPER_CREATOR_PSTORE) != 0)
1028 		goto skip;
1029 
1030 	*buf = kmalloc(len, GFP_KERNEL);
1031 	if (*buf == NULL) {
1032 		rc = -ENOMEM;
1033 		goto out;
1034 	}
1035 	memcpy(*buf, rcd->data, len - sizeof(*rcd));
1036 	*id = record_id;
1037 	*compressed = false;
1038 	if (uuid_le_cmp(rcd->sec_hdr.section_type,
1039 			CPER_SECTION_TYPE_DMESG_Z) == 0) {
1040 		*type = PSTORE_TYPE_DMESG;
1041 		*compressed = true;
1042 	} else if (uuid_le_cmp(rcd->sec_hdr.section_type,
1043 			CPER_SECTION_TYPE_DMESG) == 0)
1044 		*type = PSTORE_TYPE_DMESG;
1045 	else if (uuid_le_cmp(rcd->sec_hdr.section_type,
1046 			     CPER_SECTION_TYPE_MCE) == 0)
1047 		*type = PSTORE_TYPE_MCE;
1048 	else
1049 		*type = PSTORE_TYPE_UNKNOWN;
1050 
1051 	if (rcd->hdr.validation_bits & CPER_VALID_TIMESTAMP)
1052 		time->tv_sec = rcd->hdr.timestamp;
1053 	else
1054 		time->tv_sec = 0;
1055 	time->tv_nsec = 0;
1056 
1057 out:
1058 	kfree(rcd);
1059 	return (rc < 0) ? rc : (len - sizeof(*rcd));
1060 }
1061 
1062 static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason,
1063 		       u64 *id, unsigned int part, int count, bool compressed,
1064 		       size_t size, struct pstore_info *psi)
1065 {
1066 	struct cper_pstore_record *rcd = (struct cper_pstore_record *)
1067 					(erst_info.buf - sizeof(*rcd));
1068 	int ret;
1069 
1070 	memset(rcd, 0, sizeof(*rcd));
1071 	memcpy(rcd->hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
1072 	rcd->hdr.revision = CPER_RECORD_REV;
1073 	rcd->hdr.signature_end = CPER_SIG_END;
1074 	rcd->hdr.section_count = 1;
1075 	rcd->hdr.error_severity = CPER_SEV_FATAL;
1076 	/* timestamp valid. platform_id, partition_id are invalid */
1077 	rcd->hdr.validation_bits = CPER_VALID_TIMESTAMP;
1078 	rcd->hdr.timestamp = get_seconds();
1079 	rcd->hdr.record_length = sizeof(*rcd) + size;
1080 	rcd->hdr.creator_id = CPER_CREATOR_PSTORE;
1081 	rcd->hdr.notification_type = CPER_NOTIFY_MCE;
1082 	rcd->hdr.record_id = cper_next_record_id();
1083 	rcd->hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
1084 
1085 	rcd->sec_hdr.section_offset = sizeof(*rcd);
1086 	rcd->sec_hdr.section_length = size;
1087 	rcd->sec_hdr.revision = CPER_SEC_REV;
1088 	/* fru_id and fru_text is invalid */
1089 	rcd->sec_hdr.validation_bits = 0;
1090 	rcd->sec_hdr.flags = CPER_SEC_PRIMARY;
1091 	switch (type) {
1092 	case PSTORE_TYPE_DMESG:
1093 		if (compressed)
1094 			rcd->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG_Z;
1095 		else
1096 			rcd->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG;
1097 		break;
1098 	case PSTORE_TYPE_MCE:
1099 		rcd->sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
1100 		break;
1101 	default:
1102 		return -EINVAL;
1103 	}
1104 	rcd->sec_hdr.section_severity = CPER_SEV_FATAL;
1105 
1106 	ret = erst_write(&rcd->hdr);
1107 	*id = rcd->hdr.record_id;
1108 
1109 	return ret;
1110 }
1111 
1112 static int erst_clearer(enum pstore_type_id type, u64 id, int count,
1113 			struct timespec time, struct pstore_info *psi)
1114 {
1115 	return erst_clear(id);
1116 }
1117 
1118 static int __init erst_init(void)
1119 {
1120 	int rc = 0;
1121 	acpi_status status;
1122 	struct apei_exec_context ctx;
1123 	struct apei_resources erst_resources;
1124 	struct resource *r;
1125 	char *buf;
1126 
1127 	if (acpi_disabled)
1128 		goto err;
1129 
1130 	if (erst_disable) {
1131 		pr_info(
1132 	"Error Record Serialization Table (ERST) support is disabled.\n");
1133 		goto err;
1134 	}
1135 
1136 	status = acpi_get_table(ACPI_SIG_ERST, 0,
1137 				(struct acpi_table_header **)&erst_tab);
1138 	if (status == AE_NOT_FOUND)
1139 		goto err;
1140 	else if (ACPI_FAILURE(status)) {
1141 		const char *msg = acpi_format_exception(status);
1142 		pr_err("Failed to get table, %s\n", msg);
1143 		rc = -EINVAL;
1144 		goto err;
1145 	}
1146 
1147 	rc = erst_check_table(erst_tab);
1148 	if (rc) {
1149 		pr_err(FW_BUG "ERST table is invalid.\n");
1150 		goto err;
1151 	}
1152 
1153 	apei_resources_init(&erst_resources);
1154 	erst_exec_ctx_init(&ctx);
1155 	rc = apei_exec_collect_resources(&ctx, &erst_resources);
1156 	if (rc)
1157 		goto err_fini;
1158 	rc = apei_resources_request(&erst_resources, "APEI ERST");
1159 	if (rc)
1160 		goto err_fini;
1161 	rc = apei_exec_pre_map_gars(&ctx);
1162 	if (rc)
1163 		goto err_release;
1164 	rc = erst_get_erange(&erst_erange);
1165 	if (rc) {
1166 		if (rc == -ENODEV)
1167 			pr_info(
1168 	"The corresponding hardware device or firmware implementation "
1169 	"is not available.\n");
1170 		else
1171 			pr_err("Failed to get Error Log Address Range.\n");
1172 		goto err_unmap_reg;
1173 	}
1174 
1175 	r = request_mem_region(erst_erange.base, erst_erange.size, "APEI ERST");
1176 	if (!r) {
1177 		pr_err("Can not request [mem %#010llx-%#010llx] for ERST.\n",
1178 		       (unsigned long long)erst_erange.base,
1179 		       (unsigned long long)erst_erange.base + erst_erange.size - 1);
1180 		rc = -EIO;
1181 		goto err_unmap_reg;
1182 	}
1183 	rc = -ENOMEM;
1184 	erst_erange.vaddr = ioremap_cache(erst_erange.base,
1185 					  erst_erange.size);
1186 	if (!erst_erange.vaddr)
1187 		goto err_release_erange;
1188 
1189 	pr_info(
1190 	"Error Record Serialization Table (ERST) support is initialized.\n");
1191 
1192 	buf = kmalloc(erst_erange.size, GFP_KERNEL);
1193 	spin_lock_init(&erst_info.buf_lock);
1194 	if (buf) {
1195 		erst_info.buf = buf + sizeof(struct cper_pstore_record);
1196 		erst_info.bufsize = erst_erange.size -
1197 				    sizeof(struct cper_pstore_record);
1198 		rc = pstore_register(&erst_info);
1199 		if (rc) {
1200 			if (rc != -EPERM)
1201 				pr_info(
1202 				"Could not register with persistent store.\n");
1203 			erst_info.buf = NULL;
1204 			erst_info.bufsize = 0;
1205 			kfree(buf);
1206 		}
1207 	} else
1208 		pr_err(
1209 		"Failed to allocate %lld bytes for persistent store error log.\n",
1210 		erst_erange.size);
1211 
1212 	return 0;
1213 
1214 err_release_erange:
1215 	release_mem_region(erst_erange.base, erst_erange.size);
1216 err_unmap_reg:
1217 	apei_exec_post_unmap_gars(&ctx);
1218 err_release:
1219 	apei_resources_release(&erst_resources);
1220 err_fini:
1221 	apei_resources_fini(&erst_resources);
1222 err:
1223 	erst_disable = 1;
1224 	return rc;
1225 }
1226 
1227 device_initcall(erst_init);
1228