xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c (revision 2c97b5ae83dca56718774e7b4bf9640f05d11867)
1 /*
2  * Copyright 2018 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  *
23  */
24 #include <linux/debugfs.h>
25 #include <linux/list.h>
26 #include <linux/module.h>
27 #include <linux/uaccess.h>
28 #include <linux/reboot.h>
29 #include <linux/syscalls.h>
30 
31 #include "amdgpu.h"
32 #include "amdgpu_ras.h"
33 #include "amdgpu_atomfirmware.h"
34 #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
35 
36 const char *ras_error_string[] = {
37 	"none",
38 	"parity",
39 	"single_correctable",
40 	"multi_uncorrectable",
41 	"poison",
42 };
43 
44 const char *ras_block_string[] = {
45 	"umc",
46 	"sdma",
47 	"gfx",
48 	"mmhub",
49 	"athub",
50 	"pcie_bif",
51 	"hdp",
52 	"xgmi_wafl",
53 	"df",
54 	"smn",
55 	"sem",
56 	"mp0",
57 	"mp1",
58 	"fuse",
59 };
60 
61 #define ras_err_str(i) (ras_error_string[ffs(i)])
62 #define ras_block_str(i) (ras_block_string[i])
63 
64 #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS		1
65 #define AMDGPU_RAS_FLAG_INIT_NEED_RESET		2
66 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
67 
68 /* inject address is 52 bits */
69 #define	RAS_UMC_INJECT_ADDR_LIMIT	(0x1ULL << 52)
70 
71 enum amdgpu_ras_retire_page_reservation {
72 	AMDGPU_RAS_RETIRE_PAGE_RESERVED,
73 	AMDGPU_RAS_RETIRE_PAGE_PENDING,
74 	AMDGPU_RAS_RETIRE_PAGE_FAULT,
75 };
76 
77 atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
78 
79 static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
80 				uint64_t addr);
81 
82 static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
83 					size_t size, loff_t *pos)
84 {
85 	struct ras_manager *obj = (struct ras_manager *)file_inode(f)->i_private;
86 	struct ras_query_if info = {
87 		.head = obj->head,
88 	};
89 	ssize_t s;
90 	char val[128];
91 
92 	if (amdgpu_ras_error_query(obj->adev, &info))
93 		return -EINVAL;
94 
95 	s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n",
96 			"ue", info.ue_count,
97 			"ce", info.ce_count);
98 	if (*pos >= s)
99 		return 0;
100 
101 	s -= *pos;
102 	s = min_t(u64, s, size);
103 
104 
105 	if (copy_to_user(buf, &val[*pos], s))
106 		return -EINVAL;
107 
108 	*pos += s;
109 
110 	return s;
111 }
112 
113 static const struct file_operations amdgpu_ras_debugfs_ops = {
114 	.owner = THIS_MODULE,
115 	.read = amdgpu_ras_debugfs_read,
116 	.write = NULL,
117 	.llseek = default_llseek
118 };
119 
120 static int amdgpu_ras_find_block_id_by_name(const char *name, int *block_id)
121 {
122 	int i;
123 
124 	for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) {
125 		*block_id = i;
126 		if (strcmp(name, ras_block_str(i)) == 0)
127 			return 0;
128 	}
129 	return -EINVAL;
130 }
131 
132 static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
133 		const char __user *buf, size_t size,
134 		loff_t *pos, struct ras_debug_if *data)
135 {
136 	ssize_t s = min_t(u64, 64, size);
137 	char str[65];
138 	char block_name[33];
139 	char err[9] = "ue";
140 	int op = -1;
141 	int block_id;
142 	uint32_t sub_block;
143 	u64 address, value;
144 
145 	if (*pos)
146 		return -EINVAL;
147 	*pos = size;
148 
149 	memset(str, 0, sizeof(str));
150 	memset(data, 0, sizeof(*data));
151 
152 	if (copy_from_user(str, buf, s))
153 		return -EINVAL;
154 
155 	if (sscanf(str, "disable %32s", block_name) == 1)
156 		op = 0;
157 	else if (sscanf(str, "enable %32s %8s", block_name, err) == 2)
158 		op = 1;
159 	else if (sscanf(str, "inject %32s %8s", block_name, err) == 2)
160 		op = 2;
161 	else if (str[0] && str[1] && str[2] && str[3])
162 		/* ascii string, but commands are not matched. */
163 		return -EINVAL;
164 
165 	if (op != -1) {
166 		if (amdgpu_ras_find_block_id_by_name(block_name, &block_id))
167 			return -EINVAL;
168 
169 		data->head.block = block_id;
170 		/* only ue and ce errors are supported */
171 		if (!memcmp("ue", err, 2))
172 			data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
173 		else if (!memcmp("ce", err, 2))
174 			data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
175 		else
176 			return -EINVAL;
177 
178 		data->op = op;
179 
180 		if (op == 2) {
181 			if (sscanf(str, "%*s %*s %*s %u %llu %llu",
182 						&sub_block, &address, &value) != 3)
183 				if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
184 							&sub_block, &address, &value) != 3)
185 					return -EINVAL;
186 			data->head.sub_block_index = sub_block;
187 			data->inject.address = address;
188 			data->inject.value = value;
189 		}
190 	} else {
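		/* not a text command: treat the buffer as a raw struct ras_debug_if */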
191 		if (size < sizeof(*data))
192 			return -EINVAL;
193 
194 		if (copy_from_user(data, buf, sizeof(*data)))
195 			return -EINVAL;
196 	}
197 
198 	return 0;
199 }
200 
201 static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
202 		struct ras_common_if *head);
203 
204 /**
205  * DOC: AMDGPU RAS debugfs control interface
206  *
207  * It accepts struct ras_debug_if which has two members.
208  *
209  * First member: ras_debug_if::head or ras_debug_if::inject.
210  *
211  * head is used to indicate which IP block will be under control.
212  *
213  * head has four members, they are block, type, sub_block_index, name.
214  * block: which IP will be under control.
215  * type: what kind of error will be enabled/disabled/injected.
216  * sub_block_index: some IPs have subcomponents, say, GFX, SDMA.
217  * name: the name of IP.
218  *
219  * inject has two more members than head, they are address, value.
220  * As their names indicate, inject operation will write the
221  * value to the address.
222  *
223  * The second member: struct ras_debug_if::op.
224  * It has three kinds of operations.
225  *
226  * - 0: disable RAS on the block. Take ::head as its data.
227  * - 1: enable RAS on the block. Take ::head as its data.
228  * - 2: inject errors on the block. Take ::inject as its data.
229  *
230  * How to use the interface?
231  *
232  * Programs
233  *
234  * Copy the struct ras_debug_if in your codes and initialize it.
235  * Write the struct to the control node.
236  *
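 * A minimal userspace sketch for the "Programs" path (illustrative only; it
 * assumes the layout of struct ras_debug_if has been copied from
 * amdgpu_ras.h and that the first device's debugfs node is used):
 *
 * .. code-block:: c
 *
 *	int fd = open("/sys/kernel/debug/dri/0/ras/ras_ctrl", O_WRONLY);
 *	struct ras_debug_if data = {0};
 *
 *	data.head.block = AMDGPU_RAS_BLOCK__UMC;
 *	data.head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
 *	data.op = 1;	/* 0: disable, 1: enable, 2: inject */
 *
 *	if (write(fd, &data, sizeof(data)) != sizeof(data))
 *		perror("ras_ctrl");
 *	close(fd);
 *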
237  * Shells
238  *
239  * .. code-block:: bash
240  *
241  *	echo op block [error [sub_block address value]] > .../ras/ras_ctrl
242  *
243  * Parameters:
244  *
245  * op: disable, enable, inject
246  *	disable: only block is needed
247  *	enable: block and error are needed
248  *	inject: block, error, sub_block, address and value are needed
249  * block: umc, sdma, gfx, ...
250  *	see ras_block_string[] for details
251  * error: ue, ce
252  *	ue: multi_uncorrectable
253  *	ce: single_correctable
254  * sub_block:
255  *	sub block index, pass 0 if there is no sub block
256  *
257  * here are some examples for bash commands:
258  *
259  * .. code-block:: bash
260  *
261  *	echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
262  *	echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
263  *	echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
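 *	echo enable umc ce > /sys/kernel/debug/dri/0/ras/ras_ctrl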
264  *
265  * How to check the result?
266  *
267  * For disable/enable, please check ras features at
268  * /sys/class/drm/card[0/1/2...]/device/ras/features
269  *
270  * For inject, please check corresponding err count at
271  * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
272  *
273  * .. note::
274  *	Operations are only allowed on blocks which are supported.
275  *	Please check ras mask at /sys/module/amdgpu/parameters/ras_mask
276  *	to see which blocks support RAS on a particular asic.
277  *
278  */
279 static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf,
280 		size_t size, loff_t *pos)
281 {
282 	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
283 	struct ras_debug_if data;
284 	int ret = 0;
285 
286 	ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data);
287 	if (ret)
288 		return -EINVAL;
289 
290 	if (!amdgpu_ras_is_supported(adev, data.head.block))
291 		return -EINVAL;
292 
293 	switch (data.op) {
294 	case 0:
295 		ret = amdgpu_ras_feature_enable(adev, &data.head, 0);
296 		break;
297 	case 1:
298 		ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
299 		break;
300 	case 2:
301 		if ((data.inject.address >= adev->gmc.mc_vram_size) ||
302 		    (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
303 			ret = -EINVAL;
304 			break;
305 		}
306 
307 		/* umc ce/ue error injection for a bad page is not allowed */
308 		if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
309 		    amdgpu_ras_check_bad_page(adev, data.inject.address)) {
310 			DRM_WARN("RAS WARN: 0x%llx has been marked as bad before error injection!\n",
311 					data.inject.address);
312 			break;
313 		}
314 
315 		/* data.inject.address is an offset, not an absolute gpu address */
316 		ret = amdgpu_ras_error_inject(adev, &data.inject);
317 		break;
318 	default:
319 		ret = -EINVAL;
320 		break;
321 	}
322 
323 	if (ret)
324 		return -EINVAL;
325 
326 	return size;
327 }
328 
329 /**
330  * DOC: AMDGPU RAS debugfs EEPROM table reset interface
331  *
332  * Some boards contain an EEPROM which is used to persistently store a list of
333  * bad pages which have experienced ECC errors in vram.  This interface provides
334  * a way to reset the EEPROM, e.g., after testing error injection.
335  *
336  * Usage:
337  *
338  * .. code-block:: bash
339  *
340  *	echo 1 > ../ras/ras_eeprom_reset
341  *
342  * will reset EEPROM table to 0 entries.
343  *
344  */
345 static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, const char __user *buf,
346 		size_t size, loff_t *pos)
347 {
348 	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
349 	int ret;
350 
351 	ret = amdgpu_ras_eeprom_reset_table(&adev->psp.ras.ras->eeprom_control);
352 
353 	return ret == 1 ? size : -EIO;
354 }
355 
356 static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
357 	.owner = THIS_MODULE,
358 	.read = NULL,
359 	.write = amdgpu_ras_debugfs_ctrl_write,
360 	.llseek = default_llseek
361 };
362 
363 static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
364 	.owner = THIS_MODULE,
365 	.read = NULL,
366 	.write = amdgpu_ras_debugfs_eeprom_write,
367 	.llseek = default_llseek
368 };
369 
370 /**
371  * DOC: AMDGPU RAS sysfs Error Count Interface
372  *
373  * It allows the user to read the error count for each IP block on the gpu through
374  * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
375  *
376  * It outputs multiple lines which report the uncorrected (ue) and corrected
377  * (ce) error counts.
378  *
379  * The format of one line is below,
380  *
381  * [ce|ue]: count
382  *
383  * Example:
384  *
385  * .. code-block:: bash
386  *
387  *	ue: 0
388  *	ce: 1
389  *
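 * The per-block count files are regular sysfs attributes; an illustrative
 * read (assuming card0 and the umc block):
 *
 * .. code-block:: bash
 *
 *	cat /sys/class/drm/card0/device/ras/umc_err_count
 *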
390  */
391 static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
392 		struct device_attribute *attr, char *buf)
393 {
394 	struct ras_manager *obj = container_of(attr, struct ras_manager, sysfs_attr);
395 	struct ras_query_if info = {
396 		.head = obj->head,
397 	};
398 
399 	if (amdgpu_ras_error_query(obj->adev, &info))
400 		return -EINVAL;
401 
402 	return snprintf(buf, PAGE_SIZE, "%s: %lu\n%s: %lu\n",
403 			"ue", info.ue_count,
404 			"ce", info.ce_count);
405 }
406 
407 /* obj begin */
408 
409 #define get_obj(obj) do { (obj)->use++; } while (0)
410 #define alive_obj(obj) ((obj)->use)
411 
412 static inline void put_obj(struct ras_manager *obj)
413 {
414 	if (obj && --obj->use == 0)
415 		list_del(&obj->node);
416 	if (obj && obj->use < 0) {
417 		DRM_ERROR("RAS ERROR: Unbalanced obj(%s) use\n", obj->head.name);
418 	}
419 }
420 
421 /* make one obj and return it. */
422 static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
423 		struct ras_common_if *head)
424 {
425 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
426 	struct ras_manager *obj;
427 
428 	if (!con)
429 		return NULL;
430 
431 	if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
432 		return NULL;
433 
434 	obj = &con->objs[head->block];
435 	/* already exists. return obj? */
436 	if (alive_obj(obj))
437 		return NULL;
438 
439 	obj->head = *head;
440 	obj->adev = adev;
441 	list_add(&obj->node, &con->head);
442 	get_obj(obj);
443 
444 	return obj;
445 }
446 
447 /* return an obj equal to head, or the first when head is NULL */
448 static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
449 		struct ras_common_if *head)
450 {
451 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
452 	struct ras_manager *obj;
453 	int i;
454 
455 	if (!con)
456 		return NULL;
457 
458 	if (head) {
459 		if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
460 			return NULL;
461 
462 		obj = &con->objs[head->block];
463 
464 		if (alive_obj(obj)) {
465 			WARN_ON(head->block != obj->head.block);
466 			return obj;
467 		}
468 	} else {
469 		for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT; i++) {
470 			obj = &con->objs[i];
471 			if (alive_obj(obj)) {
472 				WARN_ON(i != obj->head.block);
473 				return obj;
474 			}
475 		}
476 	}
477 
478 	return NULL;
479 }
480 /* obj end */
481 
482 /* feature ctl begin */
483 static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev,
484 		struct ras_common_if *head)
485 {
486 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
487 
488 	return con->hw_supported & BIT(head->block);
489 }
490 
491 static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev,
492 		struct ras_common_if *head)
493 {
494 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
495 
496 	return con->features & BIT(head->block);
497 }
498 
499 /*
500  * if obj is not created, then create one.
501  * set feature enable flag.
502  */
503 static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
504 		struct ras_common_if *head, int enable)
505 {
506 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
507 	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
508 
509 	/* If hardware does not support ras, then do not create obj.
510 	 * But if hardware supports ras, we can create the obj.
511 	 * The ras framework checks con->hw_supported to see if it needs to do
512 	 * the corresponding initialization.
513 	 * IP blocks check con->supported to see if they need to disable ras.
514 	 */
515 	if (!amdgpu_ras_is_feature_allowed(adev, head))
516 		return 0;
517 	if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head)))
518 		return 0;
519 
520 	if (enable) {
521 		if (!obj) {
522 			obj = amdgpu_ras_create_obj(adev, head);
523 			if (!obj)
524 				return -EINVAL;
525 		} else {
526 			/* In case we create obj somewhere else */
527 			get_obj(obj);
528 		}
529 		con->features |= BIT(head->block);
530 	} else {
531 		if (obj && amdgpu_ras_is_feature_enabled(adev, head)) {
532 			con->features &= ~BIT(head->block);
533 			put_obj(obj);
534 		}
535 	}
536 
537 	return 0;
538 }
539 
540 /* wrapper of psp_ras_enable_features */
541 int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
542 		struct ras_common_if *head, bool enable)
543 {
544 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
545 	union ta_ras_cmd_input info;
546 	int ret;
547 
548 	if (!con)
549 		return -EINVAL;
550 
551 	if (!enable) {
552 		info.disable_features = (struct ta_ras_disable_features_input) {
553 			.block_id =  amdgpu_ras_block_to_ta(head->block),
554 			.error_type = amdgpu_ras_error_to_ta(head->type),
555 		};
556 	} else {
557 		info.enable_features = (struct ta_ras_enable_features_input) {
558 			.block_id =  amdgpu_ras_block_to_ta(head->block),
559 			.error_type = amdgpu_ras_error_to_ta(head->type),
560 		};
561 	}
562 
563 	/* Do not enable if it is not allowed. */
564 	WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head));
565 	/* Are we already in the state we are going to set? */
566 	if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head)))
567 		return 0;
568 
569 	if (!amdgpu_ras_intr_triggered()) {
570 		ret = psp_ras_enable_features(&adev->psp, &info, enable);
571 		if (ret) {
572 			DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n",
573 					enable ? "enable":"disable",
574 					ras_block_str(head->block),
575 					ret);
576 			if (ret == TA_RAS_STATUS__RESET_NEEDED)
577 				return -EAGAIN;
578 			return -EINVAL;
579 		}
580 	}
581 
582 	/* setup the obj */
583 	__amdgpu_ras_feature_enable(adev, head, enable);
584 
585 	return 0;
586 }
587 
588 /* Only used in device probe stage and called only once. */
589 int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
590 		struct ras_common_if *head, bool enable)
591 {
592 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
593 	int ret;
594 
595 	if (!con)
596 		return -EINVAL;
597 
598 	if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
599 		if (enable) {
600 			/* There is no harm in issuing a ras TA cmd regardless of
601 			 * the current ras state.
602 			 * If current state == target state, it will do nothing.
603 			 * But sometimes it requests the driver to reset and repost
604 			 * with error code -EAGAIN.
605 			 */
606 			ret = amdgpu_ras_feature_enable(adev, head, 1);
607 			/* With old ras TA, we might fail to enable ras.
608 			 * Log it and just set up the object.
609 			 * TODO: remove this WA in the future.
610 			 */
611 			if (ret == -EINVAL) {
612 				ret = __amdgpu_ras_feature_enable(adev, head, 1);
613 				if (!ret)
614 					DRM_INFO("RAS INFO: %s setup object\n",
615 						ras_block_str(head->block));
616 			}
617 		} else {
618 			/* setup the object then issue a ras TA disable cmd. */
619 			ret = __amdgpu_ras_feature_enable(adev, head, 1);
620 			if (ret)
621 				return ret;
622 
623 			ret = amdgpu_ras_feature_enable(adev, head, 0);
624 		}
625 	} else
626 		ret = amdgpu_ras_feature_enable(adev, head, enable);
627 
628 	return ret;
629 }
630 
631 static int amdgpu_ras_disable_all_features(struct amdgpu_device *adev,
632 		bool bypass)
633 {
634 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
635 	struct ras_manager *obj, *tmp;
636 
637 	list_for_each_entry_safe(obj, tmp, &con->head, node) {
638 		/* bypass psp.
639 		 * aka just release the obj and corresponding flags
640 		 */
641 		if (bypass) {
642 			if (__amdgpu_ras_feature_enable(adev, &obj->head, 0))
643 				break;
644 		} else {
645 			if (amdgpu_ras_feature_enable(adev, &obj->head, 0))
646 				break;
647 		}
648 	}
649 
650 	return con->features;
651 }
652 
653 static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev,
654 		bool bypass)
655 {
656 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
657 	int ras_block_count = AMDGPU_RAS_BLOCK_COUNT;
658 	int i;
659 	const enum amdgpu_ras_error_type default_ras_type =
660 		AMDGPU_RAS_ERROR__NONE;
661 
662 	for (i = 0; i < ras_block_count; i++) {
663 		struct ras_common_if head = {
664 			.block = i,
665 			.type = default_ras_type,
666 			.sub_block_index = 0,
667 		};
668 		strcpy(head.name, ras_block_str(i));
669 		if (bypass) {
670 			/*
671 			 * bypass psp. vbios enables ras for us,
672 			 * so just create the obj.
673 			 */
674 			if (__amdgpu_ras_feature_enable(adev, &head, 1))
675 				break;
676 		} else {
677 			if (amdgpu_ras_feature_enable(adev, &head, 1))
678 				break;
679 		}
680 	}
681 
682 	return con->features;
683 }
684 /* feature ctl end */
685 
686 /* query/inject/cure begin */
687 int amdgpu_ras_error_query(struct amdgpu_device *adev,
688 		struct ras_query_if *info)
689 {
690 	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
691 	struct ras_err_data err_data = {0, 0, 0, NULL};
692 
693 	if (!obj)
694 		return -EINVAL;
695 
696 	switch (info->head.block) {
697 	case AMDGPU_RAS_BLOCK__UMC:
698 		if (adev->umc.funcs->query_ras_error_count)
699 			adev->umc.funcs->query_ras_error_count(adev, &err_data);
700 		/* umc query_ras_error_address is also responsible for clearing
701 		 * error status
702 		 */
703 		if (adev->umc.funcs->query_ras_error_address)
704 			adev->umc.funcs->query_ras_error_address(adev, &err_data);
705 		break;
706 	case AMDGPU_RAS_BLOCK__GFX:
707 		if (adev->gfx.funcs->query_ras_error_count)
708 			adev->gfx.funcs->query_ras_error_count(adev, &err_data);
709 		break;
710 	case AMDGPU_RAS_BLOCK__MMHUB:
711 		if (adev->mmhub.funcs->query_ras_error_count)
712 			adev->mmhub.funcs->query_ras_error_count(adev, &err_data);
713 		break;
714 	case AMDGPU_RAS_BLOCK__PCIE_BIF:
715 		if (adev->nbio.funcs->query_ras_error_count)
716 			adev->nbio.funcs->query_ras_error_count(adev, &err_data);
717 		break;
718 	default:
719 		break;
720 	}
721 
722 	obj->err_data.ue_count += err_data.ue_count;
723 	obj->err_data.ce_count += err_data.ce_count;
724 
725 	info->ue_count = obj->err_data.ue_count;
726 	info->ce_count = obj->err_data.ce_count;
727 
728 	if (err_data.ce_count) {
729 		dev_info(adev->dev, "%ld correctable errors detected in %s block\n",
730 			 obj->err_data.ce_count, ras_block_str(info->head.block));
731 	}
732 	if (err_data.ue_count) {
733 		dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n",
734 			 obj->err_data.ue_count, ras_block_str(info->head.block));
735 	}
736 
737 	return 0;
738 }
739 
740 /* wrapper of psp_ras_trigger_error */
741 int amdgpu_ras_error_inject(struct amdgpu_device *adev,
742 		struct ras_inject_if *info)
743 {
744 	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
745 	struct ta_ras_trigger_error_input block_info = {
746 		.block_id =  amdgpu_ras_block_to_ta(info->head.block),
747 		.inject_error_type = amdgpu_ras_error_to_ta(info->head.type),
748 		.sub_block_index = info->head.sub_block_index,
749 		.address = info->address,
750 		.value = info->value,
751 	};
752 	int ret = 0;
753 
754 	if (!obj)
755 		return -EINVAL;
756 
757 	switch (info->head.block) {
758 	case AMDGPU_RAS_BLOCK__GFX:
759 		if (adev->gfx.funcs->ras_error_inject)
760 			ret = adev->gfx.funcs->ras_error_inject(adev, info);
761 		else
762 			ret = -EINVAL;
763 		break;
764 	case AMDGPU_RAS_BLOCK__UMC:
765 	case AMDGPU_RAS_BLOCK__MMHUB:
766 	case AMDGPU_RAS_BLOCK__XGMI_WAFL:
767 	case AMDGPU_RAS_BLOCK__PCIE_BIF:
768 		ret = psp_ras_trigger_error(&adev->psp, &block_info);
769 		break;
770 	default:
771 		DRM_INFO("%s error injection is not supported yet\n",
772 			 ras_block_str(info->head.block));
773 		ret = -EINVAL;
774 	}
775 
776 	if (ret)
777 		DRM_ERROR("RAS ERROR: inject %s error failed ret %d\n",
778 				ras_block_str(info->head.block),
779 				ret);
780 
781 	return ret;
782 }
783 
784 int amdgpu_ras_error_cure(struct amdgpu_device *adev,
785 		struct ras_cure_if *info)
786 {
787 	/* psp fw has no cure interface for now. */
788 	return 0;
789 }
790 
791 /* get the total error counts on all IPs */
792 unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
793 		bool is_ce)
794 {
795 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
796 	struct ras_manager *obj;
797 	struct ras_err_data data = {0, 0};
798 
799 	if (!con)
800 		return 0;
801 
802 	list_for_each_entry(obj, &con->head, node) {
803 		struct ras_query_if info = {
804 			.head = obj->head,
805 		};
806 
807 		if (amdgpu_ras_error_query(adev, &info))
808 			return 0;
809 
810 		data.ce_count += info.ce_count;
811 		data.ue_count += info.ue_count;
812 	}
813 
814 	return is_ce ? data.ce_count : data.ue_count;
815 }
816 /* query/inject/cure end */
817 
818 
819 /* sysfs begin */
820 
821 static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
822 		struct ras_badpage **bps, unsigned int *count);
823 
824 static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
825 {
826 	switch (flags) {
827 	case AMDGPU_RAS_RETIRE_PAGE_RESERVED:
828 		return "R";
829 	case AMDGPU_RAS_RETIRE_PAGE_PENDING:
830 		return "P";
831 	case AMDGPU_RAS_RETIRE_PAGE_FAULT:
832 	default:
833 		return "F";
834 	}
835 }
836 
837 /**
838  * DOC: AMDGPU RAS sysfs gpu_vram_bad_pages Interface
839  *
840  * It allows the user to read the bad pages of vram on the gpu through
841  * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
842  *
843  * It outputs multiple lines, and each line stands for one gpu page.
844  *
845  * The format of one line is below,
846  * gpu pfn : gpu page size : flags
847  *
848  * gpu pfn and gpu page size are printed in hex format.
849  * flags can be one of the characters below,
850  *
851  * R: reserved, this gpu page is reserved and not able to be used.
852  *
853  * P: pending for reserve, this gpu page is marked as bad and will be reserved
854  * in the next window of page_reserve.
855  *
856  * F: unable to reserve. this gpu page can't be reserved for some reason.
857  *
858  * Examples:
859  *
860  * .. code-block:: bash
861  *
862  *	0x00000001 : 0x00001000 : R
863  *	0x00000002 : 0x00001000 : P
864  *
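 * As an illustrative follow-up (assuming card0), the pending retirements can
 * be counted from the format documented above:
 *
 * .. code-block:: bash
 *
 *	grep -c ": P$" /sys/class/drm/card0/device/ras/gpu_vram_bad_pages
 *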
865  */
866 
867 static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
868 		struct kobject *kobj, struct bin_attribute *attr,
869 		char *buf, loff_t ppos, size_t count)
870 {
871 	struct amdgpu_ras *con =
872 		container_of(attr, struct amdgpu_ras, badpages_attr);
873 	struct amdgpu_device *adev = con->adev;
874 	const unsigned int element_size =
875 		sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
876 	unsigned int start = div64_ul(ppos + element_size - 1, element_size);
877 	unsigned int end = div64_ul(ppos + count - 1, element_size);
878 	ssize_t s = 0;
879 	struct ras_badpage *bps = NULL;
880 	unsigned int bps_count = 0;
881 
882 	memset(buf, 0, count);
883 
884 	if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
885 		return 0;
886 
887 	for (; start < end && start < bps_count; start++)
888 		s += scnprintf(&buf[s], element_size + 1,
889 				"0x%08x : 0x%08x : %1s\n",
890 				bps[start].bp,
891 				bps[start].size,
892 				amdgpu_ras_badpage_flags_str(bps[start].flags));
893 
894 	kfree(bps);
895 
896 	return s;
897 }
898 
899 static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
900 		struct device_attribute *attr, char *buf)
901 {
902 	struct amdgpu_ras *con =
903 		container_of(attr, struct amdgpu_ras, features_attr);
904 
905 	return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features);
906 }
907 
908 static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
909 {
910 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
911 	struct attribute *attrs[] = {
912 		&con->features_attr.attr,
913 		NULL
914 	};
915 	struct bin_attribute *bin_attrs[] = {
916 		&con->badpages_attr,
917 		NULL
918 	};
919 	struct attribute_group group = {
920 		.name = "ras",
921 		.attrs = attrs,
922 		.bin_attrs = bin_attrs,
923 	};
924 
925 	con->features_attr = (struct device_attribute) {
926 		.attr = {
927 			.name = "features",
928 			.mode = S_IRUGO,
929 		},
930 			.show = amdgpu_ras_sysfs_features_read,
931 	};
932 
933 	con->badpages_attr = (struct bin_attribute) {
934 		.attr = {
935 			.name = "gpu_vram_bad_pages",
936 			.mode = S_IRUGO,
937 		},
938 		.size = 0,
939 		.private = NULL,
940 		.read = amdgpu_ras_sysfs_badpages_read,
941 	};
942 
943 	sysfs_attr_init(attrs[0]);
944 	sysfs_bin_attr_init(bin_attrs[0]);
945 
946 	return sysfs_create_group(&adev->dev->kobj, &group);
947 }
948 
949 static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
950 {
951 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
952 	struct attribute *attrs[] = {
953 		&con->features_attr.attr,
954 		NULL
955 	};
956 	struct bin_attribute *bin_attrs[] = {
957 		&con->badpages_attr,
958 		NULL
959 	};
960 	struct attribute_group group = {
961 		.name = "ras",
962 		.attrs = attrs,
963 		.bin_attrs = bin_attrs,
964 	};
965 
966 	sysfs_remove_group(&adev->dev->kobj, &group);
967 
968 	return 0;
969 }
970 
971 int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
972 		struct ras_fs_if *head)
973 {
974 	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);
975 
976 	if (!obj || obj->attr_inuse)
977 		return -EINVAL;
978 
979 	get_obj(obj);
980 
981 	memcpy(obj->fs_data.sysfs_name,
982 			head->sysfs_name,
983 			sizeof(obj->fs_data.sysfs_name));
984 
985 	obj->sysfs_attr = (struct device_attribute){
986 		.attr = {
987 			.name = obj->fs_data.sysfs_name,
988 			.mode = S_IRUGO,
989 		},
990 			.show = amdgpu_ras_sysfs_read,
991 	};
992 	sysfs_attr_init(&obj->sysfs_attr.attr);
993 
994 	if (sysfs_add_file_to_group(&adev->dev->kobj,
995 				&obj->sysfs_attr.attr,
996 				"ras")) {
997 		put_obj(obj);
998 		return -EINVAL;
999 	}
1000 
1001 	obj->attr_inuse = 1;
1002 
1003 	return 0;
1004 }
1005 
1006 int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
1007 		struct ras_common_if *head)
1008 {
1009 	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
1010 
1011 	if (!obj || !obj->attr_inuse)
1012 		return -EINVAL;
1013 
1014 	sysfs_remove_file_from_group(&adev->dev->kobj,
1015 				&obj->sysfs_attr.attr,
1016 				"ras");
1017 	obj->attr_inuse = 0;
1018 	put_obj(obj);
1019 
1020 	return 0;
1021 }
1022 
1023 static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
1024 {
1025 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1026 	struct ras_manager *obj, *tmp;
1027 
1028 	list_for_each_entry_safe(obj, tmp, &con->head, node) {
1029 		amdgpu_ras_sysfs_remove(adev, &obj->head);
1030 	}
1031 
1032 	amdgpu_ras_sysfs_remove_feature_node(adev);
1033 
1034 	return 0;
1035 }
1036 /* sysfs end */
1037 
1038 /**
1039  * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors
1040  *
1041  * Normally when there is an uncorrectable error, the driver will reset
1042  * the GPU to recover.  However, in the event of an unrecoverable error,
1043  * the driver provides an interface to reboot the system automatically
1044  * instead.
1045  *
1046  * The following file in debugfs provides that interface:
1047  * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot
1048  *
1049  * Usage:
1050  *
1051  * .. code-block:: bash
1052  *
1053  *	echo true > .../ras/auto_reboot
1054  *
1055  */
1056 /* debugfs begin */
1057 static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
1058 {
1059 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1060 	struct drm_minor *minor = adev->ddev->primary;
1061 
1062 	con->dir = debugfs_create_dir("ras", minor->debugfs_root);
1063 	debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir,
1064 				adev, &amdgpu_ras_debugfs_ctrl_ops);
1065 	debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, con->dir,
1066 				adev, &amdgpu_ras_debugfs_eeprom_ops);
1067 
1068 	/*
1069 	 * After an uncorrectable error happens, GPU recovery will usually
1070 	 * be scheduled. But due to the known problem of GPU recovery failing
1071 	 * to bring the GPU back, the interface below provides a direct way
1072 	 * for the user to reboot the system automatically when an
1073 	 * ERREVENT_ATHUB_INTERRUPT is generated. The normal GPU recovery
1074 	 * routine will never be called in that case.
1075 	 */
1076 	debugfs_create_bool("auto_reboot", S_IWUGO | S_IRUGO, con->dir,
1077 				&con->reboot);
1078 }
1079 
1080 void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
1081 		struct ras_fs_if *head)
1082 {
1083 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1084 	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);
1085 
1086 	if (!obj || obj->ent)
1087 		return;
1088 
1089 	get_obj(obj);
1090 
1091 	memcpy(obj->fs_data.debugfs_name,
1092 			head->debugfs_name,
1093 			sizeof(obj->fs_data.debugfs_name));
1094 
1095 	obj->ent = debugfs_create_file(obj->fs_data.debugfs_name,
1096 				       S_IWUGO | S_IRUGO, con->dir, obj,
1097 				       &amdgpu_ras_debugfs_ops);
1098 }
1099 
1100 void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
1101 		struct ras_common_if *head)
1102 {
1103 	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
1104 
1105 	if (!obj || !obj->ent)
1106 		return;
1107 
1108 	debugfs_remove(obj->ent);
1109 	obj->ent = NULL;
1110 	put_obj(obj);
1111 }
1112 
1113 static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
1114 {
1115 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1116 	struct ras_manager *obj, *tmp;
1117 
1118 	list_for_each_entry_safe(obj, tmp, &con->head, node) {
1119 		amdgpu_ras_debugfs_remove(adev, &obj->head);
1120 	}
1121 
1122 	debugfs_remove_recursive(con->dir);
1123 	con->dir = NULL;
1124 }
1125 /* debugfs end */
1126 
1127 /* ras fs */
1128 
1129 static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
1130 {
1131 	amdgpu_ras_sysfs_create_feature_node(adev);
1132 	amdgpu_ras_debugfs_create_ctrl_node(adev);
1133 
1134 	return 0;
1135 }
1136 
1137 static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
1138 {
1139 	amdgpu_ras_debugfs_remove_all(adev);
1140 	amdgpu_ras_sysfs_remove_all(adev);
1141 	return 0;
1142 }
1143 /* ras fs end */
1144 
1145 /* ih begin */
1146 static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
1147 {
1148 	struct ras_ih_data *data = &obj->ih_data;
1149 	struct amdgpu_iv_entry entry;
1150 	int ret;
1151 	struct ras_err_data err_data = {0, 0, 0, NULL};
1152 
1153 	while (data->rptr != data->wptr) {
1154 		rmb();
1155 		memcpy(&entry, &data->ring[data->rptr],
1156 				data->element_size);
1157 
1158 		wmb();
1159 		data->rptr = (data->aligned_element_size +
1160 				data->rptr) % data->ring_size;
1161 
1162 		/* Let IP handle its data, maybe we need to get the output
1163 		 * from the callback to update the error type/count, etc.
1164 		 */
1165 		if (data->cb) {
1166 			ret = data->cb(obj->adev, &err_data, &entry);
1167 			/* ue will trigger an interrupt, and in that case
1168 			 * we need to do a reset to recover the whole system.
1169 			 * But leave that recovery to the IP; here we just dispatch
1170 			 * the error.
1171 			 */
1172 			if (ret == AMDGPU_RAS_SUCCESS) {
1173 				/* these counts could be left as 0 if
1174 				 * some blocks do not count error numbers
1175 				 */
1176 				obj->err_data.ue_count += err_data.ue_count;
1177 				obj->err_data.ce_count += err_data.ce_count;
1178 			}
1179 		}
1180 	}
1181 }
1182 
1183 static void amdgpu_ras_interrupt_process_handler(struct work_struct *work)
1184 {
1185 	struct ras_ih_data *data =
1186 		container_of(work, struct ras_ih_data, ih_work);
1187 	struct ras_manager *obj =
1188 		container_of(data, struct ras_manager, ih_data);
1189 
1190 	amdgpu_ras_interrupt_handler(obj);
1191 }
1192 
1193 int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
1194 		struct ras_dispatch_if *info)
1195 {
1196 	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
1197 	struct ras_ih_data *data;
1198 
1199 	if (!obj)
1200 		return -EINVAL;
1201 	data = &obj->ih_data;
1202 	if (data->inuse == 0)
1203 		return 0;
1204 
1205 	/* Might be overflow... */
1206 	memcpy(&data->ring[data->wptr], info->entry,
1207 			data->element_size);
1208 
1209 	wmb();
1210 	data->wptr = (data->aligned_element_size +
1211 			data->wptr) % data->ring_size;
1212 
1213 	schedule_work(&data->ih_work);
1214 
1215 	return 0;
1216 }
1217 
1218 int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
1219 		struct ras_ih_if *info)
1220 {
1221 	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
1222 	struct ras_ih_data *data;
1223 
1224 	if (!obj)
1225 		return -EINVAL;
1226 
1227 	data = &obj->ih_data;
1228 	if (data->inuse == 0)
1229 		return 0;
1230 
1231 	cancel_work_sync(&data->ih_work);
1232 
1233 	kfree(data->ring);
1234 	memset(data, 0, sizeof(*data));
1235 	put_obj(obj);
1236 
1237 	return 0;
1238 }
1239 
1240 int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev,
1241 		struct ras_ih_if *info)
1242 {
1243 	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
1244 	struct ras_ih_data *data;
1245 
1246 	if (!obj) {
1247 		/* in case we register the IH before enabling the ras feature */
1248 		obj = amdgpu_ras_create_obj(adev, &info->head);
1249 		if (!obj)
1250 			return -EINVAL;
1251 	} else
1252 		get_obj(obj);
1253 
1254 	data = &obj->ih_data;
1255 	/* add the callback, etc. */
1256 	*data = (struct ras_ih_data) {
1257 		.inuse = 0,
1258 		.cb = info->cb,
1259 		.element_size = sizeof(struct amdgpu_iv_entry),
1260 		.rptr = 0,
1261 		.wptr = 0,
1262 	};
1263 
1264 	INIT_WORK(&data->ih_work, amdgpu_ras_interrupt_process_handler);
1265 
1266 	data->aligned_element_size = ALIGN(data->element_size, 8);
1267 	/* the ring can store 64 iv entries. */
1268 	data->ring_size = 64 * data->aligned_element_size;
1269 	data->ring = kmalloc(data->ring_size, GFP_KERNEL);
1270 	if (!data->ring) {
1271 		put_obj(obj);
1272 		return -ENOMEM;
1273 	}
1274 
1275 	/* IH is ready */
1276 	data->inuse = 1;
1277 
1278 	return 0;
1279 }
1280 
1281 static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
1282 {
1283 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1284 	struct ras_manager *obj, *tmp;
1285 
1286 	list_for_each_entry_safe(obj, tmp, &con->head, node) {
1287 		struct ras_ih_if info = {
1288 			.head = obj->head,
1289 		};
1290 		amdgpu_ras_interrupt_remove_handler(adev, &info);
1291 	}
1292 
1293 	return 0;
1294 }
1295 /* ih end */
1296 
1297 /* recovery begin */
1298 
1299 /* return 0 on success.
1300  * caller needs to free bps.
1301  */
1302 static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
1303 		struct ras_badpage **bps, unsigned int *count)
1304 {
1305 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1306 	struct ras_err_handler_data *data;
1307 	int i = 0;
1308 	int ret = 0;
1309 
1310 	if (!con || !con->eh_data || !bps || !count)
1311 		return -EINVAL;
1312 
1313 	mutex_lock(&con->recovery_lock);
1314 	data = con->eh_data;
1315 	if (!data || data->count == 0) {
1316 		*bps = NULL;
1317 		goto out;
1318 	}
1319 
1320 	*bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
1321 	if (!*bps) {
1322 		ret = -ENOMEM;
1323 		goto out;
1324 	}
1325 
1326 	for (; i < data->count; i++) {
1327 		(*bps)[i] = (struct ras_badpage){
1328 			.bp = data->bps[i].retired_page,
1329 			.size = AMDGPU_GPU_PAGE_SIZE,
1330 			.flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
1331 		};
1332 
1333 		if (data->last_reserved <= i)
1334 			(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
1335 		else if (data->bps_bo[i] == NULL)
1336 			(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
1337 	}
1338 
1339 	*count = data->count;
1340 out:
1341 	mutex_unlock(&con->recovery_lock);
1342 	return ret;
1343 }
1344 
1345 static void amdgpu_ras_do_recovery(struct work_struct *work)
1346 {
1347 	struct amdgpu_ras *ras =
1348 		container_of(work, struct amdgpu_ras, recovery_work);
1349 
1350 	amdgpu_device_gpu_recover(ras->adev, 0);
1351 	atomic_set(&ras->in_recovery, 0);
1352 }
1353 
1354 /* alloc/realloc bps array */
1355 static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
1356 		struct ras_err_handler_data *data, int pages)
1357 {
1358 	unsigned int old_space = data->count + data->space_left;
1359 	unsigned int new_space = old_space + pages;
1360 	unsigned int align_space = ALIGN(new_space, 512);
1361 	void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
1362 	struct amdgpu_bo **bps_bo =
1363 			kmalloc(align_space * sizeof(*data->bps_bo), GFP_KERNEL);
1364 
1365 	if (!bps || !bps_bo) {
1366 		kfree(bps);
1367 		kfree(bps_bo);
1368 		return -ENOMEM;
1369 	}
1370 
1371 	if (data->bps) {
1372 		memcpy(bps, data->bps,
1373 				data->count * sizeof(*data->bps));
1374 		kfree(data->bps);
1375 	}
1376 	if (data->bps_bo) {
1377 		memcpy(bps_bo, data->bps_bo,
1378 				data->count * sizeof(*data->bps_bo));
1379 		kfree(data->bps_bo);
1380 	}
1381 
1382 	data->bps = bps;
1383 	data->bps_bo = bps_bo;
1384 	data->space_left += align_space - old_space;
1385 	return 0;
1386 }
1387 
1388 /* it deals with vram only. */
1389 int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
1390 		struct eeprom_table_record *bps, int pages)
1391 {
1392 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1393 	struct ras_err_handler_data *data;
1394 	int ret = 0;
1395 
1396 	if (!con || !con->eh_data || !bps || pages <= 0)
1397 		return 0;
1398 
1399 	mutex_lock(&con->recovery_lock);
1400 	data = con->eh_data;
1401 	if (!data)
1402 		goto out;
1403 
1404 	if (data->space_left <= pages)
1405 		if (amdgpu_ras_realloc_eh_data_space(adev, data, pages)) {
1406 			ret = -ENOMEM;
1407 			goto out;
1408 		}
1409 
1410 	memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
1411 	data->count += pages;
1412 	data->space_left -= pages;
1413 
1414 out:
1415 	mutex_unlock(&con->recovery_lock);
1416 
1417 	return ret;
1418 }
1419 
1420 /*
1421  * write error record array to eeprom, the function should be
1422  * protected by recovery_lock
1423  */
1424 static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
1425 {
1426 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1427 	struct ras_err_handler_data *data;
1428 	struct amdgpu_ras_eeprom_control *control;
1429 	int save_count;
1430 
1431 	if (!con || !con->eh_data)
1432 		return 0;
1433 
1434 	control = &con->eeprom_control;
1435 	data = con->eh_data;
1436 	save_count = data->count - control->num_recs;
1437 	/* only new entries are saved */
1438 	if (save_count > 0)
1439 		if (amdgpu_ras_eeprom_process_recods(control,
1440 							&data->bps[control->num_recs],
1441 							true,
1442 							save_count)) {
1443 			DRM_ERROR("Failed to save EEPROM table data!");
1444 			return -EIO;
1445 		}
1446 
1447 	return 0;
1448 }
1449 
1450 /*
1451  * read error record array in eeprom and reserve enough space for
1452  * storing new bad pages
1453  */
1454 static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
1455 {
1456 	struct amdgpu_ras_eeprom_control *control =
1457 					&adev->psp.ras.ras->eeprom_control;
1458 	struct eeprom_table_record *bps = NULL;
1459 	int ret = 0;
1460 
1461 	/* no bad page record, skip eeprom access */
1462 	if (!control->num_recs)
1463 		return ret;
1464 
1465 	bps = kcalloc(control->num_recs, sizeof(*bps), GFP_KERNEL);
1466 	if (!bps)
1467 		return -ENOMEM;
1468 
1469 	if (amdgpu_ras_eeprom_process_recods(control, bps, false,
1470 		control->num_recs)) {
1471 		DRM_ERROR("Failed to load EEPROM table records!");
1472 		ret = -EIO;
1473 		goto out;
1474 	}
1475 
1476 	ret = amdgpu_ras_add_bad_pages(adev, bps, control->num_recs);
1477 
1478 out:
1479 	kfree(bps);
1480 	return ret;
1481 }
1482 
1483 /*
1484  * check if an address belongs to a bad page
1485  *
1486  * Note: this check is only for umc block
1487  */
1488 static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
1489 				uint64_t addr)
1490 {
1491 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1492 	struct ras_err_handler_data *data;
1493 	int i;
1494 	bool ret = false;
1495 
1496 	if (!con || !con->eh_data)
1497 		return ret;
1498 
1499 	mutex_lock(&con->recovery_lock);
1500 	data = con->eh_data;
1501 	if (!data)
1502 		goto out;
1503 
1504 	addr >>= AMDGPU_GPU_PAGE_SHIFT;
1505 	for (i = 0; i < data->count; i++)
1506 		if (addr == data->bps[i].retired_page) {
1507 			ret = true;
1508 			goto out;
1509 		}
1510 
1511 out:
1512 	mutex_unlock(&con->recovery_lock);
1513 	return ret;
1514 }
1515 
1516 /* called in gpu recovery/init */
1517 int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev)
1518 {
1519 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1520 	struct ras_err_handler_data *data;
1521 	uint64_t bp;
1522 	struct amdgpu_bo *bo = NULL;
1523 	int i, ret = 0;
1524 
1525 	if (!con || !con->eh_data)
1526 		return 0;
1527 
1528 	mutex_lock(&con->recovery_lock);
1529 	data = con->eh_data;
1530 	if (!data)
1531 		goto out;
1532 	/* reserve vram at driver post stage. */
1533 	for (i = data->last_reserved; i < data->count; i++) {
1534 		bp = data->bps[i].retired_page;
1535 
1536 		/* There are two cases of reserve error that should be ignored:
1537 		 * 1) a ras bad page has been allocated (used by someone);
1538 		 * 2) a ras bad page has been reserved (duplicate error injection
1539 		 *    for one page);
1540 		 */
1541 		if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
1542 					       AMDGPU_GPU_PAGE_SIZE,
1543 					       AMDGPU_GEM_DOMAIN_VRAM,
1544 					       &bo, NULL))
1545 			DRM_WARN("RAS WARN: reserve vram for retired page %llx fail\n", bp);
1546 
1547 		data->bps_bo[i] = bo;
1548 		data->last_reserved = i + 1;
1549 		bo = NULL;
1550 	}
1551 
1552 	/* continue to save bad pages to eeprom even if reserve_vram fails */
1553 	ret = amdgpu_ras_save_bad_pages(adev);
1554 out:
1555 	mutex_unlock(&con->recovery_lock);
1556 	return ret;
1557 }
1558 
1559 /* called when the driver unloads */
1560 static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev)
1561 {
1562 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1563 	struct ras_err_handler_data *data;
1564 	struct amdgpu_bo *bo;
1565 	int i;
1566 
1567 	if (!con || !con->eh_data)
1568 		return 0;
1569 
1570 	mutex_lock(&con->recovery_lock);
1571 	data = con->eh_data;
1572 	if (!data)
1573 		goto out;
1574 
1575 	for (i = data->last_reserved - 1; i >= 0; i--) {
1576 		bo = data->bps_bo[i];
1577 
1578 		amdgpu_bo_free_kernel(&bo, NULL, NULL);
1579 
1580 		data->bps_bo[i] = bo;
1581 		data->last_reserved = i;
1582 	}
1583 out:
1584 	mutex_unlock(&con->recovery_lock);
1585 	return 0;
1586 }
1587 
1588 int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
1589 {
1590 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1591 	struct ras_err_handler_data **data;
1592 	int ret;
1593 
1594 	if (con)
1595 		data = &con->eh_data;
1596 	else
1597 		return 0;
1598 
1599 	*data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO);
1600 	if (!*data) {
1601 		ret = -ENOMEM;
1602 		goto out;
1603 	}
1604 
1605 	mutex_init(&con->recovery_lock);
1606 	INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
1607 	atomic_set(&con->in_recovery, 0);
1608 	con->adev = adev;
1609 
1610 	ret = amdgpu_ras_eeprom_init(&con->eeprom_control);
1611 	if (ret)
1612 		goto free;
1613 
1614 	if (con->eeprom_control.num_recs) {
1615 		ret = amdgpu_ras_load_bad_pages(adev);
1616 		if (ret)
1617 			goto free;
1618 		ret = amdgpu_ras_reserve_bad_pages(adev);
1619 		if (ret)
1620 			goto release;
1621 	}
1622 
1623 	return 0;
1624 
1625 release:
1626 	amdgpu_ras_release_bad_pages(adev);
1627 free:
1628 	kfree((*data)->bps);
1629 	kfree((*data)->bps_bo);
1630 	kfree(*data);
1631 	con->eh_data = NULL;
1632 out:
1633 	DRM_WARN("Failed to initialize ras recovery!\n");
1634 
1635 	return ret;
1636 }
1637 
1638 static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
1639 {
1640 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1641 	struct ras_err_handler_data *data = con->eh_data;
1642 
1643 	/* recovery_init failed to init it, fini is useless */
1644 	if (!data)
1645 		return 0;
1646 
1647 	cancel_work_sync(&con->recovery_work);
1648 	amdgpu_ras_release_bad_pages(adev);
1649 
1650 	mutex_lock(&con->recovery_lock);
1651 	con->eh_data = NULL;
1652 	kfree(data->bps);
1653 	kfree(data->bps_bo);
1654 	kfree(data);
1655 	mutex_unlock(&con->recovery_lock);
1656 
1657 	return 0;
1658 }
1659 /* recovery end */
1660 
1661 /* return 0 if ras will reset gpu and repost. */
1662 int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
1663 		unsigned int block)
1664 {
1665 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
1666 
1667 	if (!ras)
1668 		return -EINVAL;
1669 
1670 	ras->flags |= AMDGPU_RAS_FLAG_INIT_NEED_RESET;
1671 	return 0;
1672 }
1673 
1674 /*
1675  * check hardware's ras ability which will be saved in hw_supported.
1676  * if hardware does not support ras, we can skip some ras initialization and
1677  * forbid some ras operations from IP blocks.
1678  * if software itself, say a boot parameter, limits the ras ability, we still
1679  * need to allow IP blocks to do some limited operations, like disable. In
1680  * such a case, we have to initialize ras as normal, but need to check if the
1681  * operation is allowed or not in each function.
1682  */
1683 static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
1684 		uint32_t *hw_supported, uint32_t *supported)
1685 {
1686 	*hw_supported = 0;
1687 	*supported = 0;
1688 
1689 	if (amdgpu_sriov_vf(adev) ||
1690 			adev->asic_type != CHIP_VEGA20)
1691 		return;
1692 
1693 	if (adev->is_atom_fw &&
1694 			(amdgpu_atomfirmware_mem_ecc_supported(adev) ||
1695 			 amdgpu_atomfirmware_sram_ecc_supported(adev)))
1696 		*hw_supported = AMDGPU_RAS_BLOCK_MASK;
1697 
1698 	*supported = amdgpu_ras_enable == 0 ?
1699 				0 : *hw_supported & amdgpu_ras_mask;
1700 }
1701 
1702 int amdgpu_ras_init(struct amdgpu_device *adev)
1703 {
1704 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1705 	int r;
1706 
1707 	if (con)
1708 		return 0;
1709 
1710 	con = kmalloc(sizeof(struct amdgpu_ras) +
1711 			sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT,
1712 			GFP_KERNEL|__GFP_ZERO);
1713 	if (!con)
1714 		return -ENOMEM;
1715 
1716 	con->objs = (struct ras_manager *)(con + 1);
1717 
1718 	amdgpu_ras_set_context(adev, con);
1719 
1720 	amdgpu_ras_check_supported(adev, &con->hw_supported,
1721 			&con->supported);
1722 	if (!con->hw_supported) {
1723 		amdgpu_ras_set_context(adev, NULL);
1724 		kfree(con);
1725 		return 0;
1726 	}
1727 
1728 	con->features = 0;
1729 	INIT_LIST_HEAD(&con->head);
1730 	/* Might need to get this flag from vbios. */
1731 	con->flags = RAS_DEFAULT_FLAGS;
1732 
1733 	if (adev->nbio.funcs->init_ras_controller_interrupt) {
1734 		r = adev->nbio.funcs->init_ras_controller_interrupt(adev);
1735 		if (r)
1736 			return r;
1737 	}
1738 
1739 	if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) {
1740 		r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev);
1741 		if (r)
1742 			return r;
1743 	}
1744 
1745 	amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK;
1746 
1747 	if (amdgpu_ras_fs_init(adev))
1748 		goto fs_out;
1749 
1750 	DRM_INFO("RAS INFO: ras initialized successfully, "
1751 			"hardware ability[%x] ras_mask[%x]\n",
1752 			con->hw_supported, con->supported);
1753 	return 0;
1754 fs_out:
1755 	amdgpu_ras_set_context(adev, NULL);
1756 	kfree(con);
1757 
1758 	return -EINVAL;
1759 }
1760 
1761 /* helper function to handle common stuff in ip late init phase */
1762 int amdgpu_ras_late_init(struct amdgpu_device *adev,
1763 			 struct ras_common_if *ras_block,
1764 			 struct ras_fs_if *fs_info,
1765 			 struct ras_ih_if *ih_info)
1766 {
1767 	int r;
1768 
1769 	/* disable RAS feature per IP block if it is not supported */
1770 	if (!amdgpu_ras_is_supported(adev, ras_block->block)) {
1771 		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
1772 		return 0;
1773 	}
1774 
1775 	r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1);
1776 	if (r) {
1777 		if (r == -EAGAIN) {
1778 			/* request gpu reset. will run again */
1779 			amdgpu_ras_request_reset_on_boot(adev,
1780 					ras_block->block);
1781 			return 0;
1782 		} else if (adev->in_suspend || adev->in_gpu_reset) {
1783 			/* in resume phase, if we fail to enable ras,
1784 			 * clean up all ras fs nodes, and disable ras */
1785 			goto cleanup;
1786 		} else
1787 			return r;
1788 	}
1789 
1790 	/* in resume phase, no need to create ras fs node */
1791 	if (adev->in_suspend || adev->in_gpu_reset)
1792 		return 0;
1793 
1794 	if (ih_info->cb) {
1795 		r = amdgpu_ras_interrupt_add_handler(adev, ih_info);
1796 		if (r)
1797 			goto interrupt;
1798 	}
1799 
1800 	amdgpu_ras_debugfs_create(adev, fs_info);
1801 
1802 	r = amdgpu_ras_sysfs_create(adev, fs_info);
1803 	if (r)
1804 		goto sysfs;
1805 
1806 	return 0;
1807 cleanup:
1808 	amdgpu_ras_sysfs_remove(adev, ras_block);
1809 sysfs:
1810 	amdgpu_ras_debugfs_remove(adev, ras_block);
1811 	if (ih_info->cb)
1812 		amdgpu_ras_interrupt_remove_handler(adev, ih_info);
1813 interrupt:
1814 	amdgpu_ras_feature_enable(adev, ras_block, 0);
1815 	return r;
1816 }
1817 
1818 /* helper function to remove ras fs node and interrupt handler */
1819 void amdgpu_ras_late_fini(struct amdgpu_device *adev,
1820 			  struct ras_common_if *ras_block,
1821 			  struct ras_ih_if *ih_info)
1822 {
1823 	if (!ras_block || !ih_info)
1824 		return;
1825 
1826 	amdgpu_ras_sysfs_remove(adev, ras_block);
1827 	amdgpu_ras_debugfs_remove(adev, ras_block);
1828 	if (ih_info->cb)
1829 		amdgpu_ras_interrupt_remove_handler(adev, ih_info);
1830 	amdgpu_ras_feature_enable(adev, ras_block, 0);
1831 }
1832 
1833 /* do some init work after IP late init as a dependency.
1834  * It runs in the resume/gpu reset/boot-up cases.
1835  */
1836 void amdgpu_ras_resume(struct amdgpu_device *adev)
1837 {
1838 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1839 	struct ras_manager *obj, *tmp;
1840 
1841 	if (!con)
1842 		return;
1843 
1844 	if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
1845 		/* Set up all other IPs which are not implemented. There is a
1846 		 * tricky thing here: the IP's actual ras error type should be
1847 		 * MULTI_UNCORRECTABLE, but as the driver does not handle it,
1848 		 * ERROR_NONE makes sense anyway.
1849 		 */
1850 		amdgpu_ras_enable_all_features(adev, 1);
1851 
1852 		/* We enable ras on all hw_supported blocks, but the boot
1853 		 * parameter might disable some of them and one or more IPs may
1854 		 * not be implemented yet. So we disable them on their behalf.
1855 		 */
1856 		list_for_each_entry_safe(obj, tmp, &con->head, node) {
1857 			if (!amdgpu_ras_is_supported(adev, obj->head.block)) {
1858 				amdgpu_ras_feature_enable(adev, &obj->head, 0);
1859 				/* there should not be any reference. */
1860 				WARN_ON(alive_obj(obj));
1861 			}
1862 		}
1863 	}
1864 
1865 	if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) {
1866 		con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET;
1867 		/* setup ras obj state as disabled.
1868 		 * This is for the init_by_vbios case.
1869 		 * If we want to enable ras, just enable it in a normal way.
1870 		 * If we want to disable it, we need to set up the ras obj as
1871 		 * enabled, then issue another TA disable cmd.
1872 		 * See feature_enable_on_boot.
1873 		 */
1874 		amdgpu_ras_disable_all_features(adev, 1);
1875 		amdgpu_ras_reset_gpu(adev, 0);
1876 	}
1877 }
1878 
1879 void amdgpu_ras_suspend(struct amdgpu_device *adev)
1880 {
1881 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1882 
1883 	if (!con)
1884 		return;
1885 
1886 	amdgpu_ras_disable_all_features(adev, 0);
1887 	/* Make sure all ras objects are disabled. */
1888 	if (con->features)
1889 		amdgpu_ras_disable_all_features(adev, 1);
1890 }
1891 
1892 /* do some fini work before IP fini as a dependency */
1893 int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
1894 {
1895 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1896 
1897 	if (!con)
1898 		return 0;
1899 
1900 	/* Need to disable ras on all IPs here before ip [hw/sw]fini */
1901 	amdgpu_ras_disable_all_features(adev, 0);
1902 	amdgpu_ras_recovery_fini(adev);
1903 	return 0;
1904 }
1905 
1906 int amdgpu_ras_fini(struct amdgpu_device *adev)
1907 {
1908 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1909 
1910 	if (!con)
1911 		return 0;
1912 
1913 	amdgpu_ras_fs_fini(adev);
1914 	amdgpu_ras_interrupt_remove_all(adev);
1915 
1916 	WARN(con->features, "Feature mask is not cleared");
1917 
1918 	if (con->features)
1919 		amdgpu_ras_disable_all_features(adev, 1);
1920 
1921 	amdgpu_ras_set_context(adev, NULL);
1922 	kfree(con);
1923 
1924 	return 0;
1925 }
1926 
1927 void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
1928 {
1929 	uint32_t hw_supported, supported;
1930 
1931 	amdgpu_ras_check_supported(adev, &hw_supported, &supported);
1932 	if (!hw_supported)
1933 		return;
1934 
1935 	if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
1936 		DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected!\n");
1937 
1938 		amdgpu_ras_reset_gpu(adev, false);
1939 	}
1940 }
1941