// SPDX-License-Identifier: MIT
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include "ras.h"
#include "ras_umc.h"
#include "ras_umc_v12_0.h"

#define MAX_ECC_NUM_PER_RETIREMENT  16

/* bad page timestamp format
 * yy[31:27] mon[26:23] day[22:17] hh[16:12] min[11:6] ss[5:0]
 */
#define EEPROM_TIMESTAMP_MINUTE  6
#define EEPROM_TIMESTAMP_HOUR    12
#define EEPROM_TIMESTAMP_DAY     17
#define EEPROM_TIMESTAMP_MONTH   23
#define EEPROM_TIMESTAMP_YEAR    27

static uint64_t ras_umc_get_eeprom_timestamp(struct ras_core_context *ras_core)
{
	struct ras_time tm = {0};
	uint64_t utc_timestamp = 0;
	uint64_t eeprom_timestamp = 0;

	utc_timestamp = ras_core_get_utc_second_timestamp(ras_core);
	if (!utc_timestamp)
		return utc_timestamp;

	ras_core_convert_timestamp_to_time(ras_core, utc_timestamp, &tm);

	/* only years 2000 ~ 2031 are representable; clamp the year into that range */
	if (tm.tm_year < 2000)
		tm.tm_year = 2000;
	if (tm.tm_year > 2031)
		tm.tm_year = 2031;

	tm.tm_year -= 2000;

	eeprom_timestamp = tm.tm_sec + (tm.tm_min << EEPROM_TIMESTAMP_MINUTE)
				+ (tm.tm_hour << EEPROM_TIMESTAMP_HOUR)
				+ (tm.tm_mday << EEPROM_TIMESTAMP_DAY)
				+ (tm.tm_mon << EEPROM_TIMESTAMP_MONTH)
				+ (tm.tm_year << EEPROM_TIMESTAMP_YEAR);
	eeprom_timestamp &= 0xffffffff;

	return eeprom_timestamp;
}
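
/*
 * Illustrative decode of the packed timestamp above; a minimal sketch, not
 * part of the driver, assuming only the bit layout documented with the
 * EEPROM_TIMESTAMP_* shifts.
 */
#if 0
static void example_decode_eeprom_timestamp(uint64_t ts, struct ras_time *tm)
{
	tm->tm_sec  = ts & 0x3f;					/* ss[5:0]    */
	tm->tm_min  = (ts >> EEPROM_TIMESTAMP_MINUTE) & 0x3f;		/* min[11:6]  */
	tm->tm_hour = (ts >> EEPROM_TIMESTAMP_HOUR) & 0x1f;		/* hh[16:12]  */
	tm->tm_mday = (ts >> EEPROM_TIMESTAMP_DAY) & 0x3f;		/* day[22:17] */
	tm->tm_mon  = (ts >> EEPROM_TIMESTAMP_MONTH) & 0xf;		/* mon[26:23] */
	tm->tm_year = ((ts >> EEPROM_TIMESTAMP_YEAR) & 0x1f) + 2000;	/* yy[31:27]  */
}
#endif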

static const struct ras_umc_ip_func *ras_umc_get_ip_func(
				struct ras_core_context *ras_core, uint32_t ip_version)
{
	switch (ip_version) {
	case IP_VERSION(12, 0, 0):
	case IP_VERSION(12, 5, 0):
		return &ras_umc_func_v12_0;
	default:
		RAS_DEV_ERR(ras_core->dev,
			"UMC ip version(0x%x) is not supported!\n", ip_version);
		break;
	}

	return NULL;
}

int ras_umc_psp_convert_ma_to_pa(struct ras_core_context *ras_core,
		struct umc_mca_addr *in, struct umc_phy_addr *out,
		uint32_t nps)
{
	struct ras_ta_query_address_input addr_in;
	struct ras_ta_query_address_output addr_out;
	int ret;

	if (!in)
		return -EINVAL;

	memset(&addr_in, 0, sizeof(addr_in));
	memset(&addr_out, 0, sizeof(addr_out));

	addr_in.ma.err_addr = in->err_addr;
	addr_in.ma.ch_inst = in->ch_inst;
	addr_in.ma.umc_inst = in->umc_inst;
	addr_in.ma.node_inst = in->node_inst;
	addr_in.ma.socket_id = in->socket_id;

	addr_in.addr_type = RAS_TA_MCA_TO_PA;

	ret = ras_psp_query_address(ras_core, &addr_in, &addr_out);
	if (ret) {
		RAS_DEV_WARN(ras_core->dev,
			"Failed to query RAS physical address for 0x%llx, ret:%d\n",
			in->err_addr, ret);
		return -EREMOTEIO;
	}

	if (out) {
		out->pa = addr_out.pa.pa;
		out->bank = addr_out.pa.bank;
		out->channel_idx = addr_out.pa.channel_idx;
	}

	return 0;
}
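
/*
 * Hypothetical caller sketch (not part of the driver): translate one
 * MCA-reported address and retire the resulting page. retire_page() is an
 * assumed name used for illustration only.
 */
#if 0
static void example_retire_from_mca(struct ras_core_context *ras_core,
				    uint64_t mca_addr, uint32_t nps)
{
	struct umc_mca_addr ma = {
		.err_addr  = mca_addr,	/* raw MCA error address */
		.ch_inst   = 0,
		.umc_inst  = 0,
		.node_inst = 0,
		.socket_id = 0,
	};
	struct umc_phy_addr pa = {0};

	/* 'out' may be NULL when only the translation status matters */
	if (!ras_umc_psp_convert_ma_to_pa(ras_core, &ma, &pa, nps))
		retire_page(RAS_ADDR_TO_PFN(pa.pa));
}
#endif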

static int ras_umc_log_ecc(struct ras_core_context *ras_core,
		unsigned long idx, void *data)
{
	struct ras_umc *ras_umc = &ras_core->ras_umc;
	int ret;

	mutex_lock(&ras_umc->tree_lock);
	ret = radix_tree_insert(&ras_umc->root, idx, data);
	if (!ret)
		radix_tree_tag_set(&ras_umc->root, idx, UMC_ECC_NEW_DETECTED_TAG);
	mutex_unlock(&ras_umc->tree_lock);

	return ret;
}

int ras_umc_clear_logged_ecc(struct ras_core_context *ras_core)
{
	struct ras_umc *ras_umc = &ras_core->ras_umc;
	uint64_t buf[8] = {0};
	void **slot;
	void *data;
	void *iter = buf;

	mutex_lock(&ras_umc->tree_lock);
	/* walk every logged record, unlink it from the tree and free it */
	radix_tree_for_each_slot(slot, &ras_umc->root, iter, 0) {
		data = ras_radix_tree_delete_iter(&ras_umc->root, iter);
		kfree(data);
	}
	mutex_unlock(&ras_umc->tree_lock);

	return 0;
}

static void ras_umc_reserve_eeprom_record(struct ras_core_context *ras_core,
				struct eeprom_umc_record *record)
{
	struct ras_umc *ras_umc = &ras_core->ras_umc;
	uint64_t page_pfn[16];
	int count = 0, i;

	memset(page_pfn, 0, sizeof(page_pfn));
	if (ras_umc->ip_func && ras_umc->ip_func->eeprom_record_to_nps_pages) {
		count = ras_umc->ip_func->eeprom_record_to_nps_pages(ras_core,
					record, record->cur_nps, page_pfn, ARRAY_SIZE(page_pfn));
		if (count <= 0) {
			RAS_DEV_ERR(ras_core->dev,
				"Failed to convert error address! count:%d\n", count);
			return;
		}
	}

	/* Reserve memory */
	for (i = 0; i < count; i++)
		ras_core_event_notify(ras_core,
			RAS_EVENT_ID__RESERVE_BAD_PAGE, &page_pfn[i]);
}

/* While a GPU reset is in progress, ECC logging is deferred: the bank is
 * queued here and replayed once the reset completes.
 */
int ras_umc_log_bad_bank_pending(struct ras_core_context *ras_core, struct ras_bank_ecc *bank)
{
	struct ras_umc *ras_umc = &ras_core->ras_umc;
	struct ras_bank_ecc_node *ecc_node;

	ecc_node = kzalloc(sizeof(*ecc_node), GFP_KERNEL);
	if (!ecc_node)
		return -ENOMEM;

	memcpy(&ecc_node->ecc, bank, sizeof(ecc_node->ecc));

	mutex_lock(&ras_umc->pending_ecc_lock);
	list_add_tail(&ecc_node->node, &ras_umc->pending_ecc_list);
	mutex_unlock(&ras_umc->pending_ecc_lock);

	return 0;
}

/* After the GPU reset completes, re-log the pending error banks that were
 * queued above.
 */
int ras_umc_log_pending_bad_bank(struct ras_core_context *ras_core)
{
	struct ras_umc *ras_umc = &ras_core->ras_umc;
	struct ras_bank_ecc_node *ecc_node, *tmp;

	mutex_lock(&ras_umc->pending_ecc_lock);
	list_for_each_entry_safe(ecc_node,
		tmp, &ras_umc->pending_ecc_list, node) {
		if (!ras_umc_log_bad_bank(ras_core, &ecc_node->ecc)) {
			list_del(&ecc_node->node);
			kfree(ecc_node);
		}
	}
	mutex_unlock(&ras_umc->pending_ecc_lock);

	return 0;
}
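
/*
 * Sketch of the expected flow around a GPU reset (call sites are
 * hypothetical):
 *
 *	reset begins -> ras_umc_log_bad_bank_pending(ras_core, bank)
 *	reset ends   -> ras_umc_log_pending_bad_bank(ras_core)
 *
 * Banks that still fail to log remain on pending_ecc_list for a later retry.
 */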

int ras_umc_log_bad_bank(struct ras_core_context *ras_core, struct ras_bank_ecc *bank)
{
	struct ras_umc *ras_umc = &ras_core->ras_umc;
	struct eeprom_umc_record umc_rec;
	struct eeprom_umc_record *err_rec;
	int ret;

	memset(&umc_rec, 0, sizeof(umc_rec));

	mutex_lock(&ras_umc->bank_log_lock);
	ret = ras_umc->ip_func->bank_to_eeprom_record(ras_core, bank, &umc_rec);
	if (ret)
		goto out;

	err_rec = kzalloc(sizeof(*err_rec), GFP_KERNEL);
	if (!err_rec) {
		ret = -ENOMEM;
		goto out;
	}

	memcpy(err_rec, &umc_rec, sizeof(umc_rec));
	ret = ras_umc_log_ecc(ras_core, err_rec->cur_nps_retired_row_pfn, err_rec);
	if (ret) {
		if (ret == -EEXIST) {
			RAS_DEV_INFO(ras_core->dev, "The bad pages have been logged before.\n");
			ret = 0;
		}

		kfree(err_rec);
		goto out;
	}

	ras_umc_reserve_eeprom_record(ras_core, err_rec);

	ret = ras_core_event_notify(ras_core,
			RAS_EVENT_ID__BAD_PAGE_DETECTED, NULL);

out:
	mutex_unlock(&ras_umc->bank_log_lock);
	return ret;
}

static int ras_umc_get_new_records(struct ras_core_context *ras_core,
			struct eeprom_umc_record *records, u32 num)
{
	struct ras_umc *ras_umc = &ras_core->ras_umc;
	struct eeprom_umc_record *entries[MAX_ECC_NUM_PER_RETIREMENT];
	u32 entry_num = num < MAX_ECC_NUM_PER_RETIREMENT ? num : MAX_ECC_NUM_PER_RETIREMENT;
	int count = 0;
	int new_detected, i;

	mutex_lock(&ras_umc->tree_lock);
	new_detected = radix_tree_gang_lookup_tag(&ras_umc->root, (void **)entries,
			0, entry_num, UMC_ECC_NEW_DETECTED_TAG);
	for (i = 0; i < new_detected; i++) {
		if (!entries[i])
			continue;

		memcpy(&records[i], entries[i], sizeof(struct eeprom_umc_record));
		count++;
		radix_tree_tag_clear(&ras_umc->root,
				entries[i]->cur_nps_retired_row_pfn, UMC_ECC_NEW_DETECTED_TAG);
	}
	mutex_unlock(&ras_umc->tree_lock);

	return count;
}

static bool ras_umc_check_retired_record(struct ras_core_context *ras_core,
				struct eeprom_umc_record *record, bool from_eeprom)
{
	struct ras_umc *ras_umc = &ras_core->ras_umc;
	struct eeprom_store_record *data = &ras_umc->umc_err_data.rom_data;
	uint32_t nps = 0;
	int i, ret;

	if (from_eeprom) {
		nps = ras_umc->umc_err_data.umc_nps_mode;
		if (ras_umc->ip_func && ras_umc->ip_func->eeprom_record_to_nps_record) {
			ret = ras_umc->ip_func->eeprom_record_to_nps_record(ras_core, record, nps);
			if (ret)
				RAS_DEV_WARN(ras_core->dev,
					"Failed to adjust eeprom record, ret:%d\n", ret);
		}
		return false;
	}

	for (i = 0; i < data->count; i++) {
		if ((data->bps[i].retired_row_pfn == record->retired_row_pfn) &&
		    (data->bps[i].cur_nps_retired_row_pfn == record->cur_nps_retired_row_pfn))
			return true;
	}

	return false;
}

/* alloc/realloc the bps array */
static int ras_umc_realloc_err_data_space(struct ras_core_context *ras_core,
		struct eeprom_store_record *data, int pages)
{
	unsigned int old_space = data->count + data->space_left;
	unsigned int new_space = old_space + pages;
	unsigned int align_space = ALIGN(new_space, 512);
	void *bps = kzalloc(align_space * sizeof(*data->bps), GFP_KERNEL);

	if (!bps)
		return -ENOMEM;

	if (data->bps) {
		memcpy(bps, data->bps,
				data->count * sizeof(*data->bps));
		kfree(data->bps);
	}

	data->bps = bps;
	data->space_left += align_space - old_space;
	return 0;
}
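
/*
 * Worked example of the growth arithmetic above: with count = 300,
 * space_left = 0 and pages = 256, new_space is 556 and ALIGN(556, 512)
 * rounds up to 1024, so the array grows to 1024 slots and space_left
 * becomes 1024 - 300 = 724.
 */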

static int ras_umc_update_eeprom_rom_data(struct ras_core_context *ras_core,
		struct eeprom_umc_record *bps)
{
	struct eeprom_store_record *data = &ras_core->ras_umc.umc_err_data.rom_data;

	if (!data->space_left &&
		ras_umc_realloc_err_data_space(ras_core, data, 256)) {
		return -ENOMEM;
	}

	memcpy(&data->bps[data->count], bps, sizeof(*data->bps));
	data->count++;
	data->space_left--;
	return 0;
}

static int ras_umc_update_eeprom_ram_data(struct ras_core_context *ras_core,
				struct eeprom_umc_record *bps)
{
	struct ras_umc *ras_umc = &ras_core->ras_umc;
	struct eeprom_store_record *data = &ras_umc->umc_err_data.ram_data;
	uint64_t page_pfn[16];
	int count = 0, needed, j;

	memset(page_pfn, 0, sizeof(page_pfn));
	if (ras_umc->ip_func && ras_umc->ip_func->eeprom_record_to_nps_pages)
		count = ras_umc->ip_func->eeprom_record_to_nps_pages(ras_core,
					bps, bps->cur_nps, page_pfn, ARRAY_SIZE(page_pfn));

	/* one entry is stored per page; make sure all of them fit */
	needed = count > 0 ? count : 1;
	if (data->space_left < needed &&
		ras_umc_realloc_err_data_space(ras_core, data, 256)) {
		return -ENOMEM;
	}

	if (count > 0) {
		for (j = 0; j < count; j++) {
			bps->cur_nps_retired_row_pfn = page_pfn[j];
			memcpy(&data->bps[data->count], bps, sizeof(*data->bps));
			data->count++;
			data->space_left--;
		}
	} else {
		memcpy(&data->bps[data->count], bps, sizeof(*data->bps));
		data->count++;
		data->space_left--;
	}

	return 0;
}

/* This deals with VRAM only. */
static int ras_umc_add_bad_pages(struct ras_core_context *ras_core,
				 struct eeprom_umc_record *bps,
				 int pages, bool from_eeprom)
{
	struct ras_umc *ras_umc = &ras_core->ras_umc;
	struct ras_umc_err_data *data = &ras_umc->umc_err_data;
	int i, ret = 0;

	if (!bps || pages <= 0)
		return 0;

	mutex_lock(&ras_umc->umc_lock);
	for (i = 0; i < pages; i++) {
		if (ras_umc_check_retired_record(ras_core, &bps[i], from_eeprom))
			continue;

		ret = ras_umc_update_eeprom_rom_data(ras_core, &bps[i]);
		if (ret)
			goto out;

		if (data->last_retired_pfn == bps[i].cur_nps_retired_row_pfn)
			continue;

		data->last_retired_pfn = bps[i].cur_nps_retired_row_pfn;

		if (from_eeprom)
			ras_umc_reserve_eeprom_record(ras_core, &bps[i]);

		ret = ras_umc_update_eeprom_ram_data(ras_core, &bps[i]);
		if (ret)
			goto out;
	}
out:
	mutex_unlock(&ras_umc->umc_lock);

	return ret;
}

/*
 * Read the error record array from EEPROM and reserve enough space for
 * storing new bad pages.
 */
int ras_umc_load_bad_pages(struct ras_core_context *ras_core)
{
	struct eeprom_umc_record *bps;
	uint32_t ras_num_recs;
	int ret;

	ras_num_recs = ras_eeprom_get_record_count(ras_core);
	/* no bad page record, skip eeprom access */
	if (!ras_num_recs ||
	    ras_core->ras_eeprom.record_threshold_config == DISABLE_RETIRE_PAGE)
		return 0;

	bps = kcalloc(ras_num_recs, sizeof(*bps), GFP_KERNEL);
	if (!bps)
		return -ENOMEM;

	ret = ras_eeprom_read(ras_core, bps, ras_num_recs);
	if (ret) {
		RAS_DEV_ERR(ras_core->dev, "Failed to load EEPROM table records!\n");
	} else {
		ras_core->ras_umc.umc_err_data.last_retired_pfn = UMC_INV_MEM_PFN;
		ret = ras_umc_add_bad_pages(ras_core, bps, ras_num_recs, true);
	}

	kfree(bps);
	return ret;
}

/*
 * Write the error record array to EEPROM; the function should be protected
 * by recovery_lock. Only entries that are not yet in EEPROM are appended.
 */
static int ras_umc_save_bad_pages(struct ras_core_context *ras_core)
{
	struct ras_umc *ras_umc = &ras_core->ras_umc;
	struct eeprom_store_record *data = &ras_umc->umc_err_data.rom_data;
	uint32_t eeprom_record_num;
	int save_count;
	int ret = 0;

	if (!data->bps)
		return 0;

	eeprom_record_num = ras_eeprom_get_record_count(ras_core);
	mutex_lock(&ras_umc->umc_lock);
	save_count = data->count - eeprom_record_num;
	/* only new entries are saved */
	if (save_count > 0) {
		if (ras_eeprom_append(ras_core,
					   &data->bps[eeprom_record_num],
					   save_count)) {
			RAS_DEV_ERR(ras_core->dev, "Failed to save EEPROM table data!\n");
			ret = -EIO;
			goto exit;
		}

		RAS_DEV_INFO(ras_core->dev, "Saved %d pages to EEPROM table.\n", save_count);
	}

exit:
	mutex_unlock(&ras_umc->umc_lock);
	return ret;
}
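
/*
 * Retirement pipeline, as wired up below: newly tagged records are drained
 * from the radix tree, merged into the RAM/ROM tables, and any entries not
 * yet present in EEPROM are appended.
 */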

int ras_umc_handle_bad_pages(struct ras_core_context *ras_core, void *data)
{
	struct eeprom_umc_record records[MAX_ECC_NUM_PER_RETIREMENT];
	int count, ret;

	memset(records, 0, sizeof(records));
	count = ras_umc_get_new_records(ras_core, records, ARRAY_SIZE(records));
	if (count <= 0)
		return -ENODATA;

	ret = ras_umc_add_bad_pages(ras_core, records, count, false);
	if (ret) {
		RAS_DEV_ERR(ras_core->dev, "Failed to add RAS bad pages!\n");
		return -EINVAL;
	}

	ret = ras_umc_save_bad_pages(ras_core);
	if (ret) {
		RAS_DEV_ERR(ras_core->dev, "Failed to save RAS bad pages!\n");
		return -EINVAL;
	}

	return 0;
}

int ras_umc_sw_init(struct ras_core_context *ras_core)
{
	struct ras_umc *ras_umc = &ras_core->ras_umc;

	memset(ras_umc, 0, sizeof(*ras_umc));

	INIT_LIST_HEAD(&ras_umc->pending_ecc_list);

	INIT_RADIX_TREE(&ras_umc->root, GFP_KERNEL);

	mutex_init(&ras_umc->tree_lock);
	mutex_init(&ras_umc->pending_ecc_lock);
	mutex_init(&ras_umc->umc_lock);
	mutex_init(&ras_umc->bank_log_lock);

	return 0;
}

int ras_umc_sw_fini(struct ras_core_context *ras_core)
{
	struct ras_umc *ras_umc = &ras_core->ras_umc;
	struct ras_umc_err_data *umc_err_data = &ras_umc->umc_err_data;
	struct ras_bank_ecc_node *ecc_node, *tmp;

	mutex_destroy(&ras_umc->umc_lock);
	mutex_destroy(&ras_umc->bank_log_lock);

	if (umc_err_data->rom_data.bps) {
		umc_err_data->rom_data.count = 0;
		kfree(umc_err_data->rom_data.bps);
		umc_err_data->rom_data.bps = NULL;
		umc_err_data->rom_data.space_left = 0;
	}

	if (umc_err_data->ram_data.bps) {
		umc_err_data->ram_data.count = 0;
		kfree(umc_err_data->ram_data.bps);
		umc_err_data->ram_data.bps = NULL;
		umc_err_data->ram_data.space_left = 0;
	}

	ras_umc_clear_logged_ecc(ras_core);

	mutex_lock(&ras_umc->pending_ecc_lock);
	list_for_each_entry_safe(ecc_node,
		tmp, &ras_umc->pending_ecc_list, node) {
		list_del(&ecc_node->node);
		kfree(ecc_node);
	}
	mutex_unlock(&ras_umc->pending_ecc_lock);

	mutex_destroy(&ras_umc->tree_lock);
	mutex_destroy(&ras_umc->pending_ecc_lock);

	return 0;
}

int ras_umc_hw_init(struct ras_core_context *ras_core)
{
	struct ras_umc *ras_umc = &ras_core->ras_umc;
	uint32_t nps;

	nps = ras_core_get_curr_nps_mode(ras_core);

	if (!nps || (nps >= UMC_MEMORY_PARTITION_MODE_UNKNOWN)) {
		RAS_DEV_ERR(ras_core->dev, "Invalid memory NPS mode: %u!\n", nps);
		return -ENODATA;
	}

	ras_umc->umc_err_data.umc_nps_mode = nps;

	ras_umc->umc_vram_type = ras_core->config->umc_cfg.umc_vram_type;
	if (!ras_umc->umc_vram_type) {
		RAS_DEV_ERR(ras_core->dev, "Invalid UMC VRAM Type: %u!\n",
			ras_umc->umc_vram_type);
		return -ENODATA;
	}

	ras_umc->umc_ip_version = ras_core->config->umc_ip_version;
	ras_umc->ip_func = ras_umc_get_ip_func(ras_core, ras_umc->umc_ip_version);
	if (!ras_umc->ip_func)
		return -EINVAL;

	return 0;
}

int ras_umc_hw_fini(struct ras_core_context *ras_core)
{
	return 0;
}

int ras_umc_clean_badpage_data(struct ras_core_context *ras_core)
{
	struct ras_umc_err_data *data = &ras_core->ras_umc.umc_err_data;

	mutex_lock(&ras_core->ras_umc.umc_lock);

	kfree(data->rom_data.bps);
	kfree(data->ram_data.bps);

	memset(data, 0, sizeof(*data));
	mutex_unlock(&ras_core->ras_umc.umc_lock);

	return 0;
}

int ras_umc_fill_eeprom_record(struct ras_core_context *ras_core,
		uint64_t err_addr, uint32_t umc_inst, struct umc_phy_addr *cur_nps_addr,
		enum umc_memory_partition_mode cur_nps, struct eeprom_umc_record *record)
{
	struct eeprom_umc_record *err_rec = record;

	/* Set bad page pfn and nps mode */
	EEPROM_RECORD_SETUP_UMC_ADDR_AND_NPS(err_rec,
			RAS_ADDR_TO_PFN(cur_nps_addr->pa), cur_nps);

	err_rec->address = err_addr;
	err_rec->ts = ras_umc_get_eeprom_timestamp(ras_core);
	err_rec->err_type = RAS_EEPROM_ERR_NON_RECOVERABLE;
	err_rec->cu = 0;
	err_rec->mem_channel = cur_nps_addr->channel_idx;
	err_rec->mcumc_id = umc_inst;
	err_rec->cur_nps_retired_row_pfn = RAS_ADDR_TO_PFN(cur_nps_addr->pa);
	err_rec->cur_nps_bank = cur_nps_addr->bank;
	err_rec->cur_nps = cur_nps;
	return 0;
}

int ras_umc_get_saved_eeprom_count(struct ras_core_context *ras_core)
{
	struct ras_umc_err_data *err_data = &ras_core->ras_umc.umc_err_data;

	return err_data->rom_data.count;
}

int ras_umc_get_badpage_count(struct ras_core_context *ras_core)
{
	struct eeprom_store_record *data = &ras_core->ras_umc.umc_err_data.ram_data;

	return data->count;
}

int ras_umc_get_badpage_record(struct ras_core_context *ras_core, uint32_t index, void *record)
{
	struct eeprom_store_record *data = &ras_core->ras_umc.umc_err_data.ram_data;

	if (index >= data->count)
		return -EINVAL;

	memcpy(record, &data->bps[index], sizeof(struct eeprom_umc_record));
	return 0;
}

bool ras_umc_check_retired_addr(struct ras_core_context *ras_core, uint64_t addr)
{
	struct ras_umc *ras_umc = &ras_core->ras_umc;
	struct eeprom_store_record *data = &ras_umc->umc_err_data.ram_data;
	uint64_t page_pfn = RAS_ADDR_TO_PFN(addr);
	bool ret = false;
	int i;

	mutex_lock(&ras_umc->umc_lock);
	for (i = 0; i < data->count; i++) {
		if (data->bps[i].cur_nps_retired_row_pfn == page_pfn) {
			ret = true;
			break;
		}
	}
	mutex_unlock(&ras_umc->umc_lock);

	return ret;
}

int ras_umc_translate_soc_pa_and_bank(struct ras_core_context *ras_core,
	uint64_t *soc_pa, struct umc_bank_addr *bank_addr, bool bank_to_pa)
{
	struct ras_umc *ras_umc = &ras_core->ras_umc;
	int ret = 0;

	if (bank_to_pa)
		ret = ras_umc->ip_func->bank_to_soc_pa(ras_core, *bank_addr, soc_pa);
	else
		ret = ras_umc->ip_func->soc_pa_to_bank(ras_core, *soc_pa, bank_addr);

	return ret;
}
708