xref: /linux/drivers/gpu/drm/amd/ras/rascore/ras_psp.c (revision 24f171c7e145f43b9f187578e89b0982ce87e54c)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright 2025 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  *
23  */
24 #include "ras.h"
25 #include "ras_ta_if.h"
26 #include "ras_psp.h"
27 #include "ras_psp_v13_0.h"
28 
29 /* position of instance value in sub_block_index of
30  * ta_ras_trigger_error_input, the sub block uses lower 12 bits
31  */
32 #define RAS_TA_INST_MASK 0xfffff000
33 #define RAS_TA_INST_SHIFT 0xc
34 
35 static const struct ras_psp_ip_func *ras_psp_get_ip_funcs(
36 			struct ras_core_context *ras_core, uint32_t ip_version)
37 {
38 	switch (ip_version) {
39 	case IP_VERSION(13, 0, 6):
40 	case IP_VERSION(13, 0, 14):
41 	case IP_VERSION(13, 0, 12):
42 		return &ras_psp_v13_0;
43 	default:
44 		RAS_DEV_ERR(ras_core->dev,
45 			"psp ip version(0x%x) is not supported!\n", ip_version);
46 		break;
47 	}
48 
49 	return NULL;
50 }
51 
52 static int ras_psp_sync_system_ras_psp_status(struct ras_core_context *ras_core)
53 {
54 	struct ras_psp *psp = &ras_core->ras_psp;
55 	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;
56 	struct ras_psp_ctx *psp_ctx = &ras_core->ras_psp.psp_ctx;
57 	struct ras_psp_sys_status status = {0};
58 	int ret;
59 
60 	if (psp->sys_func && psp->sys_func->get_ras_psp_system_status) {
61 		ret = psp->sys_func->get_ras_psp_system_status(ras_core, &status);
62 		if (ret)
63 			return ret;
64 
65 		if (status.initialized) {
66 			ta_ctx->preload_ras_ta_enabled = true;
67 			ta_ctx->ras_ta_initialized = status.initialized;
68 			ta_ctx->session_id = status.session_id;
69 		}
70 
71 		psp_ctx->external_mutex = status.psp_cmd_mutex;
72 	}
73 
74 	return 0;
75 }
76 
77 static int ras_psp_get_ras_ta_init_param(struct ras_core_context *ras_core,
78 	struct ras_ta_init_param *ras_ta_param)
79 {
80 	struct ras_psp *psp = &ras_core->ras_psp;
81 
82 	if (psp->sys_func && psp->sys_func->get_ras_ta_init_param)
83 		return psp->sys_func->get_ras_ta_init_param(ras_core, ras_ta_param);
84 
85 	RAS_DEV_ERR(ras_core->dev, "Not config get_ras_ta_init_param API!!\n");
86 	return -EACCES;
87 }
88 
89 static struct gpu_mem_block *ras_psp_get_gpu_mem(struct ras_core_context *ras_core,
90 			enum gpu_mem_type mem_type)
91 {
92 	struct ras_psp *psp = &ras_core->ras_psp;
93 	struct gpu_mem_block *gpu_mem = NULL;
94 	int ret;
95 
96 	switch (mem_type) {
97 	case GPU_MEM_TYPE_RAS_PSP_RING:
98 		gpu_mem = &psp->psp_ring.ras_ring_gpu_mem;
99 		break;
100 	case GPU_MEM_TYPE_RAS_PSP_CMD:
101 		gpu_mem = &psp->psp_ctx.psp_cmd_gpu_mem;
102 		break;
103 	case GPU_MEM_TYPE_RAS_PSP_FENCE:
104 		gpu_mem = &psp->psp_ctx.out_fence_gpu_mem;
105 		break;
106 	case GPU_MEM_TYPE_RAS_TA_FW:
107 		gpu_mem = &psp->ta_ctx.fw_gpu_mem;
108 		break;
109 	case GPU_MEM_TYPE_RAS_TA_CMD:
110 		gpu_mem = &psp->ta_ctx.cmd_gpu_mem;
111 		break;
112 	default:
113 		return NULL;
114 	}
115 
116 	if (!gpu_mem->ref_count) {
117 		ret = ras_core_get_gpu_mem(ras_core, mem_type, gpu_mem);
118 		if (ret)
119 			return NULL;
120 		gpu_mem->mem_type = mem_type;
121 	}
122 
123 	gpu_mem->ref_count++;
124 
125 	return gpu_mem;
126 }
127 
128 static int ras_psp_put_gpu_mem(struct ras_core_context *ras_core,
129 			struct gpu_mem_block *gpu_mem)
130 {
131 	if (!gpu_mem)
132 		return 0;
133 
134 	gpu_mem->ref_count--;
135 
136 	if (gpu_mem->ref_count > 0) {
137 		return 0;
138 	} else if (gpu_mem->ref_count < 0) {
139 		RAS_DEV_WARN(ras_core->dev,
140 			"Duplicate free gpu memory %u\n", gpu_mem->mem_type);
141 	} else {
142 		ras_core_put_gpu_mem(ras_core, gpu_mem->mem_type, gpu_mem);
143 		memset(gpu_mem, 0, sizeof(*gpu_mem));
144 	}
145 
146 	return 0;
147 }
148 
149 static void __acquire_psp_cmd_lock(struct ras_core_context *ras_core)
150 {
151 	struct ras_psp_ctx *psp_ctx = &ras_core->ras_psp.psp_ctx;
152 
153 	if (psp_ctx->external_mutex)
154 		mutex_lock(psp_ctx->external_mutex);
155 	else
156 		mutex_lock(&psp_ctx->internal_mutex);
157 }
158 
159 static void __release_psp_cmd_lock(struct ras_core_context *ras_core)
160 {
161 	struct ras_psp_ctx *psp_ctx = &ras_core->ras_psp.psp_ctx;
162 
163 	if (psp_ctx->external_mutex)
164 		mutex_unlock(psp_ctx->external_mutex);
165 	else
166 		mutex_unlock(&psp_ctx->internal_mutex);
167 }
168 
169 static uint32_t __get_ring_frame_slot(struct ras_core_context *ras_core)
170 {
171 	struct ras_psp *psp = &ras_core->ras_psp;
172 	uint32_t ras_ring_wptr_dw;
173 
174 	ras_ring_wptr_dw = psp->ip_func->psp_ras_ring_wptr_get(ras_core);
175 
176 	return div64_u64((ras_ring_wptr_dw << 2), sizeof(struct psp_gfx_rb_frame));
177 }
178 
179 static int __set_ring_frame_slot(struct ras_core_context *ras_core,
180 			uint32_t slot)
181 {
182 	struct ras_psp *psp = &ras_core->ras_psp;
183 
184 	return psp->ip_func->psp_ras_ring_wptr_set(ras_core,
185 				(slot * sizeof(struct psp_gfx_rb_frame)) >> 2);
186 }
187 
/* Copy @frame into the next free slot of the RAS PSP ring buffer and
 * advance the ring write pointer so the PSP picks the frame up.
 *
 * The frame is verified by reading back its fence value before the
 * write pointer is bumped; the statement order here is deliberate and
 * must not be rearranged.
 *
 * Returns 0 on success, -ENOMEM if the ring memory is unavailable, or
 * -EACCES if the read-back does not match what was written.
 */
static int write_frame_to_ras_psp_ring(struct ras_core_context *ras_core,
		struct psp_gfx_rb_frame *frame)
{
	struct gpu_mem_block *ring_mem;
	struct psp_gfx_rb_frame *rb_frame;
	uint32_t max_frame_slot;
	uint32_t slot_idx;
	uint32_t write_flush_read_back = 0;
	int ret = 0;

	/* Hold a reference on the ring memory for the whole operation. */
	ring_mem = ras_psp_get_gpu_mem(ras_core, GPU_MEM_TYPE_RAS_PSP_RING);
	if (!ring_mem)
		return -ENOMEM;

	/* Number of whole frames that fit in the ring. */
	max_frame_slot =
		div64_u64(ring_mem->mem_size, sizeof(struct psp_gfx_rb_frame));

	rb_frame =
		(struct psp_gfx_rb_frame *)ring_mem->mem_cpu_addr;

	/* Current hardware write position, wrapped to the ring size. */
	slot_idx = __get_ring_frame_slot(ras_core);
	if (slot_idx >= max_frame_slot)
		slot_idx = 0;

	memcpy(&rb_frame[slot_idx], frame, sizeof(*frame));

	/* Do a read to force the write of the frame before writing
	 * write pointer.
	 */
	write_flush_read_back = rb_frame[slot_idx].fence_value;
	if (write_flush_read_back != frame->fence_value) {
		RAS_DEV_ERR(ras_core->dev,
		"Failed to submit ring cmd! cmd:0x%x:0x%x, fence:0x%x:0x%x value:%u, expected:%u\n",
			rb_frame[slot_idx].cmd_buf_addr_hi,
			rb_frame[slot_idx].cmd_buf_addr_lo,
			rb_frame[slot_idx].fence_addr_hi,
			rb_frame[slot_idx].fence_addr_lo,
			write_flush_read_back, frame->fence_value);
		ret = -EACCES;
		goto err;
	}

	/* Advance to the next slot, wrapping at the end of the ring. */
	slot_idx++;

	if (slot_idx >= max_frame_slot)
		slot_idx = 0;

	__set_ring_frame_slot(ras_core, slot_idx);

err:
	ras_psp_put_gpu_mem(ras_core, ring_mem);
	return ret;
}
241 
/* Submit one GFX command to the PSP and wait (polling, ~2s worst case)
 * for its fence to signal, then copy the PSP response into @resp.
 *
 * Serialized by the PSP command lock (external or internal). The fence
 * wait is abandoned early when a RAS interrupt is detected, because a
 * GPU reset may be pending and the lock must be released for the PSP
 * resume sequence.
 *
 * NOTE(review): a fence timeout is not reported as an error here — the
 * caller is expected to detect failure via @resp->status. Confirm that
 * every caller does so before relying on the return code alone.
 *
 * Returns 0 on submission success, -EINVAL for bad arguments, -ENOMEM
 * if command/fence buffers are unavailable, or the ring-write error.
 */
static int send_psp_cmd(struct ras_core_context *ras_core,
		enum psp_gfx_cmd_id gfx_cmd_id, void *cmd_data,
		uint32_t cmd_size, struct psp_cmd_resp *resp)
{
	struct ras_psp_ctx *psp_ctx = &ras_core->ras_psp.psp_ctx;
	struct gpu_mem_block *psp_cmd_buf = NULL;
	struct gpu_mem_block *psp_fence_buf = NULL;
	struct psp_gfx_cmd_resp *gfx_cmd;
	struct psp_gfx_rb_frame rb_frame;
	int ret = 0;
	int timeout = 1000;

	if (!cmd_data || (cmd_size > sizeof(union psp_gfx_commands)) || !resp) {
		RAS_DEV_ERR(ras_core->dev, "Invalid RAS PSP command, id: %u\n", gfx_cmd_id);
		return -EINVAL;
	}

	__acquire_psp_cmd_lock(ras_core);

	psp_cmd_buf = ras_psp_get_gpu_mem(ras_core, GPU_MEM_TYPE_RAS_PSP_CMD);
	if (!psp_cmd_buf) {
		ret = -ENOMEM;
		goto exit;
	}

	psp_fence_buf = ras_psp_get_gpu_mem(ras_core, GPU_MEM_TYPE_RAS_PSP_FENCE);
	if (!psp_fence_buf) {
		ret = -ENOMEM;
		goto exit;
	}

	/* Stage the command in the shared GPU-visible buffer. */
	gfx_cmd = (struct psp_gfx_cmd_resp *)psp_cmd_buf->mem_cpu_addr;
	memset(gfx_cmd, 0, sizeof(*gfx_cmd));
	gfx_cmd->cmd_id = gfx_cmd_id;
	memcpy(&gfx_cmd->cmd, cmd_data, cmd_size);

	/* New fence value the PSP will write back on completion. */
	psp_ctx->in_fence_value++;

	memset(&rb_frame, 0, sizeof(rb_frame));
	rb_frame.cmd_buf_addr_hi = upper_32_bits(psp_cmd_buf->mem_mc_addr);
	rb_frame.cmd_buf_addr_lo = lower_32_bits(psp_cmd_buf->mem_mc_addr);
	rb_frame.fence_addr_hi = upper_32_bits(psp_fence_buf->mem_mc_addr);
	rb_frame.fence_addr_lo = lower_32_bits(psp_fence_buf->mem_mc_addr);
	rb_frame.fence_value = psp_ctx->in_fence_value;

	ret = write_frame_to_ras_psp_ring(ras_core, &rb_frame);
	if (ret) {
		/* Frame never reached the ring; roll the fence back. */
		psp_ctx->in_fence_value--;
		goto exit;
	}

	/* Poll the fence buffer until the PSP echoes our fence value. */
	while (*((uint64_t *)psp_fence_buf->mem_cpu_addr) !=
		   psp_ctx->in_fence_value) {
		if (--timeout == 0)
			break;
		/*
		 * Shouldn't wait for timeout when err_event_athub occurs,
		 * because gpu reset thread triggered and lock resource should
		 * be released for psp resume sequence.
		 */
		if (ras_core_ras_interrupt_detected(ras_core))
			break;

		msleep(2);
	}

	resp->status = gfx_cmd->resp.status;
	resp->session_id = gfx_cmd->resp.session_id;

exit:
	ras_psp_put_gpu_mem(ras_core, psp_cmd_buf);
	ras_psp_put_gpu_mem(ras_core, psp_fence_buf);

	__release_psp_cmd_lock(ras_core);

	return ret;
}
319 
320 static void __check_ras_ta_cmd_resp(struct ras_core_context *ras_core,
321 			struct ras_ta_cmd *ras_cmd)
322 {
323 
324 	if (ras_cmd->ras_out_message.flags.err_inject_switch_disable_flag) {
325 		RAS_DEV_WARN(ras_core->dev, "ECC switch disabled\n");
326 		ras_cmd->ras_status = RAS_TA_STATUS__ERROR_RAS_NOT_AVAILABLE;
327 	} else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag)
328 		RAS_DEV_WARN(ras_core->dev, "RAS internal register access blocked\n");
329 
330 	switch (ras_cmd->ras_status) {
331 	case RAS_TA_STATUS__ERROR_UNSUPPORTED_IP:
332 		RAS_DEV_WARN(ras_core->dev,
333 			 "RAS WARNING: cmd failed due to unsupported ip\n");
334 		break;
335 	case RAS_TA_STATUS__ERROR_UNSUPPORTED_ERROR_INJ:
336 		RAS_DEV_WARN(ras_core->dev,
337 			 "RAS WARNING: cmd failed due to unsupported error injection\n");
338 		break;
339 	case RAS_TA_STATUS__SUCCESS:
340 		break;
341 	case RAS_TA_STATUS__TEE_ERROR_ACCESS_DENIED:
342 		if (ras_cmd->cmd_id == RAS_TA_CMD_ID__TRIGGER_ERROR)
343 			RAS_DEV_WARN(ras_core->dev,
344 				 "RAS WARNING: Inject error to critical region is not allowed\n");
345 		break;
346 	default:
347 		RAS_DEV_WARN(ras_core->dev,
348 			 "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status);
349 		break;
350 	}
351 }
352 
/* Invoke a runtime command on the loaded RAS TA session.
 *
 * @in/@in_size:  command input, copied into the shared TA command buffer.
 * @out/@out_size: optional output, copied back on TA success.
 *
 * Acquires, in order: TA command memory, the GPU reset trylock, then
 * the TA mutex; the unwind labels release them in reverse. The ordering
 * is load-bearing — do not rearrange.
 *
 * Returns 0 on success (including the interrupt-detected case, where
 * the TA's status is no longer trustworthy), -EINVAL for bad arguments
 * or an unsupported TA interface version, -ENOMEM/-EACCES for resource
 * failures, or -ESTRPIPE when the PSP rejects the command.
 */
static int send_ras_ta_runtime_cmd(struct ras_core_context *ras_core,
			enum ras_ta_cmd_id cmd_id, void *in, uint32_t in_size,
			void *out, uint32_t out_size)
{
	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;
	struct gpu_mem_block *cmd_mem;
	struct ras_ta_cmd *ras_cmd;
	struct psp_gfx_cmd_invoke_cmd invoke_cmd = {0};
	struct psp_cmd_resp resp = {0};
	int ret = 0;

	if (!in || (in_size > sizeof(union ras_ta_cmd_input)) ||
		(cmd_id >= MAX_RAS_TA_CMD_ID)) {
		RAS_DEV_ERR(ras_core->dev, "Invalid RAS TA command, id: %u\n", cmd_id);
		return -EINVAL;
	}

	/* Refresh session id/preload state in case the system changed it. */
	ras_psp_sync_system_ras_psp_status(ras_core);

	cmd_mem = ras_psp_get_gpu_mem(ras_core, GPU_MEM_TYPE_RAS_TA_CMD);
	if (!cmd_mem)
		return -ENOMEM;

	/* Bail out rather than block if a GPU reset is in flight. */
	if (!ras_core_down_trylock_gpu_reset_lock(ras_core)) {
		ret = -EACCES;
		goto out;
	}

	ras_cmd = (struct ras_ta_cmd *)cmd_mem->mem_cpu_addr;

	mutex_lock(&ta_ctx->ta_mutex);

	memset(ras_cmd, 0, sizeof(*ras_cmd));
	ras_cmd->cmd_id = cmd_id;
	memcpy(&ras_cmd->ras_in_message, in, in_size);

	invoke_cmd.ta_cmd_id = cmd_id;
	invoke_cmd.session_id = ta_ctx->session_id;

	ret = send_psp_cmd(ras_core, GFX_CMD_ID_INVOKE_CMD,
			&invoke_cmd, sizeof(invoke_cmd), &resp);

	/* If err_event_athub occurs error inject was successful, however
	 *  return status from TA is no long reliable
	 */
	if (ras_core_ras_interrupt_detected(ras_core)) {
		ret = 0;
		goto unlock;
	}

	if (ret || resp.status) {
		RAS_DEV_ERR(ras_core->dev,
			"RAS: Failed to send psp cmd! ret:%d, status:%u\n",
			ret, resp.status);
		ret = -ESTRPIPE;
		goto unlock;
	}

	/* Reject responses from a TA newer than this host interface. */
	if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) {
		RAS_DEV_WARN(ras_core->dev, "RAS: Unsupported Interface\n");
		ret = -EINVAL;
		goto unlock;
	}

	/* Copy the TA's output back only on TA-reported success. */
	if (!ras_cmd->ras_status && out && out_size)
		memcpy(out, &ras_cmd->ras_out_message, out_size);

	__check_ras_ta_cmd_resp(ras_core, ras_cmd);

unlock:
	mutex_unlock(&ta_ctx->ta_mutex);
	ras_core_up_gpu_reset_lock(ras_core);
out:
	ras_psp_put_gpu_mem(ras_core, cmd_mem);
	return ret;
}
429 
430 static int trigger_ras_ta_error(struct ras_core_context *ras_core,
431 	struct ras_ta_trigger_error_input *info, uint32_t instance_mask)
432 {
433 	uint32_t dev_mask = 0;
434 
435 	switch (info->block_id) {
436 	case RAS_TA_BLOCK__GFX:
437 		if (ras_gfx_get_ta_subblock(ras_core, info->inject_error_type,
438 				info->sub_block_index, &info->sub_block_index))
439 			return -EINVAL;
440 
441 		dev_mask = RAS_GET_MASK(ras_core->dev, GC, instance_mask);
442 		break;
443 	case RAS_TA_BLOCK__SDMA:
444 		dev_mask = RAS_GET_MASK(ras_core->dev, SDMA0, instance_mask);
445 		break;
446 	case RAS_TA_BLOCK__VCN:
447 	case RAS_TA_BLOCK__JPEG:
448 		dev_mask = RAS_GET_MASK(ras_core->dev, VCN, instance_mask);
449 		break;
450 	default:
451 		dev_mask = instance_mask;
452 		break;
453 	}
454 
455 	/* reuse sub_block_index for backward compatibility */
456 	dev_mask <<= RAS_TA_INST_SHIFT;
457 	dev_mask &= RAS_TA_INST_MASK;
458 	info->sub_block_index |= dev_mask;
459 
460 	return send_ras_ta_runtime_cmd(ras_core, RAS_TA_CMD_ID__TRIGGER_ERROR,
461 				info, sizeof(*info), NULL, 0);
462 }
463 
/* Stage the RAS TA firmware image and its startup parameters in shared
 * GPU memory, then ask the PSP to load the TA. On success, records the
 * TA version and session id in @ta_ctx and marks it initialized.
 *
 * Returns 0 on success; -ENOMEM if shared memory is unavailable,
 * -EACCES if a GPU reset is in flight, or the init-param/PSP error.
 */
static int send_load_ta_fw_cmd(struct ras_core_context *ras_core,
				struct ras_ta_ctx *ta_ctx)
{
	struct ras_ta_fw_bin  *fw_bin = &ta_ctx->fw_bin;
	struct gpu_mem_block *fw_mem;
	struct gpu_mem_block *cmd_mem;
	struct ras_ta_cmd *ta_cmd;
	struct ras_ta_init_flags *ta_init_flags;
	struct psp_gfx_cmd_load_ta  psp_load_ta_cmd;
	struct psp_cmd_resp resp = {0};
	struct ras_ta_image_header *fw_hdr = NULL;
	int ret;

	fw_mem = ras_psp_get_gpu_mem(ras_core, GPU_MEM_TYPE_RAS_TA_FW);
	if (!fw_mem)
		return -ENOMEM;

	cmd_mem = ras_psp_get_gpu_mem(ras_core, GPU_MEM_TYPE_RAS_TA_CMD);
	if (!cmd_mem) {
		ret = -ENOMEM;
		goto err;
	}

	ret = ras_psp_get_ras_ta_init_param(ras_core, &ta_ctx->init_param);
	if (ret)
		goto err;

	/* Don't race a GPU reset; trylock only. */
	if (!ras_core_down_trylock_gpu_reset_lock(ras_core)) {
		ret = -EACCES;
		goto err;
	}

	/* copy ras ta binary to shared gpu memory */
	/* NOTE(review): assumes fw_bin->bin_size fits within the allocated
	 * fw_mem block — confirm the allocation size in ras_core_get_gpu_mem.
	 */
	memcpy(fw_mem->mem_cpu_addr, fw_bin->bin_addr, fw_bin->bin_size);
	fw_mem->mem_size = fw_bin->bin_size;

	/* Initialize ras ta startup parameter */
	ta_cmd = (struct ras_ta_cmd *)cmd_mem->mem_cpu_addr;
	ta_init_flags = &ta_cmd->ras_in_message.init_flags;

	ta_init_flags->poison_mode_en = ta_ctx->init_param.poison_mode_en;
	ta_init_flags->dgpu_mode = ta_ctx->init_param.dgpu_mode;
	ta_init_flags->xcc_mask = ta_ctx->init_param.xcc_mask;
	ta_init_flags->channel_dis_num = ta_ctx->init_param.channel_dis_num;
	ta_init_flags->nps_mode = ta_ctx->init_param.nps_mode;
	ta_init_flags->active_umc_mask = ta_ctx->init_param.active_umc_mask;

	/* Setup load ras ta command */
	memset(&psp_load_ta_cmd, 0, sizeof(psp_load_ta_cmd));
	psp_load_ta_cmd.app_phy_addr_lo	= lower_32_bits(fw_mem->mem_mc_addr);
	psp_load_ta_cmd.app_phy_addr_hi	= upper_32_bits(fw_mem->mem_mc_addr);
	psp_load_ta_cmd.app_len		= fw_mem->mem_size;
	psp_load_ta_cmd.cmd_buf_phy_addr_lo = lower_32_bits(cmd_mem->mem_mc_addr);
	psp_load_ta_cmd.cmd_buf_phy_addr_hi = upper_32_bits(cmd_mem->mem_mc_addr);
	psp_load_ta_cmd.cmd_buf_len = cmd_mem->mem_size;

	ret = send_psp_cmd(ras_core, GFX_CMD_ID_LOAD_TA,
			&psp_load_ta_cmd, sizeof(psp_load_ta_cmd), &resp);
	if (!ret && !resp.status) {
		/* Read TA version at FW offset 0x60 if TA version not found*/
		fw_hdr = (struct ras_ta_image_header *)fw_bin->bin_addr;
		RAS_DEV_INFO(ras_core->dev, "PSP: RAS TA(version:%X.%X.%X.%X) is loaded.\n",
			(fw_hdr->image_version >> 24) & 0xFF, (fw_hdr->image_version >> 16) & 0xFF,
			(fw_hdr->image_version >> 8) & 0xFF, fw_hdr->image_version & 0xFF);
		ta_ctx->ta_version = fw_hdr->image_version;
		ta_ctx->session_id = resp.session_id;
		ta_ctx->ras_ta_initialized = true;
	} else {
		RAS_DEV_ERR(ras_core->dev,
			"Failed to load RAS TA! ret:%d, status:%d\n", ret, resp.status);
	}

	ras_core_up_gpu_reset_lock(ras_core);

err:
	/* ras_psp_put_gpu_mem() tolerates NULL, so cmd_mem may be unset. */
	ras_psp_put_gpu_mem(ras_core, fw_mem);
	ras_psp_put_gpu_mem(ras_core, cmd_mem);
	return ret;
}
543 
544 static int load_ras_ta_firmware(struct ras_core_context *ras_core,
545 		struct ras_psp_ta_load *ras_ta_load)
546 {
547 	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;
548 	struct ras_ta_fw_bin  *fw_bin = &ta_ctx->fw_bin;
549 	int ret;
550 
551 	fw_bin->bin_addr = ras_ta_load->bin_addr;
552 	fw_bin->bin_size = ras_ta_load->bin_size;
553 	fw_bin->fw_version = ras_ta_load->fw_version;
554 	fw_bin->feature_version = ras_ta_load->feature_version;
555 
556 	ret = send_load_ta_fw_cmd(ras_core, ta_ctx);
557 	if (!ret) {
558 		ras_ta_load->out_session_id = ta_ctx->session_id;
559 		ras_ta_load->out_loaded_ta_version = ta_ctx->ta_version;
560 	}
561 
562 	return ret;
563 }
564 
/* Ask the PSP to unload the current RAS TA session and, on success,
 * release the cached firmware image and reset the TA context.
 *
 * Returns 0 on success, -EACCES if a GPU reset is in flight, or a
 * negative error when the PSP rejects the unload.
 */
static int unload_ras_ta_firmware(struct ras_core_context *ras_core,
		struct ras_psp_ta_unload *ras_ta_unload)
{
	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;
	struct psp_gfx_cmd_unload_ta  cmd_unload_ta = {0};
	struct psp_cmd_resp resp = {0};
	int ret;

	if (!ras_core_down_trylock_gpu_reset_lock(ras_core))
		return -EACCES;

	cmd_unload_ta.session_id = ta_ctx->session_id;
	ret = send_psp_cmd(ras_core, GFX_CMD_ID_UNLOAD_TA,
		&cmd_unload_ta, sizeof(cmd_unload_ta), &resp);
	if (ret || resp.status) {
		RAS_DEV_ERR(ras_core->dev,
			"Failed to unload RAS TA! ret:%d, status:%u\n",
			ret, resp.status);
		goto unlock;
	}

	/* NOTE(review): bin_addr is freed here, implying this module owns
	 * the buffer passed in via ras_psp_ta_load — confirm the caller
	 * allocated it with kmalloc and does not free it itself.
	 */
	kfree(ta_ctx->fw_bin.bin_addr);
	memset(&ta_ctx->fw_bin, 0, sizeof(ta_ctx->fw_bin));
	ta_ctx->ta_version = 0;
	ta_ctx->ras_ta_initialized = false;
	ta_ctx->session_id = 0;

unlock:
	ras_core_up_gpu_reset_lock(ras_core);

	return ret;
}
597 
598 int ras_psp_load_firmware(struct ras_core_context *ras_core,
599 	struct ras_psp_ta_load *ras_ta_load)
600 {
601 	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;
602 	struct ras_psp_ta_unload ras_ta_unload = {0};
603 	int ret;
604 
605 	if (ta_ctx->preload_ras_ta_enabled)
606 		return 0;
607 
608 	if (!ras_ta_load)
609 		return -EINVAL;
610 
611 	if (ta_ctx->ras_ta_initialized) {
612 		ras_ta_unload.ras_session_id = ta_ctx->session_id;
613 		ret = unload_ras_ta_firmware(ras_core, &ras_ta_unload);
614 		if (ret)
615 			return ret;
616 	}
617 
618 	return load_ras_ta_firmware(ras_core, ras_ta_load);
619 }
620 
621 int ras_psp_unload_firmware(struct ras_core_context *ras_core,
622 	struct ras_psp_ta_unload *ras_ta_unload)
623 {
624 	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;
625 
626 	if (ta_ctx->preload_ras_ta_enabled)
627 		return 0;
628 
629 	if ((!ras_ta_unload) ||
630 	    (ras_ta_unload->ras_session_id != ta_ctx->session_id))
631 		return -EINVAL;
632 
633 	return unload_ras_ta_firmware(ras_core, ras_ta_unload);
634 }
635 
636 int ras_psp_trigger_error(struct ras_core_context *ras_core,
637 	struct ras_ta_trigger_error_input *info, uint32_t instance_mask)
638 {
639 	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;
640 
641 	if (!ta_ctx->preload_ras_ta_enabled && !ta_ctx->ras_ta_initialized) {
642 		RAS_DEV_ERR(ras_core->dev, "RAS: ras firmware not initialized!");
643 		return -ENOEXEC;
644 	}
645 
646 	if (!info)
647 		return -EINVAL;
648 
649 	return trigger_ras_ta_error(ras_core, info, instance_mask);
650 }
651 
652 int ras_psp_query_address(struct ras_core_context *ras_core,
653 		struct ras_ta_query_address_input *addr_in,
654 		struct ras_ta_query_address_output *addr_out)
655 {
656 	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;
657 
658 	if (!ta_ctx->preload_ras_ta_enabled &&
659 	    !ta_ctx->ras_ta_initialized) {
660 		RAS_DEV_ERR(ras_core->dev, "RAS: ras firmware not initialized!");
661 		return -ENOEXEC;
662 	}
663 
664 	if (!addr_in || !addr_out)
665 		return -EINVAL;
666 
667 	return send_ras_ta_runtime_cmd(ras_core, RAS_TA_CMD_ID__QUERY_ADDRESS,
668 		addr_in, sizeof(*addr_in), addr_out, sizeof(*addr_out));
669 }
670 
671 int ras_psp_sw_init(struct ras_core_context *ras_core)
672 {
673 	struct ras_psp *psp = &ras_core->ras_psp;
674 
675 	memset(psp, 0, sizeof(*psp));
676 
677 	psp->sys_func = ras_core->config->psp_cfg.psp_sys_fn;
678 	if (!psp->sys_func) {
679 		RAS_DEV_ERR(ras_core->dev, "RAS psp sys function not configured!\n");
680 		return -EINVAL;
681 	}
682 
683 	mutex_init(&psp->psp_ctx.internal_mutex);
684 	mutex_init(&psp->ta_ctx.ta_mutex);
685 
686 	return 0;
687 }
688 
689 int ras_psp_sw_fini(struct ras_core_context *ras_core)
690 {
691 	struct ras_psp *psp = &ras_core->ras_psp;
692 
693 	mutex_destroy(&psp->psp_ctx.internal_mutex);
694 	mutex_destroy(&psp->ta_ctx.ta_mutex);
695 
696 	memset(psp, 0, sizeof(*psp));
697 
698 	return 0;
699 }
700 
701 int ras_psp_hw_init(struct ras_core_context *ras_core)
702 {
703 	struct ras_psp *psp = &ras_core->ras_psp;
704 
705 	psp->psp_ip_version = ras_core->config->psp_ip_version;
706 
707 	psp->ip_func = ras_psp_get_ip_funcs(ras_core, psp->psp_ip_version);
708 	if (!psp->ip_func)
709 		return -EINVAL;
710 
711 	/* After GPU reset, the system RAS PSP status may change.
712 	 * therefore, it is necessary to synchronize the system status again.
713 	 */
714 	ras_psp_sync_system_ras_psp_status(ras_core);
715 
716 	return 0;
717 }
718 
/* Hardware teardown: nothing to do today; kept for interface symmetry
 * with ras_psp_hw_init(). Always returns 0.
 */
int ras_psp_hw_fini(struct ras_core_context *ras_core)
{
	return 0;
}
723 
724 bool ras_psp_check_supported_cmd(struct ras_core_context *ras_core,
725 		enum ras_ta_cmd_id cmd_id)
726 {
727 	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;
728 	bool ret = false;
729 
730 	if (!ta_ctx->preload_ras_ta_enabled && !ta_ctx->ras_ta_initialized)
731 		return false;
732 
733 	switch (cmd_id) {
734 	case RAS_TA_CMD_ID__QUERY_ADDRESS:
735 		/* Currently, querying the address from RAS TA is only supported
736 		 * when the RAS TA firmware is loaded during driver installation.
737 		 */
738 		if (ta_ctx->preload_ras_ta_enabled)
739 			ret = true;
740 		break;
741 	case RAS_TA_CMD_ID__TRIGGER_ERROR:
742 		ret = true;
743 		break;
744 	default:
745 		ret = false;
746 		break;
747 	}
748 
749 	return ret;
750 }
751