1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright 2025 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 
 *
 */
#include "ras.h"
#include "ras_ta_if.h"
#include "ras_psp.h"
#include "ras_psp_v13_0.h"

/* position of instance value in sub_block_index of
 * ta_ras_trigger_error_input, the sub block uses lower 12 bits
 */
#define RAS_TA_INST_MASK 0xfffff000
#define RAS_TA_INST_SHIFT 0xc

/* Map a PSP hardware IP version to its function table.
 * Returns NULL (with an error log) for unsupported versions.
 */
static const struct ras_psp_ip_func *ras_psp_get_ip_funcs(
		struct ras_core_context *ras_core, uint32_t ip_version)
{
	switch (ip_version) {
	case IP_VERSION(13, 0, 6):
	case IP_VERSION(13, 0, 14):
	case IP_VERSION(13, 0, 12):
		/* All supported versions share the v13.0 function table. */
		return &ras_psp_v13_0;
	default:
		RAS_DEV_ERR(ras_core->dev,
			"psp ip version(0x%x) is not supported!\n", ip_version);
		break;
	}

	return NULL;
}

/* Pull the system-level RAS PSP state (TA preload status, session id,
 * external command mutex) into the local contexts via the optional
 * sys_func callback.  A missing callback is not an error: the local
 * state is simply left untouched and 0 is returned.
 */
static int ras_psp_sync_system_ras_psp_status(struct ras_core_context *ras_core)
{
	struct ras_psp *psp = &ras_core->ras_psp;
	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;
	struct ras_psp_ctx *psp_ctx = &ras_core->ras_psp.psp_ctx;
	struct ras_psp_sys_status status = {0};
	int ret;

	if (psp->sys_func && psp->sys_func->get_ras_psp_system_status) {
		ret = psp->sys_func->get_ras_psp_system_status(ras_core, &status);
		if (ret)
			return ret;

		if (status.initialized) {
			/* The system already loaded the RAS TA for us; adopt
			 * its session rather than loading our own copy.
			 */
			ta_ctx->preload_ras_ta_enabled = true;
			ta_ctx->ras_ta_initialized = status.initialized;
			ta_ctx->session_id = status.session_id;
		}

		/* May be NULL; lock helpers fall back to internal_mutex. */
		psp_ctx->external_mutex = status.psp_cmd_mutex;
	}

	return 0;
}

/* Fetch the RAS TA initialization parameters through the mandatory
 * sys_func callback.  Returns -EACCES when the callback is not wired up.
 */
static int ras_psp_get_ras_ta_init_param(struct ras_core_context *ras_core,
		struct ras_ta_init_param *ras_ta_param)
{
	struct ras_psp *psp = &ras_core->ras_psp;

	if (psp->sys_func && psp->sys_func->get_ras_ta_init_param)
		return psp->sys_func->get_ras_ta_init_param(ras_core, ras_ta_param);

	RAS_DEV_ERR(ras_core->dev, "Not config get_ras_ta_init_param API!!\n");
	return -EACCES;
}

/* Look up (and lazily allocate) the refcounted GPU memory block backing
 * the given mem_type.  The first caller triggers the actual allocation
 * via ras_core_get_gpu_mem(); subsequent callers just bump ref_count.
 * Returns NULL on unknown type or allocation failure.
 * Pair every successful call with ras_psp_put_gpu_mem().
 * NOTE(review): ref_count is not protected here — presumably callers
 * serialize via the psp cmd lock or ta_mutex; confirm before reuse.
 */
static struct gpu_mem_block *ras_psp_get_gpu_mem(struct ras_core_context *ras_core,
		enum gpu_mem_type mem_type)
{
	struct ras_psp *psp = &ras_core->ras_psp;
	struct gpu_mem_block *gpu_mem = NULL;
	int ret;

	switch (mem_type) {
	case GPU_MEM_TYPE_RAS_PSP_RING:
		gpu_mem = &psp->psp_ring.ras_ring_gpu_mem;
		break;
	case GPU_MEM_TYPE_RAS_PSP_CMD:
		gpu_mem = &psp->psp_ctx.psp_cmd_gpu_mem;
		break;
	case GPU_MEM_TYPE_RAS_PSP_FENCE:
		gpu_mem = &psp->psp_ctx.out_fence_gpu_mem;
		break;
	case GPU_MEM_TYPE_RAS_TA_FW:
		gpu_mem = &psp->ta_ctx.fw_gpu_mem;
		break;
	case GPU_MEM_TYPE_RAS_TA_CMD:
		gpu_mem = &psp->ta_ctx.cmd_gpu_mem;
		break;
	default:
		return NULL;
	}

	if (!gpu_mem->ref_count) {
		/* First user: allocate the backing GPU memory now. */
		ret = ras_core_get_gpu_mem(ras_core, mem_type, gpu_mem);
		if (ret)
			return NULL;
		gpu_mem->mem_type = mem_type;
	}

	gpu_mem->ref_count++;

	return gpu_mem;
}

/* Drop one reference on a GPU memory block obtained from
 * ras_psp_get_gpu_mem(); frees and clears the block when the count
 * reaches zero.  NULL is accepted as a no-op so error paths can call
 * this unconditionally.  A negative count indicates an unbalanced put
 * and is only warned about.
 */
static int ras_psp_put_gpu_mem(struct ras_core_context *ras_core,
		struct gpu_mem_block *gpu_mem)
{
	if (!gpu_mem)
		return 0;

	gpu_mem->ref_count--;

	if (gpu_mem->ref_count > 0) {
		return 0;
	} else if (gpu_mem->ref_count < 0) {
		/* NOTE(review): after the final put the struct is memset to 0,
		 * so mem_type printed here will read 0 on a duplicate free.
		 */
		RAS_DEV_WARN(ras_core->dev,
			"Duplicate free gpu memory %u\n", gpu_mem->mem_type);
	} else {
		ras_core_put_gpu_mem(ras_core, gpu_mem->mem_type, gpu_mem);
		memset(gpu_mem, 0, sizeof(*gpu_mem));
	}

	return 0;
}

/* Serialize PSP command submission.  Prefer the mutex shared with the
 * rest of the system (external_mutex, set by
 * ras_psp_sync_system_ras_psp_status); otherwise use our own.
 */
static void __acquire_psp_cmd_lock(struct ras_core_context *ras_core)
{
	struct ras_psp_ctx *psp_ctx = &ras_core->ras_psp.psp_ctx;

	if (psp_ctx->external_mutex)
		mutex_lock(psp_ctx->external_mutex);
	else
		mutex_lock(&psp_ctx->internal_mutex);
}

/* Counterpart of __acquire_psp_cmd_lock(); must mirror its choice of
 * mutex.  Assumes external_mutex does not change while held.
 */
static void __release_psp_cmd_lock(struct ras_core_context *ras_core)
{
	struct ras_psp_ctx *psp_ctx = &ras_core->ras_psp.psp_ctx;

	if (psp_ctx->external_mutex)
		mutex_unlock(psp_ctx->external_mutex);
	else
		mutex_unlock(&psp_ctx->internal_mutex);
}

/* Convert the ring write pointer (in dwords, read from hardware) into a
 * frame-slot index: wptr_dw * 4 bytes / sizeof(frame).
 */
static uint32_t __get_ring_frame_slot(struct ras_core_context *ras_core)
{
	struct ras_psp *psp = &ras_core->ras_psp;
	uint32_t ras_ring_wptr_dw;

	ras_ring_wptr_dw = psp->ip_func->psp_ras_ring_wptr_get(ras_core);

	return div64_u64((ras_ring_wptr_dw << 2), sizeof(struct psp_gfx_rb_frame));
}

/* Program the ring write pointer (in dwords) for the given frame slot. */
static int __set_ring_frame_slot(struct ras_core_context *ras_core,
		uint32_t slot)
{
	struct ras_psp *psp = &ras_core->ras_psp;

	return psp->ip_func->psp_ras_ring_wptr_set(ras_core,
		(slot * sizeof(struct psp_gfx_rb_frame)) >> 2);
}

/* Copy one ring-buffer frame into the current ring slot and advance the
 * write pointer (with wrap-around).  A read-back of fence_value is used
 * to flush/verify the frame write before the wptr update makes it
 * visible to the PSP; a mismatch aborts with -EACCES.
 */
static int write_frame_to_ras_psp_ring(struct ras_core_context *ras_core,
		struct psp_gfx_rb_frame *frame)
{
	struct gpu_mem_block *ring_mem;
	struct psp_gfx_rb_frame *rb_frame;
	uint32_t max_frame_slot;
	uint32_t slot_idx;
	uint32_t write_flush_read_back = 0;
	int ret = 0;

	ring_mem = ras_psp_get_gpu_mem(ras_core, GPU_MEM_TYPE_RAS_PSP_RING);
	if (!ring_mem)
		return -ENOMEM;

	max_frame_slot =
		div64_u64(ring_mem->mem_size, sizeof(struct psp_gfx_rb_frame));

	rb_frame =
		(struct psp_gfx_rb_frame *)ring_mem->mem_cpu_addr;

	slot_idx = __get_ring_frame_slot(ras_core);
	if (slot_idx >= max_frame_slot)
		slot_idx = 0;

	memcpy(&rb_frame[slot_idx], frame, sizeof(*frame));

	/* Do a read to force the write of the frame before writing
	 * write pointer.
	 */
	write_flush_read_back = rb_frame[slot_idx].fence_value;
	if (write_flush_read_back != frame->fence_value) {
		RAS_DEV_ERR(ras_core->dev,
			"Failed to submit ring cmd! cmd:0x%x:0x%x, fence:0x%x:0x%x value:%u, expected:%u\n",
			rb_frame[slot_idx].cmd_buf_addr_hi,
			rb_frame[slot_idx].cmd_buf_addr_lo,
			rb_frame[slot_idx].fence_addr_hi,
			rb_frame[slot_idx].fence_addr_lo,
			write_flush_read_back, frame->fence_value);
		ret = -EACCES;
		goto err;
	}

	slot_idx++;

	/* Wrap to slot 0 after the last frame in the ring. */
	if (slot_idx >= max_frame_slot)
		slot_idx = 0;

	__set_ring_frame_slot(ras_core, slot_idx);

err:
	ras_psp_put_gpu_mem(ras_core, ring_mem);
	return ret;
}

/* Submit one GFX command to the PSP and wait for its fence.
 *
 * The command payload (cmd_data/cmd_size) is copied into the shared
 * command buffer, a ring frame referencing command + fence buffers is
 * queued, and the fence location is polled (2 ms steps, ~1000 tries)
 * until the PSP writes back in_fence_value.  resp receives the PSP's
 * status and session id.
 *
 * NOTE(review): on poll timeout no error is returned — resp.status is
 * read anyway and ret stays 0; presumably callers rely on resp.status
 * to detect failure.  Confirm this is the intended contract.
 */
static int send_psp_cmd(struct ras_core_context *ras_core,
		enum psp_gfx_cmd_id gfx_cmd_id, void *cmd_data,
		uint32_t cmd_size, struct psp_cmd_resp *resp)
{
	struct ras_psp_ctx *psp_ctx = &ras_core->ras_psp.psp_ctx;
	struct gpu_mem_block *psp_cmd_buf = NULL;
	struct gpu_mem_block *psp_fence_buf = NULL;
	struct psp_gfx_cmd_resp *gfx_cmd;
	struct psp_gfx_rb_frame rb_frame;
	int ret = 0;
	int timeout = 1000;

	if (!cmd_data || (cmd_size > sizeof(union psp_gfx_commands)) || !resp) {
		RAS_DEV_ERR(ras_core->dev, "Invalid RAS PSP command, id: %u\n", gfx_cmd_id);
		return -EINVAL;
	}

	__acquire_psp_cmd_lock(ras_core);

	psp_cmd_buf = ras_psp_get_gpu_mem(ras_core, GPU_MEM_TYPE_RAS_PSP_CMD);
	if (!psp_cmd_buf) {
		ret = -ENOMEM;
		goto exit;
	}

	psp_fence_buf = ras_psp_get_gpu_mem(ras_core, GPU_MEM_TYPE_RAS_PSP_FENCE);
	if (!psp_fence_buf) {
		ret = -ENOMEM;
		goto exit;
	}

	gfx_cmd = (struct psp_gfx_cmd_resp *)psp_cmd_buf->mem_cpu_addr;
	memset(gfx_cmd, 0, sizeof(*gfx_cmd));
	gfx_cmd->cmd_id = gfx_cmd_id;
	memcpy(&gfx_cmd->cmd, cmd_data, cmd_size);

	/* New fence value for this submission; rolled back on failure. */
	psp_ctx->in_fence_value++;

	memset(&rb_frame, 0, sizeof(rb_frame));
	rb_frame.cmd_buf_addr_hi = upper_32_bits(psp_cmd_buf->mem_mc_addr);
	rb_frame.cmd_buf_addr_lo = lower_32_bits(psp_cmd_buf->mem_mc_addr);
	rb_frame.fence_addr_hi = upper_32_bits(psp_fence_buf->mem_mc_addr);
	rb_frame.fence_addr_lo = lower_32_bits(psp_fence_buf->mem_mc_addr);
	rb_frame.fence_value = psp_ctx->in_fence_value;

	ret = write_frame_to_ras_psp_ring(ras_core, &rb_frame);
	if (ret) {
		psp_ctx->in_fence_value--;
		goto exit;
	}

	/* Poll the fence buffer until the PSP echoes our fence value. */
	while (*((uint64_t *)psp_fence_buf->mem_cpu_addr) !=
		psp_ctx->in_fence_value) {
		if (--timeout == 0)
			break;
		/*
		 * Shouldn't wait for timeout when err_event_athub occurs,
		 * because gpu reset thread triggered and lock resource should
		 * be released for psp resume sequence.
		 */
		if (ras_core_ras_interrupt_detected(ras_core))
			break;

		msleep(2);
	}

	resp->status = gfx_cmd->resp.status;
	resp->session_id = gfx_cmd->resp.session_id;

exit:
	ras_psp_put_gpu_mem(ras_core, psp_cmd_buf);
	ras_psp_put_gpu_mem(ras_core, psp_fence_buf);

	__release_psp_cmd_lock(ras_core);

	return ret;
}

/* Log (and in one case rewrite) the RAS TA response status.  Purely
 * diagnostic except for the err_inject_switch_disable_flag path, which
 * forces ras_status to RAS_NOT_AVAILABLE.
 */
static void __check_ras_ta_cmd_resp(struct ras_core_context *ras_core,
		struct ras_ta_cmd *ras_cmd)
{

	if (ras_cmd->ras_out_message.flags.err_inject_switch_disable_flag) {
		RAS_DEV_WARN(ras_core->dev, "ECC switch disabled\n");
		ras_cmd->ras_status = RAS_TA_STATUS__ERROR_RAS_NOT_AVAILABLE;
	} else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag)
		RAS_DEV_WARN(ras_core->dev, "RAS internal register access blocked\n");

	switch (ras_cmd->ras_status) {
	case RAS_TA_STATUS__ERROR_UNSUPPORTED_IP:
		RAS_DEV_WARN(ras_core->dev,
			"RAS WARNING: cmd failed due to unsupported ip\n");
		break;
	case RAS_TA_STATUS__ERROR_UNSUPPORTED_ERROR_INJ:
		RAS_DEV_WARN(ras_core->dev,
			"RAS WARNING: cmd failed due to unsupported error injection\n");
		break;
	case RAS_TA_STATUS__SUCCESS:
		break;
	case RAS_TA_STATUS__TEE_ERROR_ACCESS_DENIED:
		if (ras_cmd->cmd_id == RAS_TA_CMD_ID__TRIGGER_ERROR)
			RAS_DEV_WARN(ras_core->dev,
				"RAS WARNING: Inject error to critical region is not allowed\n");
		break;
	default:
		RAS_DEV_WARN(ras_core->dev,
			"RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status);
		break;
	}
}

/* Invoke a runtime command on the loaded RAS TA.
 *
 * Copies `in` into the shared TA command buffer, sends an INVOKE_CMD to
 * the PSP for the current session, then (on success) copies the TA's
 * output back into `out`.  Lock order: gpu-reset trylock (fails with
 * -EACCES if a reset is in flight), then ta_mutex; the psp cmd lock is
 * taken inside send_psp_cmd().
 */
static int send_ras_ta_runtime_cmd(struct ras_core_context *ras_core,
		enum ras_ta_cmd_id cmd_id, void *in, uint32_t in_size,
		void *out, uint32_t out_size)
{
	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;
	struct gpu_mem_block *cmd_mem;
	struct ras_ta_cmd *ras_cmd;
	struct psp_gfx_cmd_invoke_cmd invoke_cmd = {0};
	struct psp_cmd_resp resp = {0};
	int ret = 0;

	if (!in || (in_size > sizeof(union ras_ta_cmd_input)) ||
	    (cmd_id >= MAX_RAS_TA_CMD_ID)) {
		RAS_DEV_ERR(ras_core->dev, "Invalid RAS TA command, id: %u\n", cmd_id);
		return -EINVAL;
	}

	/* Refresh session id / preload state in case the system changed it. */
	ras_psp_sync_system_ras_psp_status(ras_core);

	cmd_mem = ras_psp_get_gpu_mem(ras_core, GPU_MEM_TYPE_RAS_TA_CMD);
	if (!cmd_mem)
		return -ENOMEM;

	if (!ras_core_down_trylock_gpu_reset_lock(ras_core)) {
		ret = -EACCES;
		goto out;
	}

	ras_cmd = (struct ras_ta_cmd *)cmd_mem->mem_cpu_addr;

	mutex_lock(&ta_ctx->ta_mutex);

	memset(ras_cmd, 0, sizeof(*ras_cmd));
	ras_cmd->cmd_id = cmd_id;
	memcpy(&ras_cmd->ras_in_message, in, in_size);

	invoke_cmd.ta_cmd_id = cmd_id;
	invoke_cmd.session_id = ta_ctx->session_id;

	ret = send_psp_cmd(ras_core, GFX_CMD_ID_INVOKE_CMD,
			&invoke_cmd, sizeof(invoke_cmd), &resp);

	/* If err_event_athub occurs error inject was successful, however
	 * return status from TA is no long reliable
	 */
	if (ras_core_ras_interrupt_detected(ras_core)) {
		ret = 0;
		goto unlock;
	}

	if (ret || resp.status) {
		RAS_DEV_ERR(ras_core->dev,
			"RAS: Failed to send psp cmd! ret:%d, status:%u\n",
			ret, resp.status);
		ret = -ESTRPIPE;
		goto unlock;
	}

	if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) {
		RAS_DEV_WARN(ras_core->dev, "RAS: Unsupported Interface\n");
		ret = -EINVAL;
		goto unlock;
	}

	/* Only copy output on TA success and when the caller wants it. */
	if (!ras_cmd->ras_status && out && out_size)
		memcpy(out, &ras_cmd->ras_out_message, out_size);

	__check_ras_ta_cmd_resp(ras_core, ras_cmd);

unlock:
	mutex_unlock(&ta_ctx->ta_mutex);
	ras_core_up_gpu_reset_lock(ras_core);
out:
	ras_psp_put_gpu_mem(ras_core, cmd_mem);
	return ret;
}

/* Build the per-block device mask, fold it into sub_block_index (upper
 * 20 bits, see RAS_TA_INST_MASK/SHIFT), and send TRIGGER_ERROR to the
 * TA.  GFX additionally remaps its sub-block id first.
 */
static int trigger_ras_ta_error(struct ras_core_context *ras_core,
		struct ras_ta_trigger_error_input *info, uint32_t instance_mask)
{
	uint32_t dev_mask = 0;

	switch (info->block_id) {
	case RAS_TA_BLOCK__GFX:
		if (ras_gfx_get_ta_subblock(ras_core, info->inject_error_type,
				info->sub_block_index, &info->sub_block_index))
			return -EINVAL;

		dev_mask = RAS_GET_MASK(ras_core->dev, GC, instance_mask);
		break;
	case RAS_TA_BLOCK__SDMA:
		dev_mask = RAS_GET_MASK(ras_core->dev, SDMA0, instance_mask);
		break;
	case RAS_TA_BLOCK__VCN:
	case RAS_TA_BLOCK__JPEG:
		/* JPEG shares the VCN instance mask. */
		dev_mask = RAS_GET_MASK(ras_core->dev, VCN, instance_mask);
		break;
	default:
		dev_mask = instance_mask;
		break;
	}

	/* reuse sub_block_index for backward compatibility */
	dev_mask <<= RAS_TA_INST_SHIFT;
	dev_mask &= RAS_TA_INST_MASK;
	info->sub_block_index |= dev_mask;

	return send_ras_ta_runtime_cmd(ras_core, RAS_TA_CMD_ID__TRIGGER_ERROR,
			info, sizeof(*info), NULL, 0);
}

/* Load the RAS TA firmware image into the PSP.
 *
 * Stages the TA binary and init flags into shared GPU memory, issues
 * GFX_CMD_ID_LOAD_TA under the gpu-reset lock, and on success records
 * the TA version (parsed from the image header) and session id in
 * ta_ctx.
 */
static int send_load_ta_fw_cmd(struct ras_core_context *ras_core,
		struct ras_ta_ctx *ta_ctx)
{
	struct ras_ta_fw_bin *fw_bin = &ta_ctx->fw_bin;
	struct gpu_mem_block *fw_mem;
	struct gpu_mem_block *cmd_mem;
	struct ras_ta_cmd *ta_cmd;
	struct ras_ta_init_flags *ta_init_flags;
	struct psp_gfx_cmd_load_ta psp_load_ta_cmd;
	struct psp_cmd_resp resp = {0};
	struct ras_ta_image_header *fw_hdr = NULL;
	int ret;

	fw_mem = ras_psp_get_gpu_mem(ras_core, GPU_MEM_TYPE_RAS_TA_FW);
	if (!fw_mem)
		return -ENOMEM;

	cmd_mem = ras_psp_get_gpu_mem(ras_core, GPU_MEM_TYPE_RAS_TA_CMD);
	if (!cmd_mem) {
		ret = -ENOMEM;
		goto err;
	}

	ret = ras_psp_get_ras_ta_init_param(ras_core, &ta_ctx->init_param);
	if (ret)
		goto err;

	if (!ras_core_down_trylock_gpu_reset_lock(ras_core)) {
		ret = -EACCES;
		goto err;
	}

	/* copy ras ta binary to shared gpu memory */
	memcpy(fw_mem->mem_cpu_addr, fw_bin->bin_addr, fw_bin->bin_size);
	fw_mem->mem_size = fw_bin->bin_size;

	/* Initialize ras ta startup parameter */
	ta_cmd = (struct ras_ta_cmd *)cmd_mem->mem_cpu_addr;
	ta_init_flags = &ta_cmd->ras_in_message.init_flags;

	ta_init_flags->poison_mode_en = ta_ctx->init_param.poison_mode_en;
	ta_init_flags->dgpu_mode = ta_ctx->init_param.dgpu_mode;
	ta_init_flags->xcc_mask = ta_ctx->init_param.xcc_mask;
	ta_init_flags->channel_dis_num = ta_ctx->init_param.channel_dis_num;
	ta_init_flags->nps_mode = ta_ctx->init_param.nps_mode;
	ta_init_flags->active_umc_mask = ta_ctx->init_param.active_umc_mask;

	/* Setup load ras ta command */
	memset(&psp_load_ta_cmd, 0, sizeof(psp_load_ta_cmd));
	psp_load_ta_cmd.app_phy_addr_lo = lower_32_bits(fw_mem->mem_mc_addr);
	psp_load_ta_cmd.app_phy_addr_hi = upper_32_bits(fw_mem->mem_mc_addr);
	psp_load_ta_cmd.app_len = fw_mem->mem_size;
	psp_load_ta_cmd.cmd_buf_phy_addr_lo = lower_32_bits(cmd_mem->mem_mc_addr);
	psp_load_ta_cmd.cmd_buf_phy_addr_hi = upper_32_bits(cmd_mem->mem_mc_addr);
	psp_load_ta_cmd.cmd_buf_len = cmd_mem->mem_size;

	ret = send_psp_cmd(ras_core, GFX_CMD_ID_LOAD_TA,
			&psp_load_ta_cmd, sizeof(psp_load_ta_cmd), &resp);
	if (!ret && !resp.status) {
		/* Read TA version at FW offset 0x60 if TA version not found*/
		fw_hdr = (struct ras_ta_image_header *)fw_bin->bin_addr;
		RAS_DEV_INFO(ras_core->dev, "PSP: RAS TA(version:%X.%X.%X.%X) is loaded.\n",
			(fw_hdr->image_version >> 24) & 0xFF, (fw_hdr->image_version >> 16) & 0xFF,
			(fw_hdr->image_version >> 8) & 0xFF, fw_hdr->image_version & 0xFF);
		ta_ctx->ta_version = fw_hdr->image_version;
		ta_ctx->session_id = resp.session_id;
		ta_ctx->ras_ta_initialized = true;
	} else {
		RAS_DEV_ERR(ras_core->dev,
			"Failed to load RAS TA! ret:%d, status:%d\n", ret, resp.status);
	}

	ras_core_up_gpu_reset_lock(ras_core);

err:
	ras_psp_put_gpu_mem(ras_core, fw_mem);
	ras_psp_put_gpu_mem(ras_core, cmd_mem);
	return ret;
}

/* Record the caller-supplied TA binary in ta_ctx and load it; on
 * success, report the resulting session id and TA version back to the
 * caller.  NOTE(review): bin_addr is kept (and later kfree'd on
 * unload), so ownership of the buffer transfers to this module —
 * confirm callers expect that.
 */
static int load_ras_ta_firmware(struct ras_core_context *ras_core,
		struct ras_psp_ta_load *ras_ta_load)
{
	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;
	struct ras_ta_fw_bin *fw_bin = &ta_ctx->fw_bin;
	int ret;

	fw_bin->bin_addr = ras_ta_load->bin_addr;
	fw_bin->bin_size = ras_ta_load->bin_size;
	fw_bin->fw_version = ras_ta_load->fw_version;
	fw_bin->feature_version = ras_ta_load->feature_version;

	ret = send_load_ta_fw_cmd(ras_core, ta_ctx);
	if (!ret) {
		ras_ta_load->out_session_id = ta_ctx->session_id;
		ras_ta_load->out_loaded_ta_version = ta_ctx->ta_version;
	}

	return ret;
}

/* Unload the current RAS TA session via GFX_CMD_ID_UNLOAD_TA and, on
 * success, free the cached firmware binary and reset the TA context.
 * Fails with -EACCES if a GPU reset is in progress.
 */
static int unload_ras_ta_firmware(struct ras_core_context *ras_core,
		struct ras_psp_ta_unload *ras_ta_unload)
{
	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;
	struct psp_gfx_cmd_unload_ta cmd_unload_ta = {0};
	struct psp_cmd_resp resp = {0};
	int ret;

	if (!ras_core_down_trylock_gpu_reset_lock(ras_core))
		return -EACCES;

	cmd_unload_ta.session_id = ta_ctx->session_id;
	ret = send_psp_cmd(ras_core, GFX_CMD_ID_UNLOAD_TA,
			&cmd_unload_ta, sizeof(cmd_unload_ta), &resp);
	if (ret || resp.status) {
		RAS_DEV_ERR(ras_core->dev,
			"Failed to unload RAS TA! ret:%d, status:%u\n",
			ret, resp.status);
		goto unlock;
	}

	kfree(ta_ctx->fw_bin.bin_addr);
	memset(&ta_ctx->fw_bin, 0, sizeof(ta_ctx->fw_bin));
	ta_ctx->ta_version = 0;
	ta_ctx->ras_ta_initialized = false;
	ta_ctx->session_id = 0;

unlock:
	ras_core_up_gpu_reset_lock(ras_core);

	return ret;
}

/* Public entry: load a RAS TA image.  No-op when the system already
 * preloaded the TA.  If a TA is currently loaded, it is unloaded first
 * so the new image replaces it.
 */
int ras_psp_load_firmware(struct ras_core_context *ras_core,
		struct ras_psp_ta_load *ras_ta_load)
{
	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;
	struct ras_psp_ta_unload ras_ta_unload = {0};
	int ret;

	if (ta_ctx->preload_ras_ta_enabled)
		return 0;

	if (!ras_ta_load)
		return -EINVAL;

	if (ta_ctx->ras_ta_initialized) {
		/* Replace the already-loaded TA: unload it first. */
		ras_ta_unload.ras_session_id = ta_ctx->session_id;
		ret = unload_ras_ta_firmware(ras_core, &ras_ta_unload);
		if (ret)
			return ret;
	}

	return load_ras_ta_firmware(ras_core, ras_ta_load);
}

/* Public entry: unload the RAS TA.  No-op when preloaded by the system;
 * the caller must present the session id of the currently loaded TA.
 */
int ras_psp_unload_firmware(struct ras_core_context *ras_core,
		struct ras_psp_ta_unload *ras_ta_unload)
{
	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;

	if (ta_ctx->preload_ras_ta_enabled)
		return 0;

	if ((!ras_ta_unload) ||
	    (ras_ta_unload->ras_session_id != ta_ctx->session_id))
		return -EINVAL;

	return unload_ras_ta_firmware(ras_core, ras_ta_unload);
}

/* Public entry: inject a RAS error through the TA.  Requires a TA to be
 * either preloaded or explicitly loaded.
 */
int ras_psp_trigger_error(struct ras_core_context *ras_core,
		struct ras_ta_trigger_error_input *info, uint32_t instance_mask)
{
	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;

	if (!ta_ctx->preload_ras_ta_enabled && !ta_ctx->ras_ta_initialized) {
		RAS_DEV_ERR(ras_core->dev, "RAS: ras firmware not initialized!");
		return -ENOEXEC;
	}

	if (!info)
		return -EINVAL;

	return trigger_ras_ta_error(ras_core, info, instance_mask);
}

/* Public entry: address translation query via the TA's QUERY_ADDRESS
 * runtime command.  Requires an initialized TA.
 */
int ras_psp_query_address(struct ras_core_context *ras_core,
		struct ras_ta_query_address_input *addr_in,
		struct ras_ta_query_address_output *addr_out)
{
	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;

	if (!ta_ctx->preload_ras_ta_enabled &&
	    !ta_ctx->ras_ta_initialized) {
		RAS_DEV_ERR(ras_core->dev, "RAS: ras firmware not initialized!");
		return -ENOEXEC;
	}

	if (!addr_in || !addr_out)
		return -EINVAL;

	return send_ras_ta_runtime_cmd(ras_core, RAS_TA_CMD_ID__QUERY_ADDRESS,
			addr_in, sizeof(*addr_in), addr_out, sizeof(*addr_out));
}

/* Software init: zero the ras_psp state, bind the mandatory system
 * callbacks, and create the internal mutexes.  Called once before any
 * other ras_psp_* entry point.
 */
int ras_psp_sw_init(struct ras_core_context *ras_core)
{
	struct ras_psp *psp = &ras_core->ras_psp;

	memset(psp, 0, sizeof(*psp));

	psp->sys_func = ras_core->config->psp_cfg.psp_sys_fn;
	if (!psp->sys_func) {
		RAS_DEV_ERR(ras_core->dev, "RAS psp sys function not configured!\n");
		return -EINVAL;
	}

	mutex_init(&psp->psp_ctx.internal_mutex);
	mutex_init(&psp->ta_ctx.ta_mutex);

	return 0;
}

/* Software teardown: destroy mutexes and wipe all ras_psp state. */
int ras_psp_sw_fini(struct ras_core_context *ras_core)
{
	struct ras_psp *psp = &ras_core->ras_psp;

	mutex_destroy(&psp->psp_ctx.internal_mutex);
	mutex_destroy(&psp->ta_ctx.ta_mutex);

	memset(psp, 0, sizeof(*psp));

	return 0;
}

/* Hardware init: resolve the IP function table for the configured PSP
 * version and re-sync system RAS PSP status (also run after GPU reset).
 */
int ras_psp_hw_init(struct ras_core_context *ras_core)
{
	struct ras_psp *psp = &ras_core->ras_psp;

	psp->psp_ip_version = ras_core->config->psp_ip_version;

	psp->ip_func = ras_psp_get_ip_funcs(ras_core, psp->psp_ip_version);
	if (!psp->ip_func)
		return -EINVAL;

	/* After GPU reset, the system RAS PSP status may change.
	 * therefore, it is necessary to synchronize the system status again.
	 */
	ras_psp_sync_system_ras_psp_status(ras_core);

	return 0;
}

/* Hardware teardown: nothing to do currently. */
int ras_psp_hw_fini(struct ras_core_context *ras_core)
{
	return 0;
}

/* Report whether a given TA command is usable in the current TA state.
 * QUERY_ADDRESS is only supported with a system-preloaded TA;
 * TRIGGER_ERROR works with any initialized TA.
 */
bool ras_psp_check_supported_cmd(struct ras_core_context *ras_core,
		enum ras_ta_cmd_id cmd_id)
{
	struct ras_ta_ctx *ta_ctx = &ras_core->ras_psp.ta_ctx;
	bool ret = false;

	if (!ta_ctx->preload_ras_ta_enabled && !ta_ctx->ras_ta_initialized)
		return false;

	switch (cmd_id) {
	case RAS_TA_CMD_ID__QUERY_ADDRESS:
		/* Currently, querying the address from RAS TA is only supported
		 * when the RAS TA firmware is loaded during driver installation.
		 */
		if (ta_ctx->preload_ras_ta_enabled)
			ret = true;
		break;
	case RAS_TA_CMD_ID__TRIGGER_ERROR:
		ret = true;
		break;
	default:
		ret = false;
		break;
	}

	return ret;
}