1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright 2025 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 */ 24 25 #include <linux/pci.h> 26 #include "amdgpu.h" 27 #include "amdgpu_ras.h" 28 #include "ras_sys.h" 29 #include "amdgpu_ras_cmd.h" 30 #include "amdgpu_virt_ras_cmd.h" 31 #include "amdgpu_ras_mgr.h" 32 33 static int amdgpu_virt_ras_get_cmd_shared_mem(struct ras_core_context *ras_core, 34 uint32_t cmd, uint32_t mem_size, struct amdgpu_virt_shared_mem *shared_mem) 35 { 36 struct amdgpu_device *adev = ras_core->dev; 37 struct amdsriov_ras_telemetry *ras_telemetry_cpu; 38 struct amdsriov_ras_telemetry *ras_telemetry_gpu; 39 void *fw_va = adev->mman.resv_region[AMDGPU_RESV_FW_VRAM_USAGE].cpu_ptr; 40 void *drv_va = adev->mman.resv_region[AMDGPU_RESV_DRV_VRAM_USAGE].cpu_ptr; 41 uint64_t fw_vram_usage_start_offset = 0; 42 uint64_t ras_telemetry_offset = 0; 43 44 if (!adev->virt.fw_reserve.ras_telemetry) 45 return -EINVAL; 46 47 if (fw_va && fw_va <= adev->virt.fw_reserve.ras_telemetry) { 48 fw_vram_usage_start_offset = adev->mman.resv_region[AMDGPU_RESV_FW_VRAM_USAGE].offset; 49 ras_telemetry_offset = (uintptr_t)adev->virt.fw_reserve.ras_telemetry - 50 (uintptr_t)fw_va; 51 } else if (drv_va && drv_va <= adev->virt.fw_reserve.ras_telemetry) { 52 fw_vram_usage_start_offset = adev->mman.resv_region[AMDGPU_RESV_DRV_VRAM_USAGE].offset; 53 ras_telemetry_offset = (uintptr_t)adev->virt.fw_reserve.ras_telemetry - 54 (uintptr_t)drv_va; 55 } else { 56 return -EINVAL; 57 } 58 59 ras_telemetry_cpu = 60 (struct amdsriov_ras_telemetry *)adev->virt.fw_reserve.ras_telemetry; 61 ras_telemetry_gpu = 62 (struct amdsriov_ras_telemetry *)(uintptr_t)(fw_vram_usage_start_offset + 63 ras_telemetry_offset); 64 65 if (cmd == RAS_CMD__GET_ALL_BLOCK_ECC_STATUS) { 66 if (mem_size > AMD_SRIOV_UNIRAS_BLOCKS_BUF_SIZE) 67 return -ENOMEM; 68 69 shared_mem->cpu_addr = ras_telemetry_cpu->uniras_shared_mem.blocks_ecc_buf; 70 shared_mem->gpa = 71 (uintptr_t)ras_telemetry_gpu->uniras_shared_mem.blocks_ecc_buf - 72 adev->gmc.vram_start; 73 shared_mem->size = mem_size; 74 } else { 75 if (mem_size > AMD_SRIOV_UNIRAS_CMD_MAX_SIZE) 76 return -ENOMEM; 77 78 shared_mem->cpu_addr = ras_telemetry_cpu->uniras_shared_mem.cmd_buf; 79 shared_mem->gpa = 80 (uintptr_t)ras_telemetry_gpu->uniras_shared_mem.cmd_buf - 81 adev->gmc.vram_start; 82 shared_mem->size = mem_size; 83 } 84 85 return 0; 86 } 87 88 static int amdgpu_virt_ras_remote_ioctl_cmd(struct ras_core_context *ras_core, 89 struct ras_cmd_ctx *cmd, void *output_data, uint32_t output_size) 90 { 91 struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(ras_core->dev); 92 struct amdgpu_virt_ras_cmd *virt_ras = ras_mgr->virt_ras_cmd; 93 uint32_t mem_len = ALIGN(sizeof(*cmd) + output_size, AMDGPU_GPU_PAGE_SIZE); 94 struct ras_cmd_ctx *rcmd; 95 struct ras_cmd_ctx hdr_snap; 96 struct amdgpu_virt_shared_mem shared_mem = {0}; 97 int ret = 0; 98 99 mutex_lock(&virt_ras->remote_access_lock); 100 101 ret = amdgpu_virt_ras_get_cmd_shared_mem(ras_core, cmd->cmd_id, mem_len, &shared_mem); 102 if (ret) 103 goto out; 104 105 rcmd = (struct ras_cmd_ctx *)shared_mem.cpu_addr; 106 memset(rcmd, 0, mem_len); 107 memcpy(rcmd, cmd, sizeof(*cmd)); 108 109 ret = amdgpu_virt_send_remote_ras_cmd(ras_core->dev, 110 shared_mem.gpa, mem_len); 111 if (!ret) { 112 /* 113 * rcmd lives in shared memory the PF can mutate at any time. 114 * Snapshot the entire fixed-size response header into a local 115 * struct in one shot so every subsequent decision (cmd_res, 116 * output_size, version, etc.) operates on a stable copy. This 117 * defeats double-fetch / TOCTOU attacks where a malicious or 118 * buggy PF could flip cmd_res from SUCCESS to an error after 119 * our success branch, or enlarge output_size between the 120 * bounds check and the memcpy below to corrupt the caller's 121 * local output buffer. 122 */ 123 memcpy(&hdr_snap, rcmd, sizeof(hdr_snap)); 124 barrier(); 125 126 if (hdr_snap.cmd_res) { 127 ret = hdr_snap.cmd_res; 128 goto out; 129 } 130 131 cmd->cmd_res = hdr_snap.cmd_res; 132 cmd->output_size = hdr_snap.output_size; 133 134 if (hdr_snap.output_size && output_data && 135 hdr_snap.output_size <= output_size) 136 memcpy(output_data, rcmd->output_buff_raw, hdr_snap.output_size); 137 } 138 139 out: 140 mutex_unlock(&virt_ras->remote_access_lock); 141 return ret; 142 } 143 144 static int amdgpu_virt_ras_send_remote_cmd(struct ras_core_context *ras_core, 145 uint32_t cmd_id, void *input_data, uint32_t input_size, 146 void *output_data, uint32_t output_size) 147 { 148 struct ras_cmd_ctx rcmd = {0}; 149 int ret; 150 151 if (input_size > RAS_CMD_MAX_IN_SIZE) 152 return RAS_CMD__ERROR_INVALID_INPUT_SIZE; 153 154 rcmd.cmd_id = cmd_id; 155 rcmd.input_size = input_size; 156 memcpy(rcmd.input_buff_raw, input_data, input_size); 157 158 ret = amdgpu_virt_ras_remote_ioctl_cmd(ras_core, 159 &rcmd, output_data, output_size); 160 if (!ret) { 161 if (rcmd.output_size != output_size) 162 return RAS_CMD__ERROR_GENERIC; 163 } 164 165 return ret; 166 } 167 168 static int amdgpu_virt_ras_get_batch_trace_overview(struct ras_core_context *ras_core, 169 struct ras_log_batch_overview *overview) 170 { 171 struct ras_cmd_batch_trace_snapshot_req req = {0}; 172 struct ras_cmd_batch_trace_snapshot_rsp rsp = {0}; 173 int ret; 174 175 ret = amdgpu_virt_ras_send_remote_cmd(ras_core, RAS_CMD__GET_BATCH_TRACE_SNAPSHOT, 176 &req, sizeof(req), &rsp, sizeof(rsp)); 177 if (ret) 178 return ret; 179 180 overview->first_batch_id = rsp.start_batch_id; 181 overview->last_batch_id = rsp.latest_batch_id; 182 overview->logged_batch_count = rsp.total_batch_num; 183 184 return RAS_CMD__SUCCESS; 185 } 186 187 static int amdgpu_virt_ras_get_cper_snapshot(struct ras_core_context *ras_core, 188 struct ras_cmd_ctx *cmd, void *data) 189 { 190 struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(ras_core->dev); 191 struct amdgpu_virt_ras_cmd *virt_ras = 192 (struct amdgpu_virt_ras_cmd *)ras_mgr->virt_ras_cmd; 193 int ret; 194 195 if (cmd->input_size != sizeof(struct ras_cmd_cper_snapshot_req)) 196 return RAS_CMD__ERROR_INVALID_INPUT_SIZE; 197 198 ret = amdgpu_virt_ras_send_remote_cmd(ras_core, cmd->cmd_id, 199 cmd->input_buff_raw, cmd->input_size, 200 cmd->output_buff_raw, sizeof(struct ras_cmd_cper_snapshot_rsp)); 201 if (ret) 202 return ret; 203 204 memset(&virt_ras->batch_mgr, 0, sizeof(virt_ras->batch_mgr)); 205 amdgpu_virt_ras_get_batch_trace_overview(ras_core, 206 &virt_ras->batch_mgr.batch_overview); 207 208 cmd->output_size = sizeof(struct ras_cmd_cper_snapshot_rsp); 209 return RAS_CMD__SUCCESS; 210 } 211 212 static bool amdgpu_virt_ras_check_batch_cached(struct ras_cmd_batch_trace_record_rsp *rsp, 213 uint64_t batch_id) 214 { 215 return rsp->real_batch_num && 216 rsp->real_batch_num <= RAS_CMD_MAX_BATCH_NUM && 217 batch_id >= rsp->start_batch_id && 218 (batch_id - rsp->start_batch_id) < rsp->real_batch_num; 219 } 220 221 static int amdgpu_virt_ras_get_batch_records(struct ras_core_context *ras_core, uint64_t batch_id, 222 struct ras_log_info *trace_arr, uint32_t arr_num, 223 struct ras_cmd_batch_trace_record_rsp *rsp_cache) 224 { 225 struct ras_cmd_batch_trace_record_req req = { 226 .start_batch_id = batch_id, 227 .batch_num = RAS_CMD_MAX_BATCH_NUM, 228 }; 229 struct ras_cmd_batch_trace_record_rsp *rsp = rsp_cache; 230 struct batch_ras_trace_info *batch; 231 int ret = 0; 232 uint32_t i; 233 uint32_t idx; 234 235 if (!amdgpu_virt_ras_check_batch_cached(rsp, batch_id)) { 236 memset(rsp, 0, sizeof(*rsp)); 237 ret = amdgpu_virt_ras_send_remote_cmd(ras_core, RAS_CMD__GET_BATCH_TRACE_RECORD, 238 &req, sizeof(req), rsp, sizeof(*rsp)); 239 if (ret) 240 return -EPIPE; 241 242 if (!amdgpu_virt_ras_check_batch_cached(rsp, batch_id)) { 243 memset(rsp, 0, sizeof(*rsp)); 244 return -EIO; 245 } 246 } 247 248 idx = (uint32_t)(batch_id - rsp->start_batch_id); 249 batch = &rsp->batchs[idx]; 250 if (batch_id != batch->batch_id || 251 batch->trace_num > MAX_RECORD_PER_BATCH || 252 (uint32_t)batch->offset + batch->trace_num > RAS_CMD_MAX_TRACE_NUM) { 253 memset(rsp, 0, sizeof(*rsp)); 254 return -EIO; 255 } 256 257 for (i = 0; i < batch->trace_num && i < arr_num; i++) 258 memcpy(&trace_arr[i], 259 &rsp->records[batch->offset + i], sizeof(*trace_arr)); 260 261 return i; 262 } 263 264 static int amdgpu_virt_ras_get_cper_records(struct ras_core_context *ras_core, 265 struct ras_cmd_ctx *cmd, void *data) 266 { 267 struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(ras_core->dev); 268 struct amdgpu_virt_ras_cmd *virt_ras = 269 (struct amdgpu_virt_ras_cmd *)ras_mgr->virt_ras_cmd; 270 struct ras_cmd_cper_record_req *req = 271 (struct ras_cmd_cper_record_req *)cmd->input_buff_raw; 272 struct ras_cmd_cper_record_rsp *rsp = 273 (struct ras_cmd_cper_record_rsp *)cmd->output_buff_raw; 274 struct ras_log_batch_overview *overview = &virt_ras->batch_mgr.batch_overview; 275 struct ras_cmd_batch_trace_record_rsp *rsp_cache = &virt_ras->batch_mgr.batch_trace; 276 struct ras_log_info *trace; 277 uint32_t trace_count = MAX_RECORD_PER_BATCH; 278 uint32_t offset = 0, real_data_len = 0; 279 uint64_t batch_id; 280 uint8_t *out_buf; 281 int ret = 0, i, count; 282 283 if (cmd->input_size != sizeof(struct ras_cmd_cper_record_req) || 284 (cmd->output_buf_size < sizeof(*rsp))) 285 return RAS_CMD__ERROR_INVALID_INPUT_SIZE; 286 287 if (!req->buf_size || !req->buf_ptr || !req->cper_num || 288 req->buf_size > RAS_CMD_MAX_CPER_BUF_SZ) 289 return RAS_CMD__ERROR_INVALID_INPUT_DATA; 290 291 trace = kzalloc_objs(*trace, trace_count); 292 if (!trace) 293 return RAS_CMD__ERROR_GENERIC; 294 295 out_buf = kzalloc(req->buf_size, GFP_KERNEL); 296 if (!out_buf) { 297 kfree(trace); 298 return RAS_CMD__ERROR_GENERIC; 299 } 300 301 memset(out_buf, 0, req->buf_size); 302 303 for (i = 0; i < req->cper_num; i++) { 304 batch_id = req->cper_start_id + i; 305 if (batch_id >= overview->last_batch_id) 306 break; 307 count = amdgpu_virt_ras_get_batch_records(ras_core, batch_id, 308 trace, trace_count, 309 rsp_cache); 310 if (count > 0) { 311 ret = ras_cper_generate_cper(ras_core, trace, count, 312 &out_buf[offset], req->buf_size - offset, &real_data_len); 313 if (ret) 314 break; 315 316 offset += real_data_len; 317 } 318 } 319 320 if ((ret && (ret != -ENOMEM)) || 321 copy_to_user(u64_to_user_ptr(req->buf_ptr), out_buf, offset)) { 322 kfree(out_buf); 323 kfree(trace); 324 return RAS_CMD__ERROR_GENERIC; 325 } 326 327 rsp->real_data_size = offset; 328 rsp->real_cper_num = i; 329 rsp->remain_num = (ret == -ENOMEM) ? (req->cper_num - i) : 0; 330 rsp->version = 0; 331 332 cmd->output_size = sizeof(struct ras_cmd_cper_record_rsp); 333 334 kfree(out_buf); 335 kfree(trace); 336 337 return RAS_CMD__SUCCESS; 338 } 339 340 static int __fill_get_blocks_ecc_cmd(struct amdgpu_device *adev, 341 struct vram_blocks_ecc *blks_ecc) 342 { 343 struct ras_cmd_ctx *rcmd; 344 345 if (!blks_ecc || !blks_ecc->shared_mem.cpu_addr) 346 return -EINVAL; 347 348 rcmd = (struct ras_cmd_ctx *)blks_ecc->shared_mem.cpu_addr; 349 350 rcmd->cmd_id = RAS_CMD__GET_ALL_BLOCK_ECC_STATUS; 351 rcmd->input_size = sizeof(struct ras_cmd_blocks_ecc_req); 352 rcmd->output_buf_size = blks_ecc->shared_mem.size - sizeof(*rcmd); 353 354 return 0; 355 } 356 357 static int __set_cmd_auto_update(struct amdgpu_device *adev, 358 enum ras_cmd_id cmd_id, uint64_t gpa_addr, uint32_t len, bool reg) 359 { 360 struct ras_cmd_auto_update_req req = {0}; 361 struct ras_cmd_auto_update_rsp rsp = {0}; 362 int ret; 363 364 req.mode = reg ? 1 : 0; 365 req.cmd_id = cmd_id; 366 req.addr = gpa_addr; 367 req.len = len; 368 ret = amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__SET_CMD_AUTO_UPDATE, 369 &req, sizeof(req), &rsp, sizeof(rsp)); 370 371 return ret; 372 } 373 374 static int amdgpu_virt_ras_get_block_ecc(struct ras_core_context *ras_core, 375 struct ras_cmd_ctx *cmd, void *data) 376 { 377 struct amdgpu_device *adev = ras_core->dev; 378 struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); 379 struct amdgpu_virt_ras_cmd *virt_ras = 380 (struct amdgpu_virt_ras_cmd *)ras_mgr->virt_ras_cmd; 381 struct vram_blocks_ecc *blks_ecc = &virt_ras->blocks_ecc; 382 struct ras_cmd_ctx *blks_ecc_cmd_ctx; 383 struct ras_cmd_blocks_ecc_rsp *blks_ecc_rsp; 384 struct ras_cmd_block_ecc_info_req *input_data = 385 (struct ras_cmd_block_ecc_info_req *)cmd->input_buff_raw; 386 struct ras_cmd_block_ecc_info_rsp *output_data = 387 (struct ras_cmd_block_ecc_info_rsp *)cmd->output_buff_raw; 388 int ret = 0; 389 390 if (cmd->input_size != sizeof(struct ras_cmd_block_ecc_info_req)) 391 return RAS_CMD__ERROR_INVALID_INPUT_SIZE; 392 393 if (input_data->block_id >= MAX_RAS_BLOCK_NUM) 394 return RAS_CMD__ERROR_INVALID_INPUT_DATA; 395 396 if (__fill_get_blocks_ecc_cmd(adev, blks_ecc)) 397 return RAS_CMD__ERROR_GENERIC; 398 399 if (!virt_ras->blocks_ecc.auto_update_actived) { 400 ret = __set_cmd_auto_update(adev, RAS_CMD__GET_ALL_BLOCK_ECC_STATUS, 401 blks_ecc->shared_mem.gpa, 402 blks_ecc->shared_mem.size, true); 403 if (ret) 404 return ret; 405 406 blks_ecc->auto_update_actived = true; 407 } 408 409 blks_ecc_cmd_ctx = blks_ecc->shared_mem.cpu_addr; 410 blks_ecc_rsp = (struct ras_cmd_blocks_ecc_rsp *)blks_ecc_cmd_ctx->output_buff_raw; 411 412 output_data->ce_count = blks_ecc_rsp->blocks[input_data->block_id].ce_count; 413 output_data->ue_count = blks_ecc_rsp->blocks[input_data->block_id].ue_count; 414 output_data->de_count = blks_ecc_rsp->blocks[input_data->block_id].de_count; 415 416 cmd->output_size = sizeof(struct ras_cmd_block_ecc_info_rsp); 417 return RAS_CMD__SUCCESS; 418 } 419 420 int amdgpu_virt_ras_check_address_validity(struct amdgpu_device *adev, 421 uint64_t address, bool *hit) 422 { 423 struct ras_cmd_address_check_req req = {0}; 424 struct ras_cmd_address_check_rsp rsp = {0}; 425 int ret = 0; 426 427 req.address = address; 428 429 ret = amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__CHECK_ADDRESS_VALIDITY, 430 &req, sizeof(req), &rsp, sizeof(rsp)); 431 432 if (ret) 433 return RAS_CMD__ERROR_GENERIC; 434 435 *hit = rsp.result ? true : false; 436 437 return RAS_CMD__SUCCESS; 438 } 439 440 int amdgpu_virt_ras_convert_retired_address(struct amdgpu_device *adev, 441 uint64_t address, uint64_t *pfn, uint32_t max_pfn_sz) 442 { 443 struct ras_cmd_convert_retired_address_req req = {0}; 444 struct ras_cmd_convert_retired_address_rsp rsp = {0}; 445 int ret = 0, i; 446 int retired_page_count; 447 448 if (!pfn || !max_pfn_sz) 449 return -EINVAL; 450 451 req.address = address; 452 453 ret = amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__CONVERT_RETIRED_ADDRESS, 454 &req, sizeof(req), &rsp, sizeof(rsp)); 455 456 if (ret || rsp.retired_count == 0) 457 return -EINVAL; 458 459 retired_page_count = rsp.retired_count > max_pfn_sz ? max_pfn_sz : rsp.retired_count; 460 461 for (i = 0; i < retired_page_count; i++) 462 pfn[i] = rsp.retired_addr[i] >> AMDGPU_GPU_PAGE_SHIFT; 463 464 return retired_page_count; 465 } 466 467 static struct ras_cmd_func_map amdgpu_virt_ras_cmd_maps[] = { 468 {RAS_CMD__GET_CPER_SNAPSHOT, amdgpu_virt_ras_get_cper_snapshot}, 469 {RAS_CMD__GET_CPER_RECORD, amdgpu_virt_ras_get_cper_records}, 470 {RAS_CMD__GET_BLOCK_ECC_STATUS, amdgpu_virt_ras_get_block_ecc}, 471 }; 472 473 int amdgpu_virt_ras_handle_cmd(struct ras_core_context *ras_core, 474 struct ras_cmd_ctx *cmd) 475 { 476 struct ras_cmd_func_map *ras_cmd = NULL; 477 int i, res; 478 479 for (i = 0; i < ARRAY_SIZE(amdgpu_virt_ras_cmd_maps); i++) { 480 if (cmd->cmd_id == amdgpu_virt_ras_cmd_maps[i].cmd_id) { 481 ras_cmd = &amdgpu_virt_ras_cmd_maps[i]; 482 break; 483 } 484 } 485 486 if (ras_cmd) 487 res = ras_cmd->func(ras_core, cmd, NULL); 488 else 489 res = amdgpu_virt_ras_remote_ioctl_cmd(ras_core, cmd, 490 cmd->output_buff_raw, cmd->output_buf_size); 491 492 cmd->cmd_res = res; 493 494 if (!res && (cmd->output_size > cmd->output_buf_size)) { 495 RAS_DEV_ERR(ras_core->dev, 496 "Output data size 0x%x exceeds buffer size 0x%x!\n", 497 cmd->output_size, cmd->output_buf_size); 498 return RAS_CMD__SUCCESS_EXEED_BUFFER; 499 } 500 501 return RAS_CMD__SUCCESS; 502 } 503 504 int amdgpu_virt_ras_sw_init(struct amdgpu_device *adev) 505 { 506 struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); 507 struct amdgpu_virt_ras_cmd *virt_ras_cmd; 508 509 ras_mgr->virt_ras_cmd = kzalloc_obj(struct amdgpu_virt_ras_cmd); 510 if (!ras_mgr->virt_ras_cmd) 511 return -ENOMEM; 512 513 virt_ras_cmd = ras_mgr->virt_ras_cmd; 514 mutex_init(&virt_ras_cmd->remote_access_lock); 515 516 return 0; 517 } 518 519 int amdgpu_virt_ras_sw_fini(struct amdgpu_device *adev) 520 { 521 struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); 522 struct amdgpu_virt_ras_cmd *virt_ras_cmd = ras_mgr->virt_ras_cmd; 523 524 mutex_destroy(&virt_ras_cmd->remote_access_lock); 525 kfree(ras_mgr->virt_ras_cmd); 526 ras_mgr->virt_ras_cmd = NULL; 527 528 return 0; 529 } 530 531 int amdgpu_virt_ras_hw_init(struct amdgpu_device *adev) 532 { 533 struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); 534 struct amdgpu_virt_ras_cmd *virt_ras = 535 (struct amdgpu_virt_ras_cmd *)ras_mgr->virt_ras_cmd; 536 struct vram_blocks_ecc *blks_ecc = &virt_ras->blocks_ecc; 537 538 amdgpu_virt_get_ras_capability(adev); 539 540 memset(blks_ecc, 0, sizeof(*blks_ecc)); 541 if (amdgpu_virt_ras_get_cmd_shared_mem(ras_mgr->ras_core, 542 RAS_CMD__GET_ALL_BLOCK_ECC_STATUS, 543 AMD_SRIOV_UNIRAS_BLOCKS_BUF_SIZE, &blks_ecc->shared_mem)) 544 return -ENOMEM; 545 546 return 0; 547 } 548 549 int amdgpu_virt_ras_hw_fini(struct amdgpu_device *adev) 550 { 551 struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); 552 struct amdgpu_virt_ras_cmd *virt_ras = 553 (struct amdgpu_virt_ras_cmd *)ras_mgr->virt_ras_cmd; 554 struct vram_blocks_ecc *blks_ecc = &virt_ras->blocks_ecc; 555 556 if (blks_ecc->shared_mem.cpu_addr) 557 memset(blks_ecc->shared_mem.cpu_addr, 0, blks_ecc->shared_mem.size); 558 559 memset(blks_ecc, 0, sizeof(*blks_ecc)); 560 561 return 0; 562 } 563 564 int amdgpu_virt_ras_pre_reset(struct amdgpu_device *adev) 565 { 566 struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); 567 struct amdgpu_virt_ras_cmd *virt_ras = 568 (struct amdgpu_virt_ras_cmd *)ras_mgr->virt_ras_cmd; 569 570 virt_ras->blocks_ecc.auto_update_actived = false; 571 return 0; 572 } 573 574 int amdgpu_virt_ras_post_reset(struct amdgpu_device *adev) 575 { 576 return 0; 577 } 578 579 void amdgpu_virt_ras_set_remote_uniras(struct amdgpu_device *adev, bool en) 580 { 581 struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); 582 struct amdgpu_virt_ras_cmd *virt_ras; 583 584 if (!ras_mgr || !ras_mgr->virt_ras_cmd) 585 return; 586 587 virt_ras = (struct amdgpu_virt_ras_cmd *)ras_mgr->virt_ras_cmd; 588 virt_ras->remote_uniras_supported = en; 589 } 590 591 bool amdgpu_virt_ras_remote_uniras_enabled(struct amdgpu_device *adev) 592 { 593 struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); 594 struct amdgpu_virt_ras_cmd *virt_ras; 595 596 if (amdgpu_in_reset(adev)) 597 return false; 598 599 if (!ras_mgr || !ras_mgr->virt_ras_cmd) 600 return false; 601 602 virt_ras = (struct amdgpu_virt_ras_cmd *)ras_mgr->virt_ras_cmd; 603 604 return virt_ras->remote_uniras_supported; 605 } 606