1 /* 2 * Copyright 2019 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <linux/firmware.h> 25 #include <drm/drm_exec.h> 26 27 #include "amdgpu_mes.h" 28 #include "amdgpu.h" 29 #include "soc15_common.h" 30 #include "amdgpu_mes_ctx.h" 31 32 #define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 33 #define AMDGPU_ONE_DOORBELL_SIZE 8 34 35 int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) 36 { 37 return roundup(AMDGPU_ONE_DOORBELL_SIZE * 38 AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS, 39 PAGE_SIZE); 40 } 41 42 static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) 43 { 44 int i; 45 struct amdgpu_mes *mes = &adev->mes; 46 47 /* Bitmap for dynamic allocation of kernel doorbells */ 48 mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL); 49 if (!mes->doorbell_bitmap) { 50 dev_err(adev->dev, "Failed to allocate MES doorbell bitmap\n"); 51 return -ENOMEM; 52 } 53 54 mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE; 55 for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) { 56 adev->mes.aggregated_doorbells[i] = mes->db_start_dw_offset + i * 2; 57 set_bit(i, mes->doorbell_bitmap); 58 } 59 60 return 0; 61 } 62 63 static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) 64 { 65 int r; 66 67 if (!amdgpu_mes_log_enable) 68 return 0; 69 70 r = amdgpu_bo_create_kernel(adev, adev->mes.event_log_size, PAGE_SIZE, 71 AMDGPU_GEM_DOMAIN_VRAM, 72 &adev->mes.event_log_gpu_obj, 73 &adev->mes.event_log_gpu_addr, 74 &adev->mes.event_log_cpu_addr); 75 if (r) { 76 dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r); 77 return r; 78 } 79 80 memset(adev->mes.event_log_cpu_addr, 0, adev->mes.event_log_size); 81 82 return 0; 83 84 } 85 86 static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev) 87 { 88 bitmap_free(adev->mes.doorbell_bitmap); 89 } 90 91 static inline u32 amdgpu_mes_get_hqd_mask(u32 num_pipe, 92 u32 num_hqd_per_pipe, 93 u32 num_reserved_hqd) 94 { 95 if (num_pipe == 0) 96 return 0; 97 98 u32 total_hqd_mask = (u32)((1ULL << num_hqd_per_pipe) - 1); 99 u32 reserved_hqd_mask = (u32)((1ULL << DIV_ROUND_UP(num_reserved_hqd, num_pipe)) - 1); 100 101 return (total_hqd_mask & ~reserved_hqd_mask); 102 } 103 104 int amdgpu_mes_init(struct amdgpu_device *adev) 105 { 106 int i, r, num_pipes; 107 u32 total_vmid_mask, reserved_vmid_mask; 108 int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1; 109 u32 gfx_hqd_mask = amdgpu_mes_get_hqd_mask(adev->gfx.me.num_pipe_per_me, 110 adev->gfx.me.num_queue_per_pipe, 111 adev->gfx.disable_kq ? 0 : adev->gfx.num_gfx_rings); 112 u32 compute_hqd_mask = amdgpu_mes_get_hqd_mask(adev->gfx.mec.num_pipe_per_mec, 113 adev->gfx.mec.num_queue_per_pipe, 114 adev->gfx.disable_kq ? 0 : adev->gfx.num_compute_rings); 115 116 adev->mes.adev = adev; 117 118 ida_init(&adev->mes.doorbell_ida); 119 spin_lock_init(&adev->mes.queue_id_lock); 120 mutex_init(&adev->mes.mutex_hidden); 121 122 for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) 123 spin_lock_init(&adev->mes.ring_lock[i]); 124 125 adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; 126 total_vmid_mask = (u32)((1UL << 16) - 1); 127 reserved_vmid_mask = (u32)((1UL << adev->vm_manager.first_kfd_vmid) - 1); 128 129 adev->mes.vmid_mask_mmhub = 0xFF00; 130 adev->mes.vmid_mask_gfxhub = total_vmid_mask & ~reserved_vmid_mask; 131 132 num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me; 133 if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES) 134 dev_warn(adev->dev, "more gfx pipes than supported by MES! (%d vs %d)\n", 135 num_pipes, AMDGPU_MES_MAX_GFX_PIPES); 136 137 for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) { 138 if (i >= num_pipes) 139 break; 140 141 adev->mes.gfx_hqd_mask[i] = gfx_hqd_mask; 142 } 143 144 num_pipes = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec; 145 if (num_pipes > AMDGPU_MES_MAX_COMPUTE_PIPES) 146 dev_warn(adev->dev, "more compute pipes than supported by MES! (%d vs %d)\n", 147 num_pipes, AMDGPU_MES_MAX_COMPUTE_PIPES); 148 149 for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { 150 /* 151 * Currently, only MEC1 is used for both kernel and user compute queue. 152 * To enable other MEC, we need to redistribute queues per pipe and 153 * adjust queue resource shared with kfd that needs a separate patch. 154 * Skip other MEC for now to avoid potential issues. 155 */ 156 if (i >= adev->gfx.mec.num_pipe_per_mec) 157 break; 158 159 adev->mes.compute_hqd_mask[i] = compute_hqd_mask; 160 } 161 162 num_pipes = adev->sdma.num_instances; 163 if (num_pipes > AMDGPU_MES_MAX_SDMA_PIPES) 164 dev_warn(adev->dev, "more SDMA pipes than supported by MES! (%d vs %d)\n", 165 num_pipes, AMDGPU_MES_MAX_SDMA_PIPES); 166 167 for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { 168 if (i >= num_pipes) 169 break; 170 adev->mes.sdma_hqd_mask[i] = 0xfc; 171 } 172 173 dev_info(adev->dev, 174 "MES: vmid_mask_mmhub 0x%08x, vmid_mask_gfxhub 0x%08x\n", 175 adev->mes.vmid_mask_mmhub, 176 adev->mes.vmid_mask_gfxhub); 177 178 dev_info(adev->dev, 179 "MES: gfx_hqd_mask 0x%08x, compute_hqd_mask 0x%08x, sdma_hqd_mask 0x%08x\n", 180 adev->mes.gfx_hqd_mask[0], 181 adev->mes.compute_hqd_mask[0], 182 adev->mes.sdma_hqd_mask[0]); 183 184 for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) { 185 r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]); 186 if (r) { 187 dev_err(adev->dev, 188 "(%d) ring trail_fence_offs wb alloc failed\n", 189 r); 190 goto error; 191 } 192 adev->mes.sch_ctx_gpu_addr[i] = 193 adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4); 194 adev->mes.sch_ctx_ptr[i] = 195 (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]]; 196 197 r = amdgpu_device_wb_get(adev, 198 &adev->mes.query_status_fence_offs[i]); 199 if (r) { 200 dev_err(adev->dev, 201 "(%d) query_status_fence_offs wb alloc failed\n", 202 r); 203 goto error; 204 } 205 adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr + 206 (adev->mes.query_status_fence_offs[i] * 4); 207 adev->mes.query_status_fence_ptr[i] = 208 (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]]; 209 } 210 211 r = amdgpu_mes_doorbell_init(adev); 212 if (r) 213 goto error; 214 215 r = amdgpu_mes_event_log_init(adev); 216 if (r) 217 goto error_doorbell; 218 219 if (adev->mes.hung_queue_db_array_size) { 220 for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) { 221 r = amdgpu_bo_create_kernel(adev, 222 adev->mes.hung_queue_db_array_size * sizeof(u32), 223 PAGE_SIZE, 224 AMDGPU_GEM_DOMAIN_GTT, 225 &adev->mes.hung_queue_db_array_gpu_obj[i], 226 &adev->mes.hung_queue_db_array_gpu_addr[i], 227 &adev->mes.hung_queue_db_array_cpu_addr[i]); 228 if (r) { 229 dev_warn(adev->dev, "failed to create MES hung db array buffer (%d)", r); 230 goto error_doorbell; 231 } 232 } 233 } 234 235 return 0; 236 237 error_doorbell: 238 amdgpu_mes_doorbell_free(adev); 239 error: 240 for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) { 241 if (adev->mes.sch_ctx_ptr[i]) 242 amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); 243 if (adev->mes.query_status_fence_ptr[i]) 244 amdgpu_device_wb_free(adev, 245 adev->mes.query_status_fence_offs[i]); 246 if (adev->mes.hung_queue_db_array_gpu_obj[i]) 247 amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj[i], 248 &adev->mes.hung_queue_db_array_gpu_addr[i], 249 &adev->mes.hung_queue_db_array_cpu_addr[i]); 250 } 251 252 ida_destroy(&adev->mes.doorbell_ida); 253 mutex_destroy(&adev->mes.mutex_hidden); 254 return r; 255 } 256 257 void amdgpu_mes_fini(struct amdgpu_device *adev) 258 { 259 int i; 260 int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1; 261 262 amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, 263 &adev->mes.event_log_gpu_addr, 264 &adev->mes.event_log_cpu_addr); 265 266 for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) { 267 amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj[i], 268 &adev->mes.hung_queue_db_array_gpu_addr[i], 269 &adev->mes.hung_queue_db_array_cpu_addr[i]); 270 271 if (adev->mes.sch_ctx_ptr[i]) 272 amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); 273 if (adev->mes.query_status_fence_ptr[i]) 274 amdgpu_device_wb_free(adev, 275 adev->mes.query_status_fence_offs[i]); 276 } 277 278 amdgpu_mes_doorbell_free(adev); 279 280 ida_destroy(&adev->mes.doorbell_ida); 281 mutex_destroy(&adev->mes.mutex_hidden); 282 } 283 284 int amdgpu_mes_suspend(struct amdgpu_device *adev) 285 { 286 struct mes_suspend_gang_input input; 287 int r; 288 289 if (!amdgpu_mes_suspend_resume_all_supported(adev)) 290 return 0; 291 292 memset(&input, 0x0, sizeof(struct mes_suspend_gang_input)); 293 input.suspend_all_gangs = 1; 294 295 /* 296 * Avoid taking any other locks under MES lock to avoid circular 297 * lock dependencies. 298 */ 299 amdgpu_mes_lock(&adev->mes); 300 r = adev->mes.funcs->suspend_gang(&adev->mes, &input); 301 amdgpu_mes_unlock(&adev->mes); 302 if (r) 303 dev_err(adev->dev, "failed to suspend all gangs"); 304 305 return r; 306 } 307 308 int amdgpu_mes_resume(struct amdgpu_device *adev) 309 { 310 struct mes_resume_gang_input input; 311 int r; 312 313 if (!amdgpu_mes_suspend_resume_all_supported(adev)) 314 return 0; 315 316 memset(&input, 0x0, sizeof(struct mes_resume_gang_input)); 317 input.resume_all_gangs = 1; 318 319 /* 320 * Avoid taking any other locks under MES lock to avoid circular 321 * lock dependencies. 322 */ 323 amdgpu_mes_lock(&adev->mes); 324 r = adev->mes.funcs->resume_gang(&adev->mes, &input); 325 amdgpu_mes_unlock(&adev->mes); 326 if (r) 327 dev_err(adev->dev, "failed to resume all gangs"); 328 329 return r; 330 } 331 332 int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, 333 struct amdgpu_ring *ring, uint32_t xcc_id) 334 { 335 struct mes_map_legacy_queue_input queue_input; 336 int r; 337 338 memset(&queue_input, 0, sizeof(queue_input)); 339 340 queue_input.xcc_id = xcc_id; 341 queue_input.queue_type = ring->funcs->type; 342 queue_input.doorbell_offset = ring->doorbell_index; 343 queue_input.pipe_id = ring->pipe; 344 queue_input.queue_id = ring->queue; 345 queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 346 queue_input.wptr_addr = ring->wptr_gpu_addr; 347 348 amdgpu_mes_lock(&adev->mes); 349 r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input); 350 amdgpu_mes_unlock(&adev->mes); 351 if (r) 352 dev_err(adev->dev, "failed to map legacy queue\n"); 353 354 return r; 355 } 356 357 int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, 358 struct amdgpu_ring *ring, 359 enum amdgpu_unmap_queues_action action, 360 u64 gpu_addr, u64 seq, uint32_t xcc_id) 361 { 362 struct mes_unmap_legacy_queue_input queue_input; 363 int r; 364 365 queue_input.xcc_id = xcc_id; 366 queue_input.action = action; 367 queue_input.queue_type = ring->funcs->type; 368 queue_input.doorbell_offset = ring->doorbell_index; 369 queue_input.pipe_id = ring->pipe; 370 queue_input.queue_id = ring->queue; 371 queue_input.trail_fence_addr = gpu_addr; 372 queue_input.trail_fence_data = seq; 373 374 amdgpu_mes_lock(&adev->mes); 375 r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input); 376 amdgpu_mes_unlock(&adev->mes); 377 if (r) 378 dev_err(adev->dev, "failed to unmap legacy queue\n"); 379 380 return r; 381 } 382 383 int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, 384 struct amdgpu_ring *ring, 385 unsigned int vmid, 386 bool use_mmio, 387 uint32_t xcc_id) 388 { 389 struct mes_reset_queue_input queue_input; 390 int r; 391 392 memset(&queue_input, 0, sizeof(queue_input)); 393 394 queue_input.xcc_id = xcc_id; 395 queue_input.queue_type = ring->funcs->type; 396 queue_input.doorbell_offset = ring->doorbell_index; 397 queue_input.me_id = ring->me; 398 queue_input.pipe_id = ring->pipe; 399 queue_input.queue_id = ring->queue; 400 queue_input.mqd_addr = ring->mqd_obj ? amdgpu_bo_gpu_offset(ring->mqd_obj) : 0; 401 queue_input.wptr_addr = ring->wptr_gpu_addr; 402 queue_input.vmid = vmid; 403 queue_input.use_mmio = use_mmio; 404 queue_input.is_kq = true; 405 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) 406 queue_input.legacy_gfx = true; 407 408 amdgpu_mes_lock(&adev->mes); 409 r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); 410 amdgpu_mes_unlock(&adev->mes); 411 if (r) 412 dev_err(adev->dev, "failed to reset legacy queue\n"); 413 414 return r; 415 } 416 417 int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev) 418 { 419 return adev->mes.hung_queue_db_array_size; 420 } 421 422 int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, 423 int queue_type, 424 bool detect_only, 425 unsigned int *hung_db_num, 426 u32 *hung_db_array, 427 uint32_t xcc_id) 428 { 429 struct mes_detect_and_reset_queue_input input; 430 u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr[xcc_id]; 431 int r, i; 432 433 if (!hung_db_num || !hung_db_array) 434 return -EINVAL; 435 436 if ((queue_type != AMDGPU_RING_TYPE_GFX) && 437 (queue_type != AMDGPU_RING_TYPE_COMPUTE) && 438 (queue_type != AMDGPU_RING_TYPE_SDMA)) 439 return -EINVAL; 440 441 /* Clear the doorbell array before detection */ 442 memset(adev->mes.hung_queue_db_array_cpu_addr[xcc_id], AMDGPU_MES_INVALID_DB_OFFSET, 443 adev->mes.hung_queue_db_array_size * sizeof(u32)); 444 input.queue_type = queue_type; 445 input.detect_only = detect_only; 446 447 r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes, 448 &input); 449 if (r) { 450 dev_err(adev->dev, "failed to detect and reset\n"); 451 } else { 452 *hung_db_num = 0; 453 for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) { 454 if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) { 455 hung_db_array[i] = db_array[i]; 456 *hung_db_num += 1; 457 } 458 } 459 460 /* 461 * TODO: return HQD info for MES scheduled user compute queue reset cases 462 * stored in hung_db_array hqd info offset to full array size 463 */ 464 } 465 466 return r; 467 } 468 469 uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg, 470 uint32_t xcc_id) 471 { 472 struct mes_misc_op_input op_input; 473 int r, val = 0; 474 uint32_t addr_offset = 0; 475 uint64_t read_val_gpu_addr; 476 uint32_t *read_val_ptr; 477 478 if (amdgpu_device_wb_get(adev, &addr_offset)) { 479 dev_err(adev->dev, "critical bug! too many mes readers\n"); 480 goto error; 481 } 482 read_val_gpu_addr = adev->wb.gpu_addr + (addr_offset * 4); 483 read_val_ptr = (uint32_t *)&adev->wb.wb[addr_offset]; 484 op_input.xcc_id = xcc_id; 485 op_input.op = MES_MISC_OP_READ_REG; 486 op_input.read_reg.reg_offset = reg; 487 op_input.read_reg.buffer_addr = read_val_gpu_addr; 488 489 if (!adev->mes.funcs->misc_op) { 490 dev_err(adev->dev, "mes rreg is not supported!\n"); 491 goto error; 492 } 493 494 amdgpu_mes_lock(&adev->mes); 495 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 496 amdgpu_mes_unlock(&adev->mes); 497 if (r) 498 dev_err(adev->dev, "failed to read reg (0x%x)\n", reg); 499 else 500 val = *(read_val_ptr); 501 502 error: 503 if (addr_offset) 504 amdgpu_device_wb_free(adev, addr_offset); 505 return val; 506 } 507 508 int amdgpu_mes_wreg(struct amdgpu_device *adev, uint32_t reg, 509 uint32_t val, uint32_t xcc_id) 510 { 511 struct mes_misc_op_input op_input; 512 int r; 513 514 op_input.xcc_id = xcc_id; 515 op_input.op = MES_MISC_OP_WRITE_REG; 516 op_input.write_reg.reg_offset = reg; 517 op_input.write_reg.reg_value = val; 518 519 if (!adev->mes.funcs->misc_op) { 520 dev_err(adev->dev, "mes wreg is not supported!\n"); 521 r = -EINVAL; 522 goto error; 523 } 524 525 amdgpu_mes_lock(&adev->mes); 526 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 527 amdgpu_mes_unlock(&adev->mes); 528 if (r) 529 dev_err(adev->dev, "failed to write reg (0x%x)\n", reg); 530 531 error: 532 return r; 533 } 534 535 int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, 536 uint32_t reg0, uint32_t reg1, 537 uint32_t ref, uint32_t mask, 538 uint32_t xcc_id) 539 { 540 struct mes_misc_op_input op_input; 541 int r; 542 543 op_input.xcc_id = xcc_id; 544 op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT; 545 op_input.wrm_reg.reg0 = reg0; 546 op_input.wrm_reg.reg1 = reg1; 547 op_input.wrm_reg.ref = ref; 548 op_input.wrm_reg.mask = mask; 549 550 if (!adev->mes.funcs->misc_op) { 551 dev_err(adev->dev, "mes reg_write_reg_wait is not supported!\n"); 552 r = -EINVAL; 553 goto error; 554 } 555 556 amdgpu_mes_lock(&adev->mes); 557 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 558 amdgpu_mes_unlock(&adev->mes); 559 if (r) 560 dev_err(adev->dev, "failed to reg_write_reg_wait\n"); 561 562 error: 563 return r; 564 } 565 566 int amdgpu_mes_hdp_flush(struct amdgpu_device *adev) 567 { 568 uint32_t hdp_flush_req_offset, hdp_flush_done_offset; 569 struct amdgpu_ring *mes_ring; 570 uint32_t ref_and_mask = 0, reg_mem_engine = 0; 571 572 if (!adev->gfx.funcs->get_hdp_flush_mask) { 573 dev_err(adev->dev, "mes hdp flush is not supported.\n"); 574 return -EINVAL; 575 } 576 577 mes_ring = &adev->mes.ring[0]; 578 hdp_flush_req_offset = adev->nbio.funcs->get_hdp_flush_req_offset(adev); 579 hdp_flush_done_offset = adev->nbio.funcs->get_hdp_flush_done_offset(adev); 580 581 adev->gfx.funcs->get_hdp_flush_mask(mes_ring, &ref_and_mask, ®_mem_engine); 582 583 return amdgpu_mes_reg_write_reg_wait(adev, hdp_flush_req_offset, hdp_flush_done_offset, 584 ref_and_mask, ref_and_mask, 0); 585 } 586 587 int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, 588 uint64_t process_context_addr, 589 uint32_t spi_gdbg_per_vmid_cntl, 590 const uint32_t *tcp_watch_cntl, 591 uint32_t flags, 592 bool trap_en, 593 uint32_t xcc_id) 594 { 595 struct mes_misc_op_input op_input = {0}; 596 int r; 597 598 if (!adev->mes.funcs->misc_op) { 599 dev_err(adev->dev, 600 "mes set shader debugger is not supported!\n"); 601 return -EINVAL; 602 } 603 604 op_input.xcc_id = xcc_id; 605 op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER; 606 op_input.set_shader_debugger.process_context_addr = process_context_addr; 607 op_input.set_shader_debugger.flags.u32all = flags; 608 609 /* use amdgpu mes_flush_shader_debugger instead */ 610 if (op_input.set_shader_debugger.flags.process_ctx_flush) 611 return -EINVAL; 612 613 op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl; 614 memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl, 615 sizeof(op_input.set_shader_debugger.tcp_watch_cntl)); 616 617 if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >> 618 AMDGPU_MES_API_VERSION_SHIFT) >= 14) 619 op_input.set_shader_debugger.trap_en = trap_en; 620 621 amdgpu_mes_lock(&adev->mes); 622 623 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 624 if (r) 625 dev_err(adev->dev, "failed to set_shader_debugger\n"); 626 627 amdgpu_mes_unlock(&adev->mes); 628 629 return r; 630 } 631 632 int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, 633 uint64_t process_context_addr, 634 uint32_t xcc_id) 635 { 636 struct mes_misc_op_input op_input = {0}; 637 int r; 638 639 if (!adev->mes.funcs->misc_op) { 640 dev_err(adev->dev, 641 "mes flush shader debugger is not supported!\n"); 642 return -EINVAL; 643 } 644 645 op_input.xcc_id = xcc_id; 646 op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER; 647 op_input.set_shader_debugger.process_context_addr = process_context_addr; 648 op_input.set_shader_debugger.flags.process_ctx_flush = true; 649 650 amdgpu_mes_lock(&adev->mes); 651 652 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 653 if (r) 654 dev_err(adev->dev, "failed to set_shader_debugger\n"); 655 656 amdgpu_mes_unlock(&adev->mes); 657 658 return r; 659 } 660 661 uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, 662 enum amdgpu_mes_priority_level prio) 663 { 664 return adev->mes.aggregated_doorbells[prio]; 665 } 666 667 int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) 668 { 669 const struct mes_firmware_header_v1_0 *mes_hdr; 670 struct amdgpu_firmware_info *info; 671 char ucode_prefix[30]; 672 char fw_name[50]; 673 bool need_retry = false; 674 u32 *ucode_ptr; 675 int r; 676 677 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 678 sizeof(ucode_prefix)); 679 if (adev->enable_uni_mes) { 680 snprintf(fw_name, sizeof(fw_name), 681 "amdgpu/%s_uni_mes.bin", ucode_prefix); 682 } else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && 683 amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0)) { 684 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", 685 ucode_prefix, 686 pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1"); 687 need_retry = true; 688 } else { 689 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", 690 ucode_prefix, 691 pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1"); 692 } 693 694 r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], AMDGPU_UCODE_REQUIRED, 695 "%s", fw_name); 696 if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) { 697 dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix); 698 r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], 699 AMDGPU_UCODE_REQUIRED, 700 "amdgpu/%s_mes.bin", ucode_prefix); 701 } 702 703 if (r) 704 goto out; 705 706 mes_hdr = (const struct mes_firmware_header_v1_0 *) 707 adev->mes.fw[pipe]->data; 708 adev->mes.uc_start_addr[pipe] = 709 le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) | 710 ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32); 711 adev->mes.data_start_addr[pipe] = 712 le32_to_cpu(mes_hdr->mes_data_start_addr_lo) | 713 ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32); 714 ucode_ptr = (u32 *)(adev->mes.fw[pipe]->data + 715 sizeof(union amdgpu_firmware_header)); 716 adev->mes.fw_version[pipe] = 717 le32_to_cpu(ucode_ptr[24]) & AMDGPU_MES_VERSION_MASK; 718 719 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 720 int ucode, ucode_data; 721 722 if (pipe == AMDGPU_MES_SCHED_PIPE) { 723 ucode = AMDGPU_UCODE_ID_CP_MES; 724 ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA; 725 } else { 726 ucode = AMDGPU_UCODE_ID_CP_MES1; 727 ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA; 728 } 729 730 info = &adev->firmware.ucode[ucode]; 731 info->ucode_id = ucode; 732 info->fw = adev->mes.fw[pipe]; 733 adev->firmware.fw_size += 734 ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes), 735 PAGE_SIZE); 736 737 info = &adev->firmware.ucode[ucode_data]; 738 info->ucode_id = ucode_data; 739 info->fw = adev->mes.fw[pipe]; 740 adev->firmware.fw_size += 741 ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes), 742 PAGE_SIZE); 743 } 744 745 return 0; 746 out: 747 amdgpu_ucode_release(&adev->mes.fw[pipe]); 748 return r; 749 } 750 751 bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) 752 { 753 uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; 754 755 return ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && 756 amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) && 757 mes_rev >= 0x63) || 758 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0)); 759 } 760 761 /* Fix me -- node_id is used to identify the correct MES instances in the future */ 762 static int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, 763 uint32_t node_id, bool enable) 764 { 765 struct mes_misc_op_input op_input = {0}; 766 int r; 767 768 op_input.op = MES_MISC_OP_CHANGE_CONFIG; 769 op_input.change_config.option.limit_single_process = enable ? 1 : 0; 770 771 if (!adev->mes.funcs->misc_op) { 772 dev_err(adev->dev, "mes change config is not supported!\n"); 773 r = -EINVAL; 774 goto error; 775 } 776 777 amdgpu_mes_lock(&adev->mes); 778 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 779 amdgpu_mes_unlock(&adev->mes); 780 if (r) 781 dev_err(adev->dev, "failed to change_config.\n"); 782 783 error: 784 return r; 785 } 786 787 int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev) 788 { 789 int i, r = 0; 790 791 if (adev->enable_mes && adev->gfx.enable_cleaner_shader) { 792 mutex_lock(&adev->enforce_isolation_mutex); 793 for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { 794 if (adev->enforce_isolation[i] == AMDGPU_ENFORCE_ISOLATION_ENABLE) 795 r |= amdgpu_mes_set_enforce_isolation(adev, i, true); 796 else 797 r |= amdgpu_mes_set_enforce_isolation(adev, i, false); 798 } 799 mutex_unlock(&adev->enforce_isolation_mutex); 800 } 801 return r; 802 } 803 804 #if defined(CONFIG_DEBUG_FS) 805 806 static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) 807 { 808 struct amdgpu_device *adev = m->private; 809 uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr); 810 811 seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4, 812 mem, adev->mes.event_log_size, false); 813 814 return 0; 815 } 816 817 DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log); 818 819 #endif 820 821 void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev) 822 { 823 824 #if defined(CONFIG_DEBUG_FS) 825 struct drm_minor *minor = adev_to_drm(adev)->primary; 826 struct dentry *root = minor->debugfs_root; 827 if (adev->enable_mes && amdgpu_mes_log_enable) 828 debugfs_create_file("amdgpu_mes_event_log", 0444, root, 829 adev, &amdgpu_debugfs_mes_event_log_fops); 830 831 #endif 832 } 833