/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/firmware.h>
#include <drm/drm_exec.h>

#include "amdgpu_mes.h"
#include "amdgpu.h"
#include "soc15_common.h"
#include "amdgpu_mes_ctx.h"

#define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
#define AMDGPU_ONE_DOORBELL_SIZE 8

int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
{
	return roundup(AMDGPU_ONE_DOORBELL_SIZE *
		       AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
		       PAGE_SIZE);
}

static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
{
	int i;
	struct amdgpu_mes *mes = &adev->mes;

	/* Bitmap for dynamic allocation of kernel doorbells */
	mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL);
	if (!mes->doorbell_bitmap) {
		dev_err(adev->dev, "Failed to allocate MES doorbell bitmap\n");
		return -ENOMEM;
	}

	mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE;
	for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) {
		adev->mes.aggregated_doorbells[i] = mes->db_start_dw_offset + i * 2;
		set_bit(i, mes->doorbell_bitmap);
	}

	return 0;
}

static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
{
	int r;

	if (!amdgpu_mes_log_enable)
		return 0;

	r = amdgpu_bo_create_kernel(adev, adev->mes.event_log_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM,
				    &adev->mes.event_log_gpu_obj,
				    &adev->mes.event_log_gpu_addr,
				    &adev->mes.event_log_cpu_addr);
	if (r) {
		dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r);
		return r;
	}

	memset(adev->mes.event_log_cpu_addr, 0, adev->mes.event_log_size);

	return 0;
}

static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
{
	bitmap_free(adev->mes.doorbell_bitmap);
}

static inline u32 amdgpu_mes_get_hqd_mask(u32 num_pipe,
					  u32 num_hqd_per_pipe,
					  u32 num_reserved_hqd)
{
	u32 total_hqd_mask, reserved_hqd_mask;

	if (num_pipe == 0)
		return 0;

	total_hqd_mask = (u32)((1ULL << num_hqd_per_pipe) - 1);
	reserved_hqd_mask = (u32)((1ULL << DIV_ROUND_UP(num_reserved_hqd, num_pipe)) - 1);

	return (total_hqd_mask & ~reserved_hqd_mask);
}

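/*
 * amdgpu_mes_init - set up the MES software state: ID allocators and locks,
 * VMID and HQD masks for gfx, compute and SDMA, per-pipe write-back slots
 * for the scheduler context and query-status fences, the kernel doorbell
 * bitmap and aggregated doorbells, and, when enabled, the event log buffer
 * and per-pipe hung-queue doorbell arrays.
 */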
int amdgpu_mes_init(struct amdgpu_device *adev)
{
	int i, r, num_pipes;
	u32 total_vmid_mask, reserved_vmid_mask;
	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
	u32 gfx_hqd_mask = amdgpu_mes_get_hqd_mask(adev->gfx.me.num_pipe_per_me,
						   adev->gfx.me.num_queue_per_pipe,
						   adev->gfx.disable_kq ? 0 : adev->gfx.num_gfx_rings);
	u32 compute_hqd_mask = amdgpu_mes_get_hqd_mask(adev->gfx.mec.num_pipe_per_mec,
						       adev->gfx.mec.num_queue_per_pipe,
						       adev->gfx.disable_kq ? 0 : adev->gfx.num_compute_rings);

	adev->mes.adev = adev;

	idr_init(&adev->mes.pasid_idr);
	idr_init(&adev->mes.gang_id_idr);
	idr_init(&adev->mes.queue_id_idr);
	ida_init(&adev->mes.doorbell_ida);
	spin_lock_init(&adev->mes.queue_id_lock);
	mutex_init(&adev->mes.mutex_hidden);

	for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++)
		spin_lock_init(&adev->mes.ring_lock[i]);

	adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
	total_vmid_mask = (u32)((1UL << 16) - 1);
	reserved_vmid_mask = (u32)((1UL << adev->vm_manager.first_kfd_vmid) - 1);

	adev->mes.vmid_mask_mmhub = 0xff00;
	adev->mes.vmid_mask_gfxhub = total_vmid_mask & ~reserved_vmid_mask;

	num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me;
	if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES)
		dev_warn(adev->dev, "more gfx pipes than supported by MES! (%d vs %d)\n",
			 num_pipes, AMDGPU_MES_MAX_GFX_PIPES);

	for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) {
		if (i >= num_pipes)
			break;

		adev->mes.gfx_hqd_mask[i] = gfx_hqd_mask;
	}

	num_pipes = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec;
	if (num_pipes > AMDGPU_MES_MAX_COMPUTE_PIPES)
		dev_warn(adev->dev, "more compute pipes than supported by MES! (%d vs %d)\n",
			 num_pipes, AMDGPU_MES_MAX_COMPUTE_PIPES);

	for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
		/*
		 * Currently, only MEC1 is used for both kernel and user compute
		 * queues. Enabling the other MECs requires redistributing the
		 * queues per pipe and adjusting the queue resources shared with
		 * KFD, which needs a separate patch. Skip the other MECs for
		 * now to avoid potential issues.
		 */
		if (i >= adev->gfx.mec.num_pipe_per_mec)
			break;

		adev->mes.compute_hqd_mask[i] = compute_hqd_mask;
	}

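	/*
	 * SDMA HQD mask: bits 2-7 are set, so queues 2-7 of each SDMA
	 * instance are exposed to the MES scheduler while queues 0 and 1
	 * are kept out of the MES pool.
	 */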
	num_pipes = adev->sdma.num_instances;
	if (num_pipes > AMDGPU_MES_MAX_SDMA_PIPES)
		dev_warn(adev->dev, "more SDMA pipes than supported by MES! (%d vs %d)\n",
			 num_pipes, AMDGPU_MES_MAX_SDMA_PIPES);

	for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
		if (i >= num_pipes)
			break;
		adev->mes.sdma_hqd_mask[i] = 0xfc;
	}

	dev_info(adev->dev,
		 "MES: vmid_mask_mmhub 0x%08x, vmid_mask_gfxhub 0x%08x\n",
		 adev->mes.vmid_mask_mmhub,
		 adev->mes.vmid_mask_gfxhub);

	dev_info(adev->dev,
		 "MES: gfx_hqd_mask 0x%08x, compute_hqd_mask 0x%08x, sdma_hqd_mask 0x%08x\n",
		 adev->mes.gfx_hqd_mask[0],
		 adev->mes.compute_hqd_mask[0],
		 adev->mes.sdma_hqd_mask[0]);

	for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) {
		r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]);
		if (r) {
			dev_err(adev->dev,
				"(%d) ring trail_fence_offs wb alloc failed\n",
				r);
			goto error;
		}
		adev->mes.sch_ctx_gpu_addr[i] =
			adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4);
		adev->mes.sch_ctx_ptr[i] =
			(uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]];

		r = amdgpu_device_wb_get(adev,
					 &adev->mes.query_status_fence_offs[i]);
		if (r) {
			dev_err(adev->dev,
				"(%d) query_status_fence_offs wb alloc failed\n",
				r);
			goto error;
		}
		adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr +
			(adev->mes.query_status_fence_offs[i] * 4);
		adev->mes.query_status_fence_ptr[i] =
			(uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]];
	}

	r = amdgpu_mes_doorbell_init(adev);
	if (r)
		goto error;

	r = amdgpu_mes_event_log_init(adev);
	if (r)
		goto error_doorbell;

	if (adev->mes.hung_queue_db_array_size) {
		for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) {
			r = amdgpu_bo_create_kernel(adev,
					adev->mes.hung_queue_db_array_size * sizeof(u32),
					PAGE_SIZE,
					AMDGPU_GEM_DOMAIN_GTT,
					&adev->mes.hung_queue_db_array_gpu_obj[i],
					&adev->mes.hung_queue_db_array_gpu_addr[i],
					&adev->mes.hung_queue_db_array_cpu_addr[i]);
			if (r) {
				dev_warn(adev->dev, "failed to create MES hung db array buffer (%d)", r);
				goto error_doorbell;
			}
		}
	}

	return 0;

error_doorbell:
	amdgpu_mes_doorbell_free(adev);
error:
	for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) {
		if (adev->mes.sch_ctx_ptr[i])
			amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
		if (adev->mes.query_status_fence_ptr[i])
			amdgpu_device_wb_free(adev,
					      adev->mes.query_status_fence_offs[i]);
		if (adev->mes.hung_queue_db_array_gpu_obj[i])
			amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj[i],
					      &adev->mes.hung_queue_db_array_gpu_addr[i],
					      &adev->mes.hung_queue_db_array_cpu_addr[i]);
	}

	idr_destroy(&adev->mes.pasid_idr);
	idr_destroy(&adev->mes.gang_id_idr);
	idr_destroy(&adev->mes.queue_id_idr);
	ida_destroy(&adev->mes.doorbell_ida);
	mutex_destroy(&adev->mes.mutex_hidden);
	return r;
}

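/*
 * amdgpu_mes_fini - tear down everything amdgpu_mes_init() created: the
 * event log and hung-queue buffers, the per-pipe write-back slots, the
 * doorbell bitmap and the ID allocators.
 */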
void amdgpu_mes_fini(struct amdgpu_device *adev)
{
	int i;
	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;

	amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
			      &adev->mes.event_log_gpu_addr,
			      &adev->mes.event_log_cpu_addr);

	for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) {
		amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj[i],
				      &adev->mes.hung_queue_db_array_gpu_addr[i],
				      &adev->mes.hung_queue_db_array_cpu_addr[i]);

		if (adev->mes.sch_ctx_ptr[i])
			amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
		if (adev->mes.query_status_fence_ptr[i])
			amdgpu_device_wb_free(adev,
					      adev->mes.query_status_fence_offs[i]);
	}

	amdgpu_mes_doorbell_free(adev);

	idr_destroy(&adev->mes.pasid_idr);
	idr_destroy(&adev->mes.gang_id_idr);
	idr_destroy(&adev->mes.queue_id_idr);
	ida_destroy(&adev->mes.doorbell_ida);
	mutex_destroy(&adev->mes.mutex_hidden);
}

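/*
 * amdgpu_mes_suspend()/amdgpu_mes_resume() ask the MES firmware to suspend
 * or resume all gangs in a single call. They are no-ops on firmware that
 * does not support suspend/resume-all, see
 * amdgpu_mes_suspend_resume_all_supported().
 */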
int amdgpu_mes_suspend(struct amdgpu_device *adev)
{
	struct mes_suspend_gang_input input;
	int r;

	if (!amdgpu_mes_suspend_resume_all_supported(adev))
		return 0;

	memset(&input, 0x0, sizeof(struct mes_suspend_gang_input));
	input.suspend_all_gangs = 1;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
	amdgpu_mes_unlock(&adev->mes);
	if (r)
		dev_err(adev->dev, "failed to suspend all gangs");

	return r;
}

int amdgpu_mes_resume(struct amdgpu_device *adev)
{
	struct mes_resume_gang_input input;
	int r;

	if (!amdgpu_mes_suspend_resume_all_supported(adev))
		return 0;

	memset(&input, 0x0, sizeof(struct mes_resume_gang_input));
	input.resume_all_gangs = 1;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->resume_gang(&adev->mes, &input);
	amdgpu_mes_unlock(&adev->mes);
	if (r)
		dev_err(adev->dev, "failed to resume all gangs");

	return r;
}

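/*
 * Legacy (kernel) queue helpers: map, unmap and reset a kernel ring's
 * hardware queue by submitting the corresponding request to the MES
 * firmware.
 */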
int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
				struct amdgpu_ring *ring, uint32_t xcc_id)
{
	struct mes_map_legacy_queue_input queue_input;
	int r;

	memset(&queue_input, 0, sizeof(queue_input));

	queue_input.xcc_id = xcc_id;
	queue_input.queue_type = ring->funcs->type;
	queue_input.doorbell_offset = ring->doorbell_index;
	queue_input.pipe_id = ring->pipe;
	queue_input.queue_id = ring->queue;
	queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	queue_input.wptr_addr = ring->wptr_gpu_addr;

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input);
	amdgpu_mes_unlock(&adev->mes);
	if (r)
		dev_err(adev->dev, "failed to map legacy queue\n");

	return r;
}

int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  enum amdgpu_unmap_queues_action action,
				  u64 gpu_addr, u64 seq, uint32_t xcc_id)
{
	struct mes_unmap_legacy_queue_input queue_input;
	int r;

	queue_input.xcc_id = xcc_id;
	queue_input.action = action;
	queue_input.queue_type = ring->funcs->type;
	queue_input.doorbell_offset = ring->doorbell_index;
	queue_input.pipe_id = ring->pipe;
	queue_input.queue_id = ring->queue;
	queue_input.trail_fence_addr = gpu_addr;
	queue_input.trail_fence_data = seq;

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input);
	amdgpu_mes_unlock(&adev->mes);
	if (r)
		dev_err(adev->dev, "failed to unmap legacy queue\n");

	return r;
}

int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  unsigned int vmid,
				  bool use_mmio,
				  uint32_t xcc_id)
{
	struct mes_reset_queue_input queue_input;
	int r;

	memset(&queue_input, 0, sizeof(queue_input));

	queue_input.xcc_id = xcc_id;
	queue_input.queue_type = ring->funcs->type;
	queue_input.doorbell_offset = ring->doorbell_index;
	queue_input.me_id = ring->me;
	queue_input.pipe_id = ring->pipe;
	queue_input.queue_id = ring->queue;
	queue_input.mqd_addr = ring->mqd_obj ? amdgpu_bo_gpu_offset(ring->mqd_obj) : 0;
	queue_input.wptr_addr = ring->wptr_gpu_addr;
	queue_input.vmid = vmid;
	queue_input.use_mmio = use_mmio;
	queue_input.is_kq = true;
	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX)
		queue_input.legacy_gfx = true;

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input);
	amdgpu_mes_unlock(&adev->mes);
	if (r)
		dev_err(adev->dev, "failed to reset legacy queue\n");

	return r;
}

int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev)
{
	return adev->mes.hung_queue_db_array_size;
}

int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
					    int queue_type,
					    bool detect_only,
					    unsigned int *hung_db_num,
					    u32 *hung_db_array,
					    uint32_t xcc_id)
{
	struct mes_detect_and_reset_queue_input input;
	u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr[xcc_id];
	int r, i;

	if (!hung_db_num || !hung_db_array)
		return -EINVAL;

	if ((queue_type != AMDGPU_RING_TYPE_GFX) &&
	    (queue_type != AMDGPU_RING_TYPE_COMPUTE) &&
	    (queue_type != AMDGPU_RING_TYPE_SDMA))
		return -EINVAL;

	/* Clear the doorbell array before detection */
	memset(adev->mes.hung_queue_db_array_cpu_addr[xcc_id], AMDGPU_MES_INVALID_DB_OFFSET,
	       adev->mes.hung_queue_db_array_size * sizeof(u32));
	input.queue_type = queue_type;
	input.detect_only = detect_only;

	r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes,
							  &input);
	if (r) {
		dev_err(adev->dev, "failed to detect and reset\n");
	} else {
		*hung_db_num = 0;
		for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) {
			if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) {
				hung_db_array[i] = db_array[i];
				*hung_db_num += 1;
			}
		}

		/*
		 * TODO: return HQD info for MES scheduled user compute queue
		 * reset cases, stored in hung_db_array from the hqd info
		 * offset to the full array size.
		 */
	}

	return r;
}

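/*
 * Register access routed through the MES firmware: a read returns its value
 * through a scratch write-back slot filled in by MES, while writes and
 * write-then-wait operations are submitted as MES misc ops.
 */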
uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg,
			 uint32_t xcc_id)
{
	struct mes_misc_op_input op_input;
	int r, val = 0;
	uint32_t addr_offset = 0;
	uint64_t read_val_gpu_addr;
	uint32_t *read_val_ptr;

	if (amdgpu_device_wb_get(adev, &addr_offset)) {
		dev_err(adev->dev, "critical bug! too many mes readers\n");
		goto error;
	}
	read_val_gpu_addr = adev->wb.gpu_addr + (addr_offset * 4);
	read_val_ptr = (uint32_t *)&adev->wb.wb[addr_offset];
	op_input.xcc_id = xcc_id;
	op_input.op = MES_MISC_OP_READ_REG;
	op_input.read_reg.reg_offset = reg;
	op_input.read_reg.buffer_addr = read_val_gpu_addr;

	if (!adev->mes.funcs->misc_op) {
		dev_err(adev->dev, "mes rreg is not supported!\n");
		goto error;
	}

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	amdgpu_mes_unlock(&adev->mes);
	if (r)
		dev_err(adev->dev, "failed to read reg (0x%x)\n", reg);
	else
		val = *(read_val_ptr);

error:
	if (addr_offset)
		amdgpu_device_wb_free(adev, addr_offset);
	return val;
}

int amdgpu_mes_wreg(struct amdgpu_device *adev, uint32_t reg,
		    uint32_t val, uint32_t xcc_id)
{
	struct mes_misc_op_input op_input;
	int r;

	op_input.xcc_id = xcc_id;
	op_input.op = MES_MISC_OP_WRITE_REG;
	op_input.write_reg.reg_offset = reg;
	op_input.write_reg.reg_value = val;

	if (!adev->mes.funcs->misc_op) {
		dev_err(adev->dev, "mes wreg is not supported!\n");
		r = -EINVAL;
		goto error;
	}

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	amdgpu_mes_unlock(&adev->mes);
	if (r)
		dev_err(adev->dev, "failed to write reg (0x%x)\n", reg);

error:
	return r;
}

int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
				  uint32_t reg0, uint32_t reg1,
				  uint32_t ref, uint32_t mask,
				  uint32_t xcc_id)
{
	struct mes_misc_op_input op_input;
	int r;

	op_input.xcc_id = xcc_id;
	op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT;
	op_input.wrm_reg.reg0 = reg0;
	op_input.wrm_reg.reg1 = reg1;
	op_input.wrm_reg.ref = ref;
	op_input.wrm_reg.mask = mask;

	if (!adev->mes.funcs->misc_op) {
		dev_err(adev->dev, "mes reg_write_reg_wait is not supported!\n");
		r = -EINVAL;
		goto error;
	}

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	amdgpu_mes_unlock(&adev->mes);
	if (r)
		dev_err(adev->dev, "failed to reg_write_reg_wait\n");

error:
	return r;
}

int amdgpu_mes_hdp_flush(struct amdgpu_device *adev)
{
	uint32_t hdp_flush_req_offset, hdp_flush_done_offset;
	struct amdgpu_ring *mes_ring;
	uint32_t ref_and_mask = 0, reg_mem_engine = 0;

	if (!adev->gfx.funcs->get_hdp_flush_mask) {
		dev_err(adev->dev, "mes hdp flush is not supported.\n");
		return -EINVAL;
	}

	mes_ring = &adev->mes.ring[0];
	hdp_flush_req_offset = adev->nbio.funcs->get_hdp_flush_req_offset(adev);
	hdp_flush_done_offset = adev->nbio.funcs->get_hdp_flush_done_offset(adev);

	adev->gfx.funcs->get_hdp_flush_mask(mes_ring, &ref_and_mask, &reg_mem_engine);

	return amdgpu_mes_reg_write_reg_wait(adev, hdp_flush_req_offset, hdp_flush_done_offset,
					     ref_and_mask, ref_and_mask, 0);
}

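/*
 * Shader debugger control: amdgpu_mes_set_shader_debugger() programs the
 * per-process debug state (SPI per-VMID control, TCP watch registers and,
 * on new enough firmware, trap enable) through a MES misc op, while
 * amdgpu_mes_flush_shader_debugger() issues the same op with only the
 * process-context flush flag set.
 */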
int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
				   uint64_t process_context_addr,
				   uint32_t spi_gdbg_per_vmid_cntl,
				   const uint32_t *tcp_watch_cntl,
				   uint32_t flags,
				   bool trap_en,
				   uint32_t xcc_id)
{
	struct mes_misc_op_input op_input = {0};
	int r;

	if (!adev->mes.funcs->misc_op) {
		dev_err(adev->dev,
			"mes set shader debugger is not supported!\n");
		return -EINVAL;
	}

	op_input.xcc_id = xcc_id;
	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
	op_input.set_shader_debugger.process_context_addr = process_context_addr;
	op_input.set_shader_debugger.flags.u32all = flags;

	/* use amdgpu_mes_flush_shader_debugger instead */
	if (op_input.set_shader_debugger.flags.process_ctx_flush)
		return -EINVAL;

	op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl;
	memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
	       sizeof(op_input.set_shader_debugger.tcp_watch_cntl));

	if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
	     AMDGPU_MES_API_VERSION_SHIFT) >= 14)
		op_input.set_shader_debugger.trap_en = trap_en;

	amdgpu_mes_lock(&adev->mes);

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		dev_err(adev->dev, "failed to set_shader_debugger\n");

	amdgpu_mes_unlock(&adev->mes);

	return r;
}

int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
				     uint64_t process_context_addr,
				     uint32_t xcc_id)
{
	struct mes_misc_op_input op_input = {0};
	int r;

	if (!adev->mes.funcs->misc_op) {
		dev_err(adev->dev,
			"mes flush shader debugger is not supported!\n");
		return -EINVAL;
	}

	op_input.xcc_id = xcc_id;
	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
	op_input.set_shader_debugger.process_context_addr = process_context_addr;
	op_input.set_shader_debugger.flags.process_ctx_flush = true;

	amdgpu_mes_lock(&adev->mes);

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		dev_err(adev->dev, "failed to set_shader_debugger\n");

	amdgpu_mes_unlock(&adev->mes);

	return r;
}

uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
						  enum amdgpu_mes_priority_level prio)
{
	return adev->mes.aggregated_doorbells[prio];
}

/*
 * Request the MES firmware image for the given pipe. The firmware name is
 * derived from the GC IP version: "<prefix>_uni_mes.bin" for unified MES,
 * "<prefix>_mes_2.bin"/"<prefix>_mes1.bin" on GC 11.x (with a fallback to
 * "<prefix>_mes.bin" for the scheduler pipe), and "<prefix>_mes.bin"/
 * "<prefix>_mes1.bin" otherwise.
 */
int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
{
	const struct mes_firmware_header_v1_0 *mes_hdr;
	struct amdgpu_firmware_info *info;
	char ucode_prefix[30];
	char fw_name[50];
	bool need_retry = false;
	u32 *ucode_ptr;
	int r;

	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix,
				       sizeof(ucode_prefix));
	if (adev->enable_uni_mes) {
		snprintf(fw_name, sizeof(fw_name),
			 "amdgpu/%s_uni_mes.bin", ucode_prefix);
	} else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
		   amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
			 ucode_prefix,
			 pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1");
		need_retry = true;
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
			 ucode_prefix,
			 pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
	}

	r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], AMDGPU_UCODE_REQUIRED,
				 "%s", fw_name);
	if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) {
		dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix);
		r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe],
					 AMDGPU_UCODE_REQUIRED,
					 "amdgpu/%s_mes.bin", ucode_prefix);
	}

	if (r)
		goto out;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw[pipe]->data;
	adev->mes.uc_start_addr[pipe] =
		le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
		((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
	adev->mes.data_start_addr[pipe] =
		le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
		((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
	ucode_ptr = (u32 *)(adev->mes.fw[pipe]->data +
			    sizeof(union amdgpu_firmware_header));
	adev->mes.fw_version[pipe] =
		le32_to_cpu(ucode_ptr[24]) & AMDGPU_MES_VERSION_MASK;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		int ucode, ucode_data;

		if (pipe == AMDGPU_MES_SCHED_PIPE) {
			ucode = AMDGPU_UCODE_ID_CP_MES;
			ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA;
		} else {
			ucode = AMDGPU_UCODE_ID_CP_MES1;
			ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA;
		}

		info = &adev->firmware.ucode[ucode];
		info->ucode_id = ucode;
		info->fw = adev->mes.fw[pipe];
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
			      PAGE_SIZE);

		info = &adev->firmware.ucode[ucode_data];
		info->ucode_id = ucode_data;
		info->fw = adev->mes.fw[pipe];
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
			      PAGE_SIZE);
	}

	return 0;
out:
	amdgpu_ucode_release(&adev->mes.fw[pipe]);
	return r;
}

bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev)
{
	uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;

	return ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
		 amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) &&
		 mes_rev >= 0x63) ||
		amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0));
}

/* FIXME: node_id will be used to identify the correct MES instance in the future */
static int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev,
					    uint32_t node_id, bool enable)
{
	struct mes_misc_op_input op_input = {0};
	int r;

	op_input.op = MES_MISC_OP_CHANGE_CONFIG;
	op_input.change_config.option.limit_single_process = enable ? 1 : 0;

	if (!adev->mes.funcs->misc_op) {
		dev_err(adev->dev, "mes change config is not supported!\n");
		r = -EINVAL;
		goto error;
	}

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	amdgpu_mes_unlock(&adev->mes);
	if (r)
		dev_err(adev->dev, "failed to change_config.\n");

error:
	return r;
}

/*
 * Push the current per-partition enforce-isolation setting to the MES
 * firmware; only relevant when MES and the cleaner shader are enabled.
 */
int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev)
{
	int i, r = 0;

	if (adev->enable_mes && adev->gfx.enable_cleaner_shader) {
		mutex_lock(&adev->enforce_isolation_mutex);
		for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
			if (adev->enforce_isolation[i] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
				r |= amdgpu_mes_set_enforce_isolation(adev, i, true);
			else
				r |= amdgpu_mes_set_enforce_isolation(adev, i, false);
		}
		mutex_unlock(&adev->enforce_isolation_mutex);
	}
	return r;
}

#if defined(CONFIG_DEBUG_FS)

static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = m->private;
	uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);

	seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
		     mem, adev->mes.event_log_size, false);

	return 0;
}

DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);

#endif

void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;

	if (adev->enable_mes && amdgpu_mes_log_enable)
		debugfs_create_file("amdgpu_mes_event_log", 0444, root,
				    adev, &amdgpu_debugfs_mes_event_log_fops);
#endif
}