1 /* 2 * Copyright 2019 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
 *
 */

#include <linux/firmware.h>
#include <drm/drm_exec.h>

#include "amdgpu_mes.h"
#include "amdgpu.h"
#include "soc15_common.h"
#include "amdgpu_mes_ctx.h"

/* Upper bound on queues a single process may own through MES. */
#define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
/* Bytes occupied by a single doorbell slot (64-bit doorbells). */
#define AMDGPU_ONE_DOORBELL_SIZE 8
/* Low compute queues kept out of the MES scheduling mask. */
#define AMDGPU_MES_RESERVED_QUEUES 2

/*
 * amdgpu_mes_doorbell_process_slice - size in bytes of one process'
 * doorbell aperture: one doorbell slot per possible queue, rounded up
 * to a whole page.
 */
int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
{
	return roundup(AMDGPU_ONE_DOORBELL_SIZE *
		       AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
		       PAGE_SIZE);
}

/*
 * amdgpu_mes_doorbell_init - set up the kernel doorbell allocator.
 *
 * Allocates the allocation bitmap and reserves the first
 * AMDGPU_MES_PRIORITY_NUM_LEVELS slots as the per-priority aggregated
 * doorbells.
 *
 * Returns 0 on success, -ENOMEM if the bitmap allocation fails.
 */
static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
{
	int i;
	struct amdgpu_mes *mes = &adev->mes;

	/* Bitmap for dynamic allocation of kernel doorbells */
	mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL);
	if (!mes->doorbell_bitmap) {
		dev_err(adev->dev, "Failed to allocate MES doorbell bitmap\n");
		return -ENOMEM;
	}

	/* One page worth of doorbells, AMDGPU_ONE_DOORBELL_SIZE bytes each. */
	mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE;
	for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) {
		/* Aggregated doorbells are spaced two dwords apart. */
		adev->mes.aggregated_doorbells[i] = mes->db_start_dw_offset + i * 2;
		set_bit(i, mes->doorbell_bitmap);
	}

	return 0;
}

/*
 * amdgpu_mes_event_log_init - allocate and zero the VRAM buffer the MES
 * firmware writes its event log into.  No-op unless the
 * amdgpu_mes_log_enable module option is set.
 *
 * Returns 0 on success or when logging is disabled, negative errno on
 * allocation failure.
 */
static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
{
	int r;

	if (!amdgpu_mes_log_enable)
		return 0;

	r = amdgpu_bo_create_kernel(adev, adev->mes.event_log_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM,
				    &adev->mes.event_log_gpu_obj,
				    &adev->mes.event_log_gpu_addr,
				    &adev->mes.event_log_cpu_addr);
	if (r) {
		dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r);
		return r;
	}

	/* Start from a clean log so stale entries are not dumped. */
	memset(adev->mes.event_log_cpu_addr, 0, adev->mes.event_log_size);

	return 0;

}

/* amdgpu_mes_doorbell_free - release the kernel doorbell bitmap. */
static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
{
	bitmap_free(adev->mes.doorbell_bitmap);
}

/*
 * amdgpu_mes_init - one-time MES software-state setup: ID allocators,
 * locks, VMID/HQD scheduling masks, per-pipe writeback slots, doorbells
 * and the optional event-log / hung-queue-doorbell buffers.
 *
 * Returns 0 on success, negative errno on failure (all partially
 * acquired resources are released on the error paths).
 */
int amdgpu_mes_init(struct amdgpu_device *adev)
{
	int i, r, num_pipes;
	u32 total_vmid_mask, reserved_vmid_mask;
	u32 queue_mask, reserved_queue_mask;
	int
num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1; /* XCC partitions; at least 1 */

	adev->mes.adev = adev;

	/* ID allocators for processes (pasid), gangs and queues. */
	idr_init(&adev->mes.pasid_idr);
	idr_init(&adev->mes.gang_id_idr);
	idr_init(&adev->mes.queue_id_idr);
	ida_init(&adev->mes.doorbell_ida);
	spin_lock_init(&adev->mes.queue_id_lock);
	mutex_init(&adev->mes.mutex_hidden);

	/* One ring lock per MES pipe per XCC. */
	for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++)
		spin_lock_init(&adev->mes.ring_lock[i]);

	adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
	/* GFX hub: hand MES every VMID except those below first_kfd_vmid. */
	total_vmid_mask = (u32)((1UL << 16) - 1);
	reserved_vmid_mask = (u32)((1UL << adev->vm_manager.first_kfd_vmid) - 1);

	adev->mes.vmid_mask_mmhub = 0xFF00;
	adev->mes.vmid_mask_gfxhub = total_vmid_mask & ~reserved_vmid_mask;

	/* All compute queues of a pipe, minus the reserved low ones. */
	queue_mask = (u32)(1UL << adev->gfx.mec.num_queue_per_pipe) - 1;
	reserved_queue_mask = (u32)(1UL << AMDGPU_MES_RESERVED_QUEUES) - 1;

	num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me;
	if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES)
		dev_warn(adev->dev, "more gfx pipes than supported by MES! (%d vs %d)\n",
			 num_pipes, AMDGPU_MES_MAX_GFX_PIPES);

	for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) {
		if (i >= num_pipes)
			break;
		if (amdgpu_ip_version(adev, GC_HWIP, 0) >=
		    IP_VERSION(12, 0, 0))
			/*
			 * GFX V12 has only one GFX pipe, but 8 queues in it.
			 * GFX pipe 0 queue 0 is being used by Kernel queue.
			 * Set GFX pipe 0 queue 1-7 for MES scheduling
			 * mask = 1111 1110b
			 */
			adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0xFF : 0xFE;
		else
			/*
			 * GFX pipe 0 queue 0 is being used by Kernel queue.
			 * Set GFX pipe 0 queue 1 for MES scheduling
			 * mask = 10b
			 */
			adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0x3 : 0x2;
	}

	num_pipes = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec;
	if (num_pipes > AMDGPU_MES_MAX_COMPUTE_PIPES)
		dev_warn(adev->dev, "more compute pipes than supported by MES! (%d vs %d)\n",
			 num_pipes, AMDGPU_MES_MAX_COMPUTE_PIPES);

	for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
		if (i >= num_pipes)
			break;
		/* With kernel queues disabled, MES gets all 4 low queues too. */
		adev->mes.compute_hqd_mask[i] =
			adev->gfx.disable_kq ? 0xF : (queue_mask & ~reserved_queue_mask);
	}

	num_pipes = adev->sdma.num_instances;
	if (num_pipes > AMDGPU_MES_MAX_SDMA_PIPES)
		dev_warn(adev->dev, "more SDMA pipes than supported by MES! (%d vs %d)\n",
			 num_pipes, AMDGPU_MES_MAX_SDMA_PIPES);

	for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
		if (i >= num_pipes)
			break;
		/* SDMA queues 2-7 go to MES; queues 0-1 stay with the kernel. */
		adev->mes.sdma_hqd_mask[i] = 0xfc;
	}

	/*
	 * Per pipe/XCC writeback slots: one for the scheduler context,
	 * one for query-status fences.  The *_ptr fields double as
	 * "allocated" markers for the error unwind below.
	 */
	for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) {
		r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]);
		if (r) {
			dev_err(adev->dev,
				"(%d) ring trail_fence_offs wb alloc failed\n",
				r);
			goto error;
		}
		adev->mes.sch_ctx_gpu_addr[i] =
			adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4);
		adev->mes.sch_ctx_ptr[i] =
			(uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]];

		r = amdgpu_device_wb_get(adev,
					 &adev->mes.query_status_fence_offs[i]);
		if (r) {
			dev_err(adev->dev,
				"(%d) query_status_fence_offs wb alloc failed\n",
				r);
			goto error;
		}
		adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr +
			(adev->mes.query_status_fence_offs[i] * 4);
		adev->mes.query_status_fence_ptr[i] =
			(uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]];
	}

	r = amdgpu_mes_doorbell_init(adev);
	if (r)
		goto error;

	r = amdgpu_mes_event_log_init(adev);
	if (r)
		goto error_doorbell;

	/* Optional per-pipe buffers for hung-queue doorbell reporting. */
	if (adev->mes.hung_queue_db_array_size) {
		for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) {
			r = amdgpu_bo_create_kernel(adev,
				adev->mes.hung_queue_db_array_size * sizeof(u32),
				PAGE_SIZE,
				AMDGPU_GEM_DOMAIN_GTT,
				&adev->mes.hung_queue_db_array_gpu_obj[i],
				&adev->mes.hung_queue_db_array_gpu_addr[i],
				&adev->mes.hung_queue_db_array_cpu_addr[i]);
			if (r) {
				dev_warn(adev->dev, "failed to create MES hung db array buffer (%d)", r);
				goto error_doorbell;
			}
		}
	}

	return 0;

error_doorbell:
	amdgpu_mes_doorbell_free(adev);
error:
	/* Release only what was actually acquired (ptr/obj act as flags). */
	for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) {
		if (adev->mes.sch_ctx_ptr[i])
			amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
		if (adev->mes.query_status_fence_ptr[i])
			amdgpu_device_wb_free(adev,
					      adev->mes.query_status_fence_offs[i]);
		if (adev->mes.hung_queue_db_array_gpu_obj[i])
			amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj[i],
					      &adev->mes.hung_queue_db_array_gpu_addr[i],
					      &adev->mes.hung_queue_db_array_cpu_addr[i]);
	}

	idr_destroy(&adev->mes.pasid_idr);
	idr_destroy(&adev->mes.gang_id_idr);
	idr_destroy(&adev->mes.queue_id_idr);
	ida_destroy(&adev->mes.doorbell_ida);
	mutex_destroy(&adev->mes.mutex_hidden);
	return r;
}

/*
 * amdgpu_mes_fini - tear down everything amdgpu_mes_init() created,
 * in reverse order.
 */
void amdgpu_mes_fini(struct amdgpu_device *adev)
{
	int i;
	int num_xcc = adev->gfx.xcc_mask ?
NUM_XCC(adev->gfx.xcc_mask) : 1; 250 251 amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, 252 &adev->mes.event_log_gpu_addr, 253 &adev->mes.event_log_cpu_addr); 254 255 for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) { 256 amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj[i], 257 &adev->mes.hung_queue_db_array_gpu_addr[i], 258 &adev->mes.hung_queue_db_array_cpu_addr[i]); 259 260 if (adev->mes.sch_ctx_ptr[i]) 261 amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); 262 if (adev->mes.query_status_fence_ptr[i]) 263 amdgpu_device_wb_free(adev, 264 adev->mes.query_status_fence_offs[i]); 265 } 266 267 amdgpu_mes_doorbell_free(adev); 268 269 idr_destroy(&adev->mes.pasid_idr); 270 idr_destroy(&adev->mes.gang_id_idr); 271 idr_destroy(&adev->mes.queue_id_idr); 272 ida_destroy(&adev->mes.doorbell_ida); 273 mutex_destroy(&adev->mes.mutex_hidden); 274 } 275 276 int amdgpu_mes_suspend(struct amdgpu_device *adev) 277 { 278 struct mes_suspend_gang_input input; 279 int r; 280 281 if (!amdgpu_mes_suspend_resume_all_supported(adev)) 282 return 0; 283 284 memset(&input, 0x0, sizeof(struct mes_suspend_gang_input)); 285 input.suspend_all_gangs = 1; 286 287 /* 288 * Avoid taking any other locks under MES lock to avoid circular 289 * lock dependencies. 290 */ 291 amdgpu_mes_lock(&adev->mes); 292 r = adev->mes.funcs->suspend_gang(&adev->mes, &input); 293 amdgpu_mes_unlock(&adev->mes); 294 if (r) 295 dev_err(adev->dev, "failed to suspend all gangs"); 296 297 return r; 298 } 299 300 int amdgpu_mes_resume(struct amdgpu_device *adev) 301 { 302 struct mes_resume_gang_input input; 303 int r; 304 305 if (!amdgpu_mes_suspend_resume_all_supported(adev)) 306 return 0; 307 308 memset(&input, 0x0, sizeof(struct mes_resume_gang_input)); 309 input.resume_all_gangs = 1; 310 311 /* 312 * Avoid taking any other locks under MES lock to avoid circular 313 * lock dependencies. 
314 */ 315 amdgpu_mes_lock(&adev->mes); 316 r = adev->mes.funcs->resume_gang(&adev->mes, &input); 317 amdgpu_mes_unlock(&adev->mes); 318 if (r) 319 dev_err(adev->dev, "failed to resume all gangs"); 320 321 return r; 322 } 323 324 int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, 325 struct amdgpu_ring *ring, uint32_t xcc_id) 326 { 327 struct mes_map_legacy_queue_input queue_input; 328 int r; 329 330 memset(&queue_input, 0, sizeof(queue_input)); 331 332 queue_input.xcc_id = xcc_id; 333 queue_input.queue_type = ring->funcs->type; 334 queue_input.doorbell_offset = ring->doorbell_index; 335 queue_input.pipe_id = ring->pipe; 336 queue_input.queue_id = ring->queue; 337 queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 338 queue_input.wptr_addr = ring->wptr_gpu_addr; 339 340 amdgpu_mes_lock(&adev->mes); 341 r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input); 342 amdgpu_mes_unlock(&adev->mes); 343 if (r) 344 dev_err(adev->dev, "failed to map legacy queue\n"); 345 346 return r; 347 } 348 349 int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, 350 struct amdgpu_ring *ring, 351 enum amdgpu_unmap_queues_action action, 352 u64 gpu_addr, u64 seq, uint32_t xcc_id) 353 { 354 struct mes_unmap_legacy_queue_input queue_input; 355 int r; 356 357 queue_input.xcc_id = xcc_id; 358 queue_input.action = action; 359 queue_input.queue_type = ring->funcs->type; 360 queue_input.doorbell_offset = ring->doorbell_index; 361 queue_input.pipe_id = ring->pipe; 362 queue_input.queue_id = ring->queue; 363 queue_input.trail_fence_addr = gpu_addr; 364 queue_input.trail_fence_data = seq; 365 366 amdgpu_mes_lock(&adev->mes); 367 r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input); 368 amdgpu_mes_unlock(&adev->mes); 369 if (r) 370 dev_err(adev->dev, "failed to unmap legacy queue\n"); 371 372 return r; 373 } 374 375 int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, 376 struct amdgpu_ring *ring, 377 unsigned int vmid, 378 bool use_mmio, 379 
uint32_t xcc_id)
{
	struct mes_reset_queue_input queue_input;
	int r;

	memset(&queue_input, 0, sizeof(queue_input));

	queue_input.xcc_id = xcc_id;
	queue_input.queue_type = ring->funcs->type;
	queue_input.doorbell_offset = ring->doorbell_index;
	queue_input.me_id = ring->me;
	queue_input.pipe_id = ring->pipe;
	queue_input.queue_id = ring->queue;
	/* MQD may be absent for some rings; firmware gets 0 in that case. */
	queue_input.mqd_addr = ring->mqd_obj ? amdgpu_bo_gpu_offset(ring->mqd_obj) : 0;
	queue_input.wptr_addr = ring->wptr_gpu_addr;
	queue_input.vmid = vmid;
	queue_input.use_mmio = use_mmio;
	queue_input.is_kq = true;
	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX)
		queue_input.legacy_gfx = true;

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input);
	amdgpu_mes_unlock(&adev->mes);
	if (r)
		dev_err(adev->dev, "failed to reset legacy queue\n");

	return r;
}

/* Number of u32 slots in the per-pipe hung-queue doorbell array. */
int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev)
{
	return adev->mes.hung_queue_db_array_size;
}

/*
 * amdgpu_mes_detect_and_reset_hung_queues - let the MES firmware scan
 * for hung queues of @queue_type and (unless @detect_only) reset them.
 *
 * On success, doorbell offsets of the hung queues found are copied to
 * @hung_db_array and their count stored in @hung_db_num.
 *
 * Returns 0 on success, -EINVAL on bad arguments/queue type, or the
 * firmware call's status.
 */
int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
					    int queue_type,
					    bool detect_only,
					    unsigned int *hung_db_num,
					    u32 *hung_db_array,
					    uint32_t xcc_id)
{
	struct mes_detect_and_reset_queue_input input;
	u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr[xcc_id];
	int r, i;

	if (!hung_db_num || !hung_db_array)
		return -EINVAL;

	if ((queue_type != AMDGPU_RING_TYPE_GFX) &&
	    (queue_type != AMDGPU_RING_TYPE_COMPUTE) &&
	    (queue_type != AMDGPU_RING_TYPE_SDMA))
		return -EINVAL;

	/*
	 * NOTE(review): unlike the other callers in this file, `input`
	 * is not zero-initialized and @xcc_id is not forwarded into it
	 * before the firmware call — verify this is intentional.
	 */
	/* Clear the doorbell array before detection */
	memset(adev->mes.hung_queue_db_array_cpu_addr[xcc_id], AMDGPU_MES_INVALID_DB_OFFSET,
	       adev->mes.hung_queue_db_array_size * sizeof(u32));
	input.queue_type = queue_type;
	input.detect_only = detect_only;

	r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes,
							  &input);
	if (r) {
		dev_err(adev->dev, "failed to detect and reset\n");
	} else {
		*hung_db_num = 0;
		/* Only the slots before the HQD-info offset hold doorbells. */
		for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) {
			if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) {
				hung_db_array[i] = db_array[i];
				*hung_db_num += 1;
			}
		}

		/*
		 * TODO: return HQD info for MES scheduled user compute queue reset cases
		 * stored in hung_db_array hqd info offset to full array size
		 */
	}

	return r;
}

/*
 * amdgpu_mes_rreg - read a GPU register through the MES firmware.
 *
 * The firmware writes the register value into a writeback slot which
 * is then read back by the CPU.  Returns the value read, or 0 on any
 * failure (no error is propagated to the caller).
 */
uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg,
			 uint32_t xcc_id)
{
	struct mes_misc_op_input op_input;
	int r, val = 0;
	uint32_t addr_offset = 0;
	uint64_t read_val_gpu_addr;
	uint32_t *read_val_ptr;

	if (amdgpu_device_wb_get(adev, &addr_offset)) {
		dev_err(adev->dev, "critical bug! too many mes readers\n");
		goto error;
	}
	read_val_gpu_addr = adev->wb.gpu_addr + (addr_offset * 4);
	read_val_ptr = (uint32_t *)&adev->wb.wb[addr_offset];
	op_input.xcc_id = xcc_id;
	op_input.op = MES_MISC_OP_READ_REG;
	op_input.read_reg.reg_offset = reg;
	op_input.read_reg.buffer_addr = read_val_gpu_addr;

	if (!adev->mes.funcs->misc_op) {
		dev_err(adev->dev, "mes rreg is not supported!\n");
		goto error;
	}

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	amdgpu_mes_unlock(&adev->mes);
	if (r)
		dev_err(adev->dev, "failed to read reg (0x%x)\n", reg);
	else
		val = *(read_val_ptr);

error:
	/* assumes writeback offset 0 means "never allocated" — TODO confirm */
	if (addr_offset)
		amdgpu_device_wb_free(adev, addr_offset);
	return val;
}

/*
 * amdgpu_mes_wreg - write a GPU register through the MES firmware.
 *
 * Returns 0 on success, -EINVAL when misc_op is unsupported, or the
 * firmware call's status.
 */
int amdgpu_mes_wreg(struct amdgpu_device *adev, uint32_t reg,
		    uint32_t val, uint32_t xcc_id)
{
	struct mes_misc_op_input op_input;
	int r;

	op_input.xcc_id = xcc_id;
	op_input.op = MES_MISC_OP_WRITE_REG;
	op_input.write_reg.reg_offset = reg;
	op_input.write_reg.reg_value = val;

	if (!adev->mes.funcs->misc_op) {
		dev_err(adev->dev,
"mes wreg is not supported!\n");
		r = -EINVAL;
		goto error;
	}

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	amdgpu_mes_unlock(&adev->mes);
	if (r)
		dev_err(adev->dev, "failed to write reg (0x%x)\n", reg);

error:
	return r;
}

/*
 * amdgpu_mes_reg_write_reg_wait - via MES firmware, write @reg0 and
 * then poll @reg1 until (value & @mask) == @ref.
 *
 * Returns 0 on success, -EINVAL when misc_op is unsupported, or the
 * firmware call's status.
 */
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
				  uint32_t reg0, uint32_t reg1,
				  uint32_t ref, uint32_t mask,
				  uint32_t xcc_id)
{
	struct mes_misc_op_input op_input;
	int r;

	op_input.xcc_id = xcc_id;
	op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT;
	op_input.wrm_reg.reg0 = reg0;
	op_input.wrm_reg.reg1 = reg1;
	op_input.wrm_reg.ref = ref;
	op_input.wrm_reg.mask = mask;

	if (!adev->mes.funcs->misc_op) {
		dev_err(adev->dev, "mes reg_write_reg_wait is not supported!\n");
		r = -EINVAL;
		goto error;
	}

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	amdgpu_mes_unlock(&adev->mes);
	if (r)
		dev_err(adev->dev, "failed to reg_write_reg_wait\n");

error:
	return r;
}

/*
 * amdgpu_mes_hdp_flush - flush HDP by writing the NBIO flush-request
 * register and waiting on the flush-done register, both through MES.
 *
 * Returns 0 on success, -EINVAL when the GFX hook is missing.
 */
int amdgpu_mes_hdp_flush(struct amdgpu_device *adev)
{
	uint32_t hdp_flush_req_offset, hdp_flush_done_offset;
	struct amdgpu_ring *mes_ring;
	uint32_t ref_and_mask = 0, reg_mem_engine = 0;

	if (!adev->gfx.funcs->get_hdp_flush_mask) {
		dev_err(adev->dev, "mes hdp flush is not supported.\n");
		return -EINVAL;
	}

	mes_ring = &adev->mes.ring[0];
	hdp_flush_req_offset = adev->nbio.funcs->get_hdp_flush_req_offset(adev);
	hdp_flush_done_offset = adev->nbio.funcs->get_hdp_flush_done_offset(adev);

	adev->gfx.funcs->get_hdp_flush_mask(mes_ring, &ref_and_mask, &reg_mem_engine);

	return amdgpu_mes_reg_write_reg_wait(adev, hdp_flush_req_offset, hdp_flush_done_offset,
					     ref_and_mask, ref_and_mask, 0);
}

/*
 * amdgpu_mes_set_shader_debugger - configure shader-debugger state
 * (per-VMID SPI debug control, TCP watch registers, trap enable) for
 * the process identified by @process_context_addr.
 *
 * The process_ctx_flush flag is rejected here; use
 * amdgpu_mes_flush_shader_debugger() for that instead.
 *
 * Returns 0 on success, -EINVAL on unsupported misc_op or flush flag,
 * or the firmware call's status.
 */
int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
				   uint64_t process_context_addr,
				   uint32_t spi_gdbg_per_vmid_cntl,
				   const uint32_t *tcp_watch_cntl,
				   uint32_t flags,
				   bool trap_en,
				   uint32_t xcc_id)
{
	struct mes_misc_op_input op_input = {0};
	int r;

	if (!adev->mes.funcs->misc_op) {
		dev_err(adev->dev,
			"mes set shader debugger is not supported!\n");
		return -EINVAL;
	}

	op_input.xcc_id = xcc_id;
	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
	op_input.set_shader_debugger.process_context_addr = process_context_addr;
	op_input.set_shader_debugger.flags.u32all = flags;

	/* use amdgpu mes_flush_shader_debugger instead */
	if (op_input.set_shader_debugger.flags.process_ctx_flush)
		return -EINVAL;

	op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl;
	memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
	       sizeof(op_input.set_shader_debugger.tcp_watch_cntl));

	/* trap_en is only understood by MES API versions >= 14. */
	if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
	     AMDGPU_MES_API_VERSION_SHIFT) >= 14)
		op_input.set_shader_debugger.trap_en = trap_en;

	amdgpu_mes_lock(&adev->mes);

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		dev_err(adev->dev, "failed to set_shader_debugger\n");

	amdgpu_mes_unlock(&adev->mes);

	return r;
}

/*
 * amdgpu_mes_flush_shader_debugger - flush the shader-debugger process
 * context for @process_context_addr (SET_SHADER_DEBUGGER with only the
 * process_ctx_flush flag set).
 *
 * Returns 0 on success, -EINVAL on unsupported misc_op, or the
 * firmware call's status.
 */
int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
				     uint64_t process_context_addr,
				     uint32_t xcc_id)
{
	struct mes_misc_op_input op_input = {0};
	int r;

	if (!adev->mes.funcs->misc_op) {
		dev_err(adev->dev,
			"mes flush shader debugger is not supported!\n");
		return -EINVAL;
	}

	op_input.xcc_id = xcc_id;
	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
	op_input.set_shader_debugger.process_context_addr = process_context_addr;
	op_input.set_shader_debugger.flags.process_ctx_flush = true;

	amdgpu_mes_lock(&adev->mes);

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		dev_err(adev->dev,
			"failed to set_shader_debugger\n");

	amdgpu_mes_unlock(&adev->mes);

	return r;
}

/* Aggregated doorbell dword offset for the given priority level. */
uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
						  enum amdgpu_mes_priority_level prio)
{
	return adev->mes.aggregated_doorbells[prio];
}

/*
 * amdgpu_mes_init_microcode - request the MES firmware image for @pipe,
 * parse its header (uc/data start addresses, fw version) and, for PSP
 * front-door loading, register the ucode/data sections with the
 * firmware framework.
 *
 * Returns 0 on success, negative errno on failure (the firmware
 * reference is released on the error path).
 */
int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
{
	const struct mes_firmware_header_v1_0 *mes_hdr;
	struct amdgpu_firmware_info *info;
	char ucode_prefix[30];
	char fw_name[50];
	bool need_retry = false;
	u32 *ucode_ptr;
	int r;

	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix,
				       sizeof(ucode_prefix));
	if (adev->enable_uni_mes) {
		snprintf(fw_name, sizeof(fw_name),
			 "amdgpu/%s_uni_mes.bin", ucode_prefix);
	} else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
		   amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0)) {
		/* GFX11: "_2" image for the scheduler pipe, "1" for KIQ. */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
			 ucode_prefix,
			 pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1");
		need_retry = true;
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
			 ucode_prefix,
			 pipe == AMDGPU_MES_SCHED_PIPE ?
"" : "1");
	}

	r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], AMDGPU_UCODE_REQUIRED,
				 "%s", fw_name);
	if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) {
		/* Older GFX11 firmware packages ship a single _mes.bin. */
		dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix);
		r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe],
					 AMDGPU_UCODE_REQUIRED,
					 "amdgpu/%s_mes.bin", ucode_prefix);
	}

	if (r)
		goto out;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw[pipe]->data;
	adev->mes.uc_start_addr[pipe] =
		le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
		((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
	adev->mes.data_start_addr[pipe] =
		le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
		((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
	/* fw version lives at dword 24 of the ucode payload. */
	ucode_ptr = (u32 *)(adev->mes.fw[pipe]->data +
			    sizeof(union amdgpu_firmware_header));
	adev->mes.fw_version[pipe] =
		le32_to_cpu(ucode_ptr[24]) & AMDGPU_MES_VERSION_MASK;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		int ucode, ucode_data;

		if (pipe == AMDGPU_MES_SCHED_PIPE) {
			ucode = AMDGPU_UCODE_ID_CP_MES;
			ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA;
		} else {
			ucode = AMDGPU_UCODE_ID_CP_MES1;
			ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA;
		}

		info = &adev->firmware.ucode[ucode];
		info->ucode_id = ucode;
		info->fw = adev->mes.fw[pipe];
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
			      PAGE_SIZE);

		info = &adev->firmware.ucode[ucode_data];
		info->ucode_id = ucode_data;
		info->fw = adev->mes.fw[pipe];
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
			      PAGE_SIZE);
	}

	return 0;
out:
	amdgpu_ucode_release(&adev->mes.fw[pipe]);
	return r;
}

/*
 * amdgpu_mes_suspend_resume_all_supported - true when the loaded MES
 * firmware understands suspend/resume-all-gangs: GFX11 with scheduler
 * revision >= 0x63, or anything GFX12+.
 */
bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev)
{
	uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;

	return ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
		 amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) &&
		 mes_rev >= 0x63) ||
		amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0));
}

/* Fix me -- node_id is used to identify the correct MES instances in the future */
static int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev,
					    uint32_t node_id, bool enable)
{
	struct mes_misc_op_input op_input = {0};
	int r;

	op_input.op = MES_MISC_OP_CHANGE_CONFIG;
	op_input.change_config.option.limit_single_process = enable ? 1 : 0;

	if (!adev->mes.funcs->misc_op) {
		dev_err(adev->dev, "mes change config is not supported!\n");
		r = -EINVAL;
		goto error;
	}

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	amdgpu_mes_unlock(&adev->mes);
	if (r)
		dev_err(adev->dev, "failed to change_config.\n");

error:
	return r;
}

/*
 * amdgpu_mes_update_enforce_isolation - push the current per-partition
 * enforce-isolation settings to the MES firmware.  Only active when MES
 * and the cleaner shader are enabled.  Returns 0, or an OR of the
 * per-partition call results on failure.
 */
int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev)
{
	int i, r = 0;

	if (adev->enable_mes && adev->gfx.enable_cleaner_shader) {
		mutex_lock(&adev->enforce_isolation_mutex);
		for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
			if (adev->enforce_isolation[i] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
				r |= amdgpu_mes_set_enforce_isolation(adev, i, true);
			else
				r |= amdgpu_mes_set_enforce_isolation(adev, i, false);
		}
		mutex_unlock(&adev->enforce_isolation_mutex);
	}
	return r;
}

#if defined(CONFIG_DEBUG_FS)

/* debugfs: hex-dump the MES firmware event log buffer. */
static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = m->private;
	uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);

	seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
		     mem, adev->mes.event_log_size, false);

	return 0;
}

DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);

#endif

/*
 * amdgpu_debugfs_mes_event_log_init - expose the MES event log as a
 * read-only debugfs file when MES and event logging are enabled.
 */
void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
{

#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;

	if (adev->enable_mes && amdgpu_mes_log_enable)
		debugfs_create_file("amdgpu_mes_event_log", 0444, root,
				    adev, &amdgpu_debugfs_mes_event_log_fops);

#endif
}