1 /* 2 * Copyright 2019 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <linux/firmware.h> 25 #include <drm/drm_exec.h> 26 27 #include "amdgpu_mes.h" 28 #include "amdgpu.h" 29 #include "soc15_common.h" 30 #include "amdgpu_mes_ctx.h" 31 32 #define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 33 #define AMDGPU_ONE_DOORBELL_SIZE 8 34 35 int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) 36 { 37 return roundup(AMDGPU_ONE_DOORBELL_SIZE * 38 AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS, 39 PAGE_SIZE); 40 } 41 42 static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) 43 { 44 int i; 45 struct amdgpu_mes *mes = &adev->mes; 46 47 /* Bitmap for dynamic allocation of kernel doorbells */ 48 mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL); 49 if (!mes->doorbell_bitmap) { 50 dev_err(adev->dev, "Failed to allocate MES doorbell bitmap\n"); 51 return -ENOMEM; 52 } 53 54 mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE; 55 for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) { 56 adev->mes.aggregated_doorbells[i] = mes->db_start_dw_offset + i * 2; 57 set_bit(i, mes->doorbell_bitmap); 58 } 59 60 return 0; 61 } 62 63 static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) 64 { 65 int r; 66 67 if (!amdgpu_mes_log_enable) 68 return 0; 69 70 r = amdgpu_bo_create_kernel(adev, adev->mes.event_log_size, PAGE_SIZE, 71 AMDGPU_GEM_DOMAIN_VRAM, 72 &adev->mes.event_log_gpu_obj, 73 &adev->mes.event_log_gpu_addr, 74 &adev->mes.event_log_cpu_addr); 75 if (r) { 76 dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r); 77 return r; 78 } 79 80 memset(adev->mes.event_log_cpu_addr, 0, adev->mes.event_log_size); 81 82 return 0; 83 84 } 85 86 static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev) 87 { 88 bitmap_free(adev->mes.doorbell_bitmap); 89 } 90 91 static inline u32 amdgpu_mes_get_hqd_mask(u32 num_pipe, 92 u32 num_hqd_per_pipe, 93 u32 num_reserved_hqd) 94 { 95 if (num_pipe == 0) 96 return 0; 97 98 u32 total_hqd_mask = (u32)((1ULL << num_hqd_per_pipe) - 1); 99 u32 reserved_hqd_mask = (u32)((1ULL << DIV_ROUND_UP(num_reserved_hqd, num_pipe)) - 1); 100 101 return (total_hqd_mask & ~reserved_hqd_mask); 102 } 103 104 int amdgpu_mes_init(struct amdgpu_device *adev) 105 { 106 int i, r, num_pipes, num_queues = 0; 107 u32 total_vmid_mask, reserved_vmid_mask; 108 int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1; 109 u32 gfx_hqd_mask = amdgpu_mes_get_hqd_mask(adev->gfx.me.num_pipe_per_me, 110 adev->gfx.me.num_queue_per_pipe, 111 adev->gfx.disable_kq ? 0 : adev->gfx.num_gfx_rings); 112 u32 compute_hqd_mask = amdgpu_mes_get_hqd_mask(adev->gfx.mec.num_pipe_per_mec, 113 adev->gfx.mec.num_queue_per_pipe, 114 adev->gfx.disable_kq ? 0 : adev->gfx.num_compute_rings); 115 116 adev->mes.adev = adev; 117 118 ida_init(&adev->mes.doorbell_ida); 119 spin_lock_init(&adev->mes.queue_id_lock); 120 mutex_init(&adev->mes.mutex_hidden); 121 122 for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) 123 spin_lock_init(&adev->mes.ring_lock[i]); 124 125 adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; 126 total_vmid_mask = (u32)((1UL << 16) - 1); 127 reserved_vmid_mask = (u32)((1UL << adev->vm_manager.first_kfd_vmid) - 1); 128 129 adev->mes.vmid_mask_mmhub = 0xFF00; 130 adev->mes.vmid_mask_gfxhub = total_vmid_mask & ~reserved_vmid_mask; 131 132 num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me; 133 if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES) 134 dev_warn(adev->dev, "more gfx pipes than supported by MES! (%d vs %d)\n", 135 num_pipes, AMDGPU_MES_MAX_GFX_PIPES); 136 137 for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) { 138 if (i >= num_pipes) 139 break; 140 141 adev->mes.gfx_hqd_mask[i] = gfx_hqd_mask; 142 } 143 144 num_pipes = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec; 145 if (num_pipes > AMDGPU_MES_MAX_COMPUTE_PIPES) 146 dev_warn(adev->dev, "more compute pipes than supported by MES! (%d vs %d)\n", 147 num_pipes, AMDGPU_MES_MAX_COMPUTE_PIPES); 148 149 for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { 150 /* 151 * Currently, only MEC1 is used for both kernel and user compute queue. 152 * To enable other MEC, we need to redistribute queues per pipe and 153 * adjust queue resource shared with kfd that needs a separate patch. 154 * Skip other MEC for now to avoid potential issues. 155 */ 156 if (i >= adev->gfx.mec.num_pipe_per_mec) 157 break; 158 159 adev->mes.compute_hqd_mask[i] = compute_hqd_mask; 160 } 161 162 num_pipes = adev->sdma.num_inst_per_xcc ? 163 adev->sdma.num_inst_per_xcc : adev->sdma.num_instances; 164 if (num_pipes > AMDGPU_MES_MAX_SDMA_PIPES) 165 dev_warn(adev->dev, "more SDMA pipes than supported by MES! (%d vs %d)\n", 166 num_pipes, AMDGPU_MES_MAX_SDMA_PIPES); 167 168 for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { 169 if (i >= num_pipes) 170 break; 171 adev->mes.sdma_hqd_mask[i] = 0xfc; 172 } 173 174 dev_info(adev->dev, 175 "MES: vmid_mask_mmhub 0x%08x, vmid_mask_gfxhub 0x%08x\n", 176 adev->mes.vmid_mask_mmhub, 177 adev->mes.vmid_mask_gfxhub); 178 179 dev_info(adev->dev, 180 "MES: gfx_hqd_mask 0x%08x, compute_hqd_mask 0x%08x, sdma_hqd_mask 0x%08x\n", 181 adev->mes.gfx_hqd_mask[0], 182 adev->mes.compute_hqd_mask[0], 183 adev->mes.sdma_hqd_mask[0]); 184 185 for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) { 186 r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]); 187 if (r) { 188 dev_err(adev->dev, 189 "(%d) ring trail_fence_offs wb alloc failed\n", 190 r); 191 goto error; 192 } 193 adev->mes.sch_ctx_gpu_addr[i] = 194 adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4); 195 adev->mes.sch_ctx_ptr[i] = 196 (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]]; 197 198 r = amdgpu_device_wb_get(adev, 199 &adev->mes.query_status_fence_offs[i]); 200 if (r) { 201 dev_err(adev->dev, 202 "(%d) query_status_fence_offs wb alloc failed\n", 203 r); 204 goto error; 205 } 206 adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr + 207 (adev->mes.query_status_fence_offs[i] * 4); 208 adev->mes.query_status_fence_ptr[i] = 209 (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]]; 210 } 211 212 r = amdgpu_mes_doorbell_init(adev); 213 if (r) 214 goto error; 215 216 r = amdgpu_mes_event_log_init(adev); 217 if (r) 218 goto error_doorbell; 219 220 if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 1, 0)) { 221 /* When queue/pipe reset is done in MES instead of in the 222 * driver, MES passes hung queues information to the driver in 223 * hung_queue_hqd_info. Calculate required space to store this 224 * information. 225 */ 226 for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) 227 num_queues += hweight32(adev->mes.gfx_hqd_mask[i]); 228 229 for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) 230 num_queues += hweight32(adev->mes.compute_hqd_mask[i]); 231 232 for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) 233 num_queues += hweight32(adev->mes.sdma_hqd_mask[i]) * num_xcc; 234 235 adev->mes.hung_queue_hqd_info_offset = num_queues; 236 adev->mes.hung_queue_db_array_size = num_queues * 2; 237 } 238 239 if (adev->mes.hung_queue_db_array_size) { 240 for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { 241 r = amdgpu_bo_create_kernel(adev, 242 adev->mes.hung_queue_db_array_size * sizeof(u32), 243 PAGE_SIZE, 244 AMDGPU_GEM_DOMAIN_GTT, 245 &adev->mes.hung_queue_db_array_gpu_obj[i], 246 &adev->mes.hung_queue_db_array_gpu_addr[i], 247 &adev->mes.hung_queue_db_array_cpu_addr[i]); 248 if (r) { 249 dev_warn(adev->dev, "failed to create MES hung db array buffer (%d)", r); 250 goto error_doorbell; 251 } 252 } 253 } 254 255 return 0; 256 257 error_doorbell: 258 amdgpu_mes_doorbell_free(adev); 259 error: 260 for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) { 261 if (adev->mes.sch_ctx_ptr[i]) 262 amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); 263 if (adev->mes.query_status_fence_ptr[i]) 264 amdgpu_device_wb_free(adev, 265 adev->mes.query_status_fence_offs[i]); 266 if (adev->mes.hung_queue_db_array_gpu_obj[i]) 267 amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj[i], 268 &adev->mes.hung_queue_db_array_gpu_addr[i], 269 &adev->mes.hung_queue_db_array_cpu_addr[i]); 270 } 271 272 ida_destroy(&adev->mes.doorbell_ida); 273 mutex_destroy(&adev->mes.mutex_hidden); 274 return r; 275 } 276 277 void amdgpu_mes_fini(struct amdgpu_device *adev) 278 { 279 int i; 280 int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1; 281 282 amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, 283 &adev->mes.event_log_gpu_addr, 284 &adev->mes.event_log_cpu_addr); 285 286 for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) { 287 if (adev->mes.hung_queue_db_array_gpu_obj[i]) 288 amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj[i], 289 &adev->mes.hung_queue_db_array_gpu_addr[i], 290 &adev->mes.hung_queue_db_array_cpu_addr[i]); 291 if (adev->mes.sch_ctx_ptr[i]) 292 amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); 293 if (adev->mes.query_status_fence_ptr[i]) 294 amdgpu_device_wb_free(adev, 295 adev->mes.query_status_fence_offs[i]); 296 } 297 298 amdgpu_mes_doorbell_free(adev); 299 300 ida_destroy(&adev->mes.doorbell_ida); 301 mutex_destroy(&adev->mes.mutex_hidden); 302 } 303 304 int amdgpu_mes_suspend(struct amdgpu_device *adev, u32 xcc_id) 305 { 306 struct mes_suspend_gang_input input; 307 int r; 308 309 if (!amdgpu_mes_suspend_resume_all_supported(adev)) 310 return 0; 311 312 memset(&input, 0x0, sizeof(struct mes_suspend_gang_input)); 313 input.suspend_all_gangs = 1; 314 input.xcc_id = xcc_id; 315 if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0)) && 316 ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x71)) 317 input.suspend_all_sdma_gangs = 1; 318 319 /* 320 * Avoid taking any other locks under MES lock to avoid circular 321 * lock dependencies. 322 */ 323 amdgpu_mes_lock(&adev->mes); 324 r = adev->mes.funcs->suspend_gang(&adev->mes, &input); 325 amdgpu_mes_unlock(&adev->mes); 326 if (r) 327 dev_err(adev->dev, "failed to suspend all gangs"); 328 329 return r; 330 } 331 332 int amdgpu_mes_resume(struct amdgpu_device *adev, u32 xcc_id) 333 { 334 struct mes_resume_gang_input input; 335 int r; 336 337 if (!amdgpu_mes_suspend_resume_all_supported(adev)) 338 return 0; 339 340 memset(&input, 0x0, sizeof(struct mes_resume_gang_input)); 341 input.resume_all_gangs = 1; 342 input.xcc_id = xcc_id; 343 344 /* 345 * Avoid taking any other locks under MES lock to avoid circular 346 * lock dependencies. 347 */ 348 amdgpu_mes_lock(&adev->mes); 349 r = adev->mes.funcs->resume_gang(&adev->mes, &input); 350 amdgpu_mes_unlock(&adev->mes); 351 if (r) 352 dev_err(adev->dev, "failed to resume all gangs"); 353 354 return r; 355 } 356 357 int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, 358 struct amdgpu_ring *ring, uint32_t xcc_id) 359 { 360 struct mes_map_legacy_queue_input queue_input; 361 int r; 362 363 memset(&queue_input, 0, sizeof(queue_input)); 364 365 queue_input.xcc_id = xcc_id; 366 queue_input.queue_type = ring->funcs->type; 367 queue_input.doorbell_offset = ring->doorbell_index; 368 queue_input.pipe_id = ring->pipe; 369 queue_input.queue_id = ring->queue; 370 queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 371 queue_input.wptr_addr = ring->wptr_gpu_addr; 372 373 amdgpu_mes_lock(&adev->mes); 374 r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input); 375 amdgpu_mes_unlock(&adev->mes); 376 if (r) 377 dev_err(adev->dev, "failed to map legacy queue\n"); 378 379 return r; 380 } 381 382 int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, 383 struct amdgpu_ring *ring, 384 enum amdgpu_unmap_queues_action action, 385 u64 gpu_addr, u64 seq, uint32_t xcc_id) 386 { 387 struct mes_unmap_legacy_queue_input queue_input; 388 int r; 389 390 queue_input.xcc_id = xcc_id; 391 queue_input.action = action; 392 queue_input.queue_type = ring->funcs->type; 393 queue_input.doorbell_offset = ring->doorbell_index; 394 queue_input.pipe_id = ring->pipe; 395 queue_input.queue_id = ring->queue; 396 queue_input.trail_fence_addr = gpu_addr; 397 queue_input.trail_fence_data = seq; 398 399 amdgpu_mes_lock(&adev->mes); 400 r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input); 401 amdgpu_mes_unlock(&adev->mes); 402 if (r) 403 dev_err(adev->dev, "failed to unmap legacy queue\n"); 404 405 return r; 406 } 407 408 int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, 409 struct amdgpu_ring *ring, 410 unsigned int vmid, 411 bool use_mmio, 412 uint32_t xcc_id) 413 { 414 struct mes_reset_queue_input queue_input; 415 int r; 416 417 memset(&queue_input, 0, sizeof(queue_input)); 418 419 queue_input.xcc_id = xcc_id; 420 queue_input.queue_type = ring->funcs->type; 421 queue_input.doorbell_offset = ring->doorbell_index; 422 queue_input.me_id = ring->me; 423 queue_input.pipe_id = ring->pipe; 424 queue_input.queue_id = ring->queue; 425 queue_input.mqd_addr = ring->mqd_obj ? amdgpu_bo_gpu_offset(ring->mqd_obj) : 0; 426 queue_input.wptr_addr = ring->wptr_gpu_addr; 427 queue_input.vmid = vmid; 428 queue_input.use_mmio = use_mmio; 429 queue_input.is_kq = true; 430 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) 431 queue_input.legacy_gfx = true; 432 433 amdgpu_mes_lock(&adev->mes); 434 r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); 435 amdgpu_mes_unlock(&adev->mes); 436 if (r) 437 dev_err(adev->dev, "failed to reset legacy queue\n"); 438 439 return r; 440 } 441 442 int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev) 443 { 444 return adev->mes.hung_queue_db_array_size; 445 } 446 447 int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, 448 int queue_type, 449 bool detect_only, 450 unsigned int *hung_db_num, 451 u32 *hung_db_array, 452 uint32_t xcc_id) 453 { 454 struct mes_detect_and_reset_queue_input input; 455 u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr[xcc_id]; 456 int hqd_info_offset = adev->mes.hung_queue_hqd_info_offset, r, i; 457 458 if (!hung_db_num || !hung_db_array) 459 return -EINVAL; 460 461 if ((queue_type != AMDGPU_RING_TYPE_GFX) && 462 (queue_type != AMDGPU_RING_TYPE_COMPUTE) && 463 (queue_type != AMDGPU_RING_TYPE_SDMA)) 464 return -EINVAL; 465 466 /* Clear the doorbell array before detection */ 467 memset(adev->mes.hung_queue_db_array_cpu_addr[xcc_id], AMDGPU_MES_INVALID_DB_OFFSET, 468 adev->mes.hung_queue_db_array_size * sizeof(u32)); 469 input.queue_type = queue_type; 470 input.detect_only = detect_only; 471 input.xcc_id = xcc_id; 472 473 r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes, 474 &input); 475 476 if (r && detect_only) { 477 dev_err(adev->dev, "Failed to detect hung queues\n"); 478 return r; 479 } 480 481 *hung_db_num = 0; 482 /* MES passes hung queues' doorbell to driver */ 483 for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) { 484 /* Finding hung queues where db_array[i] is a valid doorbell */ 485 if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) { 486 hung_db_array[i] = db_array[i]; 487 *hung_db_num += 1; 488 } 489 } 490 491 if (r && !(*hung_db_num)) { 492 dev_err(adev->dev, "Failed to detect and reset hung queues\n"); 493 return r; 494 } 495 496 for (i = hqd_info_offset; i < hqd_info_offset + *hung_db_num; i++) 497 hung_db_array[i] = db_array[i]; 498 499 return r; 500 } 501 502 uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg, 503 uint32_t xcc_id) 504 { 505 struct mes_misc_op_input op_input; 506 int r, val = 0; 507 uint32_t addr_offset = 0; 508 uint64_t read_val_gpu_addr; 509 uint32_t *read_val_ptr; 510 511 if (amdgpu_device_wb_get(adev, &addr_offset)) { 512 dev_err(adev->dev, "critical bug! too many mes readers\n"); 513 goto error; 514 } 515 read_val_gpu_addr = adev->wb.gpu_addr + (addr_offset * 4); 516 read_val_ptr = (uint32_t *)&adev->wb.wb[addr_offset]; 517 op_input.xcc_id = xcc_id; 518 op_input.op = MES_MISC_OP_READ_REG; 519 op_input.read_reg.reg_offset = reg; 520 op_input.read_reg.buffer_addr = read_val_gpu_addr; 521 522 if (!adev->mes.funcs->misc_op) { 523 dev_err(adev->dev, "mes rreg is not supported!\n"); 524 goto error; 525 } 526 527 amdgpu_mes_lock(&adev->mes); 528 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 529 amdgpu_mes_unlock(&adev->mes); 530 if (r) 531 dev_err(adev->dev, "failed to read reg (0x%x)\n", reg); 532 else 533 val = *(read_val_ptr); 534 535 error: 536 if (addr_offset) 537 amdgpu_device_wb_free(adev, addr_offset); 538 return val; 539 } 540 541 int amdgpu_mes_wreg(struct amdgpu_device *adev, uint32_t reg, 542 uint32_t val, uint32_t xcc_id) 543 { 544 struct mes_misc_op_input op_input; 545 int r; 546 547 op_input.xcc_id = xcc_id; 548 op_input.op = MES_MISC_OP_WRITE_REG; 549 op_input.write_reg.reg_offset = reg; 550 op_input.write_reg.reg_value = val; 551 552 if (!adev->mes.funcs->misc_op) { 553 dev_err(adev->dev, "mes wreg is not supported!\n"); 554 r = -EINVAL; 555 goto error; 556 } 557 558 amdgpu_mes_lock(&adev->mes); 559 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 560 amdgpu_mes_unlock(&adev->mes); 561 if (r) 562 dev_err(adev->dev, "failed to write reg (0x%x)\n", reg); 563 564 error: 565 return r; 566 } 567 568 int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, 569 uint32_t reg0, uint32_t reg1, 570 uint32_t ref, uint32_t mask, 571 uint32_t xcc_id) 572 { 573 struct mes_misc_op_input op_input; 574 int r; 575 576 op_input.xcc_id = xcc_id; 577 op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT; 578 op_input.wrm_reg.reg0 = reg0; 579 op_input.wrm_reg.reg1 = reg1; 580 op_input.wrm_reg.ref = ref; 581 op_input.wrm_reg.mask = mask; 582 583 if (!adev->mes.funcs->misc_op) { 584 dev_err(adev->dev, "mes reg_write_reg_wait is not supported!\n"); 585 r = -EINVAL; 586 goto error; 587 } 588 589 amdgpu_mes_lock(&adev->mes); 590 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 591 amdgpu_mes_unlock(&adev->mes); 592 if (r) 593 dev_err(adev->dev, "failed to reg_write_reg_wait\n"); 594 595 error: 596 return r; 597 } 598 599 int amdgpu_mes_hdp_flush(struct amdgpu_device *adev) 600 { 601 uint32_t hdp_flush_req_offset, hdp_flush_done_offset; 602 struct amdgpu_ring *mes_ring; 603 uint32_t ref_and_mask = 0, reg_mem_engine = 0; 604 605 if (!adev->gfx.funcs->get_hdp_flush_mask) { 606 dev_err(adev->dev, "mes hdp flush is not supported.\n"); 607 return -EINVAL; 608 } 609 610 mes_ring = &adev->mes.ring[0]; 611 hdp_flush_req_offset = adev->nbio.funcs->get_hdp_flush_req_offset(adev); 612 hdp_flush_done_offset = adev->nbio.funcs->get_hdp_flush_done_offset(adev); 613 614 adev->gfx.funcs->get_hdp_flush_mask(mes_ring, &ref_and_mask, ®_mem_engine); 615 616 return amdgpu_mes_reg_write_reg_wait(adev, hdp_flush_req_offset, hdp_flush_done_offset, 617 ref_and_mask, ref_and_mask, 0); 618 } 619 620 int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, 621 uint64_t process_context_addr, 622 uint32_t spi_gdbg_per_vmid_cntl, 623 const uint32_t *tcp_watch_cntl, 624 uint32_t flags, 625 bool trap_en, 626 uint32_t xcc_id) 627 { 628 struct mes_misc_op_input op_input = {0}; 629 int r; 630 631 if (!adev->mes.funcs->misc_op) { 632 dev_err(adev->dev, 633 "mes set shader debugger is not supported!\n"); 634 return -EINVAL; 635 } 636 637 op_input.xcc_id = xcc_id; 638 op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER; 639 op_input.set_shader_debugger.process_context_addr = process_context_addr; 640 op_input.set_shader_debugger.flags.u32all = flags; 641 642 /* use amdgpu mes_flush_shader_debugger instead */ 643 if (op_input.set_shader_debugger.flags.process_ctx_flush) 644 return -EINVAL; 645 646 op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl; 647 memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl, 648 sizeof(op_input.set_shader_debugger.tcp_watch_cntl)); 649 650 if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >> 651 AMDGPU_MES_API_VERSION_SHIFT) >= 14) 652 op_input.set_shader_debugger.trap_en = trap_en; 653 654 amdgpu_mes_lock(&adev->mes); 655 656 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 657 if (r) 658 dev_err(adev->dev, "failed to set_shader_debugger\n"); 659 660 amdgpu_mes_unlock(&adev->mes); 661 662 return r; 663 } 664 665 int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, 666 uint64_t process_context_addr, 667 uint32_t xcc_id) 668 { 669 struct mes_misc_op_input op_input = {0}; 670 int r; 671 672 if (!adev->mes.funcs->misc_op) { 673 dev_err(adev->dev, 674 "mes flush shader debugger is not supported!\n"); 675 return -EINVAL; 676 } 677 678 op_input.xcc_id = xcc_id; 679 op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER; 680 op_input.set_shader_debugger.process_context_addr = process_context_addr; 681 op_input.set_shader_debugger.flags.process_ctx_flush = true; 682 683 amdgpu_mes_lock(&adev->mes); 684 685 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 686 if (r) 687 dev_err(adev->dev, "failed to set_shader_debugger\n"); 688 689 amdgpu_mes_unlock(&adev->mes); 690 691 return r; 692 } 693 694 uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, 695 enum amdgpu_mes_priority_level prio) 696 { 697 return adev->mes.aggregated_doorbells[prio]; 698 } 699 700 int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) 701 { 702 const struct mes_firmware_header_v1_0 *mes_hdr; 703 struct amdgpu_firmware_info *info; 704 char ucode_prefix[30]; 705 char fw_name[50]; 706 bool need_retry = false; 707 u32 *ucode_ptr; 708 int r; 709 710 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 711 sizeof(ucode_prefix)); 712 if (adev->enable_uni_mes) { 713 snprintf(fw_name, sizeof(fw_name), 714 "amdgpu/%s_uni_mes.bin", ucode_prefix); 715 } else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && 716 amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0)) { 717 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", 718 ucode_prefix, 719 pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1"); 720 need_retry = true; 721 } else { 722 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", 723 ucode_prefix, 724 pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1"); 725 } 726 727 r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], AMDGPU_UCODE_REQUIRED, 728 "%s", fw_name); 729 if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) { 730 dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix); 731 r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], 732 AMDGPU_UCODE_REQUIRED, 733 "amdgpu/%s_mes.bin", ucode_prefix); 734 } 735 736 if (r) 737 goto out; 738 739 mes_hdr = (const struct mes_firmware_header_v1_0 *) 740 adev->mes.fw[pipe]->data; 741 adev->mes.uc_start_addr[pipe] = 742 le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) | 743 ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32); 744 adev->mes.data_start_addr[pipe] = 745 le32_to_cpu(mes_hdr->mes_data_start_addr_lo) | 746 ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32); 747 ucode_ptr = (u32 *)(adev->mes.fw[pipe]->data + 748 sizeof(union amdgpu_firmware_header)); 749 adev->mes.fw_version[pipe] = 750 le32_to_cpu(ucode_ptr[24]) & AMDGPU_MES_VERSION_MASK; 751 752 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 753 int ucode, ucode_data; 754 755 if (pipe == AMDGPU_MES_SCHED_PIPE) { 756 ucode = AMDGPU_UCODE_ID_CP_MES; 757 ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA; 758 } else { 759 ucode = AMDGPU_UCODE_ID_CP_MES1; 760 ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA; 761 } 762 763 info = &adev->firmware.ucode[ucode]; 764 info->ucode_id = ucode; 765 info->fw = adev->mes.fw[pipe]; 766 adev->firmware.fw_size += 767 ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes), 768 PAGE_SIZE); 769 770 info = &adev->firmware.ucode[ucode_data]; 771 info->ucode_id = ucode_data; 772 info->fw = adev->mes.fw[pipe]; 773 adev->firmware.fw_size += 774 ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes), 775 PAGE_SIZE); 776 } 777 778 return 0; 779 out: 780 amdgpu_ucode_release(&adev->mes.fw[pipe]); 781 return r; 782 } 783 784 void amdgpu_mes_validate_fw_version(struct amdgpu_device *adev) 785 { 786 u32 fw_from_ucode = adev->mes.fw_version[AMDGPU_MES_SCHED_PIPE]; 787 u32 fw_from_reg = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; 788 789 if (fw_from_ucode != fw_from_reg) 790 dev_info(adev->dev, 791 "MES firmware reports incorrect version in ucode binary (0x%x vs 0x%x)\n", 792 fw_from_ucode, fw_from_reg); 793 } 794 795 796 bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) 797 { 798 uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; 799 800 return ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && 801 amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) && 802 mes_rev >= 0x63) || 803 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0)); 804 } 805 806 bool amdgpu_mes_queue_reset_by_mes_supported(struct amdgpu_device *adev) 807 { 808 return (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0) && 809 (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x73); 810 } 811 812 /* Fix me -- node_id is used to identify the correct MES instances in the future */ 813 static int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, 814 uint32_t node_id, bool enable) 815 { 816 struct mes_misc_op_input op_input = {0}; 817 int r; 818 819 op_input.op = MES_MISC_OP_CHANGE_CONFIG; 820 op_input.change_config.option.limit_single_process = enable ? 1 : 0; 821 822 if (!adev->mes.funcs->misc_op) { 823 dev_err(adev->dev, "mes change config is not supported!\n"); 824 r = -EINVAL; 825 goto error; 826 } 827 828 amdgpu_mes_lock(&adev->mes); 829 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 830 amdgpu_mes_unlock(&adev->mes); 831 if (r) 832 dev_err(adev->dev, "failed to change_config.\n"); 833 834 error: 835 return r; 836 } 837 838 int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev) 839 { 840 int i, r = 0; 841 842 if (adev->enable_mes && adev->gfx.enable_cleaner_shader) { 843 mutex_lock(&adev->enforce_isolation_mutex); 844 for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { 845 if (adev->enforce_isolation[i] == AMDGPU_ENFORCE_ISOLATION_ENABLE) 846 r |= amdgpu_mes_set_enforce_isolation(adev, i, true); 847 else 848 r |= amdgpu_mes_set_enforce_isolation(adev, i, false); 849 } 850 mutex_unlock(&adev->enforce_isolation_mutex); 851 } 852 return r; 853 } 854 855 #if defined(CONFIG_DEBUG_FS) 856 857 static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) 858 { 859 struct amdgpu_device *adev = m->private; 860 uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr); 861 862 seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4, 863 mem, adev->mes.event_log_size, false); 864 865 return 0; 866 } 867 868 DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log); 869 870 #endif 871 872 void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev) 873 { 874 875 #if defined(CONFIG_DEBUG_FS) 876 struct drm_minor *minor = adev_to_drm(adev)->primary; 877 struct dentry *root = minor->debugfs_root; 878 if (adev->enable_mes && amdgpu_mes_log_enable) 879 debugfs_create_file("amdgpu_mes_event_log", 0444, root, 880 adev, &amdgpu_debugfs_mes_event_log_fops); 881 882 #endif 883 } 884