1 /* 2 * Copyright 2019 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <linux/firmware.h> 25 #include <drm/drm_exec.h> 26 27 #include "amdgpu_mes.h" 28 #include "amdgpu.h" 29 #include "soc15_common.h" 30 #include "amdgpu_mes_ctx.h" 31 32 #define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 33 #define AMDGPU_ONE_DOORBELL_SIZE 8 34 35 int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) 36 { 37 return roundup(AMDGPU_ONE_DOORBELL_SIZE * 38 AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS, 39 PAGE_SIZE); 40 } 41 42 static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) 43 { 44 int i; 45 struct amdgpu_mes *mes = &adev->mes; 46 47 /* Bitmap for dynamic allocation of kernel doorbells */ 48 mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL); 49 if (!mes->doorbell_bitmap) { 50 dev_err(adev->dev, "Failed to allocate MES doorbell bitmap\n"); 51 return -ENOMEM; 52 } 53 54 mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE; 55 for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) { 56 adev->mes.aggregated_doorbells[i] = mes->db_start_dw_offset + i * 2; 57 set_bit(i, mes->doorbell_bitmap); 58 } 59 60 return 0; 61 } 62 63 static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) 64 { 65 int r; 66 67 if (!amdgpu_mes_log_enable) 68 return 0; 69 70 r = amdgpu_bo_create_kernel(adev, adev->mes.event_log_size, PAGE_SIZE, 71 AMDGPU_GEM_DOMAIN_VRAM, 72 &adev->mes.event_log_gpu_obj, 73 &adev->mes.event_log_gpu_addr, 74 &adev->mes.event_log_cpu_addr); 75 if (r) { 76 dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r); 77 return r; 78 } 79 80 memset(adev->mes.event_log_cpu_addr, 0, adev->mes.event_log_size); 81 82 return 0; 83 84 } 85 86 static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev) 87 { 88 bitmap_free(adev->mes.doorbell_bitmap); 89 } 90 91 static inline u32 amdgpu_mes_get_hqd_mask(u32 num_pipe, 92 u32 num_hqd_per_pipe, 93 u32 num_reserved_hqd) 94 { 95 if (num_pipe == 0) 96 return 0; 97 98 u32 total_hqd_mask = (u32)((1ULL << num_hqd_per_pipe) - 1); 99 u32 reserved_hqd_mask = (u32)((1ULL << DIV_ROUND_UP(num_reserved_hqd, num_pipe)) - 1); 100 101 return (total_hqd_mask & ~reserved_hqd_mask); 102 } 103 104 int amdgpu_mes_init(struct amdgpu_device *adev) 105 { 106 int i, r, num_pipes, num_queues = 0; 107 u32 total_vmid_mask, reserved_vmid_mask; 108 int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1; 109 u32 gfx_hqd_mask = amdgpu_mes_get_hqd_mask(adev->gfx.me.num_pipe_per_me, 110 adev->gfx.me.num_queue_per_pipe, 111 adev->gfx.disable_kq ? 0 : adev->gfx.num_gfx_rings); 112 u32 compute_hqd_mask = amdgpu_mes_get_hqd_mask(adev->gfx.mec.num_pipe_per_mec, 113 adev->gfx.mec.num_queue_per_pipe, 114 adev->gfx.disable_kq ? 0 : adev->gfx.num_compute_rings); 115 116 adev->mes.adev = adev; 117 118 ida_init(&adev->mes.doorbell_ida); 119 spin_lock_init(&adev->mes.queue_id_lock); 120 mutex_init(&adev->mes.mutex_hidden); 121 122 for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) 123 spin_lock_init(&adev->mes.ring_lock[i]); 124 125 adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; 126 total_vmid_mask = (u32)((1UL << 16) - 1); 127 reserved_vmid_mask = (u32)((1UL << adev->vm_manager.first_kfd_vmid) - 1); 128 129 adev->mes.vmid_mask_mmhub = 0xFF00; 130 adev->mes.vmid_mask_gfxhub = total_vmid_mask & ~reserved_vmid_mask; 131 132 num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me; 133 if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES) 134 dev_warn(adev->dev, "more gfx pipes than supported by MES! (%d vs %d)\n", 135 num_pipes, AMDGPU_MES_MAX_GFX_PIPES); 136 137 for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) { 138 if (i >= num_pipes) 139 break; 140 141 adev->mes.gfx_hqd_mask[i] = gfx_hqd_mask; 142 } 143 144 num_pipes = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec; 145 if (num_pipes > AMDGPU_MES_MAX_COMPUTE_PIPES) 146 dev_warn(adev->dev, "more compute pipes than supported by MES! (%d vs %d)\n", 147 num_pipes, AMDGPU_MES_MAX_COMPUTE_PIPES); 148 149 for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { 150 /* 151 * Currently, only MEC1 is used for both kernel and user compute queue. 152 * To enable other MEC, we need to redistribute queues per pipe and 153 * adjust queue resource shared with kfd that needs a separate patch. 154 * Skip other MEC for now to avoid potential issues. 155 */ 156 if (i >= adev->gfx.mec.num_pipe_per_mec) 157 break; 158 159 adev->mes.compute_hqd_mask[i] = compute_hqd_mask; 160 } 161 162 num_pipes = adev->sdma.num_inst_per_xcc ? 163 adev->sdma.num_inst_per_xcc : adev->sdma.num_instances; 164 if (num_pipes > AMDGPU_MES_MAX_SDMA_PIPES) 165 dev_warn(adev->dev, "more SDMA pipes than supported by MES! (%d vs %d)\n", 166 num_pipes, AMDGPU_MES_MAX_SDMA_PIPES); 167 168 for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { 169 if (i >= num_pipes) 170 break; 171 adev->mes.sdma_hqd_mask[i] = 0xfc; 172 } 173 174 dev_info(adev->dev, 175 "MES: vmid_mask_mmhub 0x%08x, vmid_mask_gfxhub 0x%08x\n", 176 adev->mes.vmid_mask_mmhub, 177 adev->mes.vmid_mask_gfxhub); 178 179 dev_info(adev->dev, 180 "MES: gfx_hqd_mask 0x%08x, compute_hqd_mask 0x%08x, sdma_hqd_mask 0x%08x\n", 181 adev->mes.gfx_hqd_mask[0], 182 adev->mes.compute_hqd_mask[0], 183 adev->mes.sdma_hqd_mask[0]); 184 185 for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) { 186 r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]); 187 if (r) { 188 dev_err(adev->dev, 189 "(%d) ring trail_fence_offs wb alloc failed\n", 190 r); 191 goto error; 192 } 193 adev->mes.sch_ctx_gpu_addr[i] = 194 adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4); 195 adev->mes.sch_ctx_ptr[i] = 196 (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]]; 197 198 r = amdgpu_device_wb_get(adev, 199 &adev->mes.query_status_fence_offs[i]); 200 if (r) { 201 dev_err(adev->dev, 202 "(%d) query_status_fence_offs wb alloc failed\n", 203 r); 204 goto error; 205 } 206 adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr + 207 (adev->mes.query_status_fence_offs[i] * 4); 208 adev->mes.query_status_fence_ptr[i] = 209 (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]]; 210 } 211 212 r = amdgpu_mes_doorbell_init(adev); 213 if (r) 214 goto error; 215 216 r = amdgpu_mes_event_log_init(adev); 217 if (r) 218 goto error_doorbell; 219 220 if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 1, 0)) { 221 /* When queue/pipe reset is done in MES instead of in the 222 * driver, MES passes hung queues information to the driver in 223 * hung_queue_hqd_info. Calculate required space to store this 224 * information. 225 */ 226 for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) 227 num_queues += hweight32(adev->mes.gfx_hqd_mask[i]); 228 229 for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) 230 num_queues += hweight32(adev->mes.compute_hqd_mask[i]); 231 232 for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) 233 num_queues += hweight32(adev->mes.sdma_hqd_mask[i]) * num_xcc; 234 235 adev->mes.hung_queue_hqd_info_offset = num_queues; 236 adev->mes.hung_queue_db_array_size = num_queues * 2; 237 } 238 239 if (adev->mes.hung_queue_db_array_size) { 240 for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { 241 r = amdgpu_bo_create_kernel(adev, 242 adev->mes.hung_queue_db_array_size * sizeof(u32), 243 PAGE_SIZE, 244 AMDGPU_GEM_DOMAIN_GTT, 245 &adev->mes.hung_queue_db_array_gpu_obj[i], 246 &adev->mes.hung_queue_db_array_gpu_addr[i], 247 &adev->mes.hung_queue_db_array_cpu_addr[i]); 248 if (r) { 249 dev_warn(adev->dev, "failed to create MES hung db array buffer (%d)", r); 250 goto error_doorbell; 251 } 252 } 253 } 254 255 return 0; 256 257 error_doorbell: 258 amdgpu_mes_doorbell_free(adev); 259 error: 260 for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) { 261 if (adev->mes.sch_ctx_ptr[i]) 262 amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); 263 if (adev->mes.query_status_fence_ptr[i]) 264 amdgpu_device_wb_free(adev, 265 adev->mes.query_status_fence_offs[i]); 266 if (adev->mes.hung_queue_db_array_gpu_obj[i]) 267 amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj[i], 268 &adev->mes.hung_queue_db_array_gpu_addr[i], 269 &adev->mes.hung_queue_db_array_cpu_addr[i]); 270 } 271 272 ida_destroy(&adev->mes.doorbell_ida); 273 mutex_destroy(&adev->mes.mutex_hidden); 274 return r; 275 } 276 277 void amdgpu_mes_fini(struct amdgpu_device *adev) 278 { 279 int i; 280 int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1; 281 282 amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, 283 &adev->mes.event_log_gpu_addr, 284 &adev->mes.event_log_cpu_addr); 285 286 for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) { 287 if (adev->mes.hung_queue_db_array_gpu_obj[i]) 288 amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj[i], 289 &adev->mes.hung_queue_db_array_gpu_addr[i], 290 &adev->mes.hung_queue_db_array_cpu_addr[i]); 291 if (adev->mes.sch_ctx_ptr[i]) 292 amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); 293 if (adev->mes.query_status_fence_ptr[i]) 294 amdgpu_device_wb_free(adev, 295 adev->mes.query_status_fence_offs[i]); 296 } 297 298 amdgpu_mes_doorbell_free(adev); 299 300 ida_destroy(&adev->mes.doorbell_ida); 301 mutex_destroy(&adev->mes.mutex_hidden); 302 } 303 304 int amdgpu_mes_suspend(struct amdgpu_device *adev, u32 xcc_id) 305 { 306 struct mes_suspend_gang_input input; 307 int r; 308 309 if (!amdgpu_mes_suspend_resume_all_supported(adev)) 310 return 0; 311 312 memset(&input, 0x0, sizeof(struct mes_suspend_gang_input)); 313 input.suspend_all_gangs = 1; 314 input.xcc_id = xcc_id; 315 316 /* 317 * Avoid taking any other locks under MES lock to avoid circular 318 * lock dependencies. 319 */ 320 amdgpu_mes_lock(&adev->mes); 321 r = adev->mes.funcs->suspend_gang(&adev->mes, &input); 322 amdgpu_mes_unlock(&adev->mes); 323 if (r) 324 dev_err(adev->dev, "failed to suspend all gangs"); 325 326 return r; 327 } 328 329 int amdgpu_mes_resume(struct amdgpu_device *adev, u32 xcc_id) 330 { 331 struct mes_resume_gang_input input; 332 int r; 333 334 if (!amdgpu_mes_suspend_resume_all_supported(adev)) 335 return 0; 336 337 memset(&input, 0x0, sizeof(struct mes_resume_gang_input)); 338 input.resume_all_gangs = 1; 339 input.xcc_id = xcc_id; 340 341 /* 342 * Avoid taking any other locks under MES lock to avoid circular 343 * lock dependencies. 344 */ 345 amdgpu_mes_lock(&adev->mes); 346 r = adev->mes.funcs->resume_gang(&adev->mes, &input); 347 amdgpu_mes_unlock(&adev->mes); 348 if (r) 349 dev_err(adev->dev, "failed to resume all gangs"); 350 351 return r; 352 } 353 354 int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, 355 struct amdgpu_ring *ring, uint32_t xcc_id) 356 { 357 struct mes_map_legacy_queue_input queue_input; 358 int r; 359 360 memset(&queue_input, 0, sizeof(queue_input)); 361 362 queue_input.xcc_id = xcc_id; 363 queue_input.queue_type = ring->funcs->type; 364 queue_input.doorbell_offset = ring->doorbell_index; 365 queue_input.pipe_id = ring->pipe; 366 queue_input.queue_id = ring->queue; 367 queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 368 queue_input.wptr_addr = ring->wptr_gpu_addr; 369 370 amdgpu_mes_lock(&adev->mes); 371 r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input); 372 amdgpu_mes_unlock(&adev->mes); 373 if (r) 374 dev_err(adev->dev, "failed to map legacy queue\n"); 375 376 return r; 377 } 378 379 int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, 380 struct amdgpu_ring *ring, 381 enum amdgpu_unmap_queues_action action, 382 u64 gpu_addr, u64 seq, uint32_t xcc_id) 383 { 384 struct mes_unmap_legacy_queue_input queue_input; 385 int r; 386 387 queue_input.xcc_id = xcc_id; 388 queue_input.action = action; 389 queue_input.queue_type = ring->funcs->type; 390 queue_input.doorbell_offset = ring->doorbell_index; 391 queue_input.pipe_id = ring->pipe; 392 queue_input.queue_id = ring->queue; 393 queue_input.trail_fence_addr = gpu_addr; 394 queue_input.trail_fence_data = seq; 395 396 amdgpu_mes_lock(&adev->mes); 397 r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input); 398 amdgpu_mes_unlock(&adev->mes); 399 if (r) 400 dev_err(adev->dev, "failed to unmap legacy queue\n"); 401 402 return r; 403 } 404 405 int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, 406 struct amdgpu_ring *ring, 407 unsigned int vmid, 408 bool use_mmio, 409 uint32_t xcc_id) 410 { 411 struct mes_reset_queue_input queue_input; 412 int r; 413 414 memset(&queue_input, 0, sizeof(queue_input)); 415 416 queue_input.xcc_id = xcc_id; 417 queue_input.queue_type = ring->funcs->type; 418 queue_input.doorbell_offset = ring->doorbell_index; 419 queue_input.me_id = ring->me; 420 queue_input.pipe_id = ring->pipe; 421 queue_input.queue_id = ring->queue; 422 queue_input.mqd_addr = ring->mqd_obj ? amdgpu_bo_gpu_offset(ring->mqd_obj) : 0; 423 queue_input.wptr_addr = ring->wptr_gpu_addr; 424 queue_input.vmid = vmid; 425 queue_input.use_mmio = use_mmio; 426 queue_input.is_kq = true; 427 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) 428 queue_input.legacy_gfx = true; 429 430 amdgpu_mes_lock(&adev->mes); 431 r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); 432 amdgpu_mes_unlock(&adev->mes); 433 if (r) 434 dev_err(adev->dev, "failed to reset legacy queue\n"); 435 436 return r; 437 } 438 439 int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev) 440 { 441 return adev->mes.hung_queue_db_array_size; 442 } 443 444 int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, 445 int queue_type, 446 bool detect_only, 447 unsigned int *hung_db_num, 448 u32 *hung_db_array, 449 uint32_t xcc_id) 450 { 451 struct mes_detect_and_reset_queue_input input; 452 u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr[xcc_id]; 453 int hqd_info_offset = adev->mes.hung_queue_hqd_info_offset, r, i; 454 455 if (!hung_db_num || !hung_db_array) 456 return -EINVAL; 457 458 if ((queue_type != AMDGPU_RING_TYPE_GFX) && 459 (queue_type != AMDGPU_RING_TYPE_COMPUTE) && 460 (queue_type != AMDGPU_RING_TYPE_SDMA)) 461 return -EINVAL; 462 463 /* Clear the doorbell array before detection */ 464 memset(adev->mes.hung_queue_db_array_cpu_addr[xcc_id], AMDGPU_MES_INVALID_DB_OFFSET, 465 adev->mes.hung_queue_db_array_size * sizeof(u32)); 466 input.queue_type = queue_type; 467 input.detect_only = detect_only; 468 input.xcc_id = xcc_id; 469 470 r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes, 471 &input); 472 473 if (r && detect_only) { 474 dev_err(adev->dev, "Failed to detect hung queues\n"); 475 return r; 476 } 477 478 *hung_db_num = 0; 479 /* MES passes hung queues' doorbell to driver */ 480 for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) { 481 /* Finding hung queues where db_array[i] is a valid doorbell */ 482 if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) { 483 hung_db_array[i] = db_array[i]; 484 *hung_db_num += 1; 485 } 486 } 487 488 if (r && !(*hung_db_num)) { 489 dev_err(adev->dev, "Failed to detect and reset hung queues\n"); 490 return r; 491 } 492 493 for (i = hqd_info_offset; i < hqd_info_offset + *hung_db_num; i++) 494 hung_db_array[i] = db_array[i]; 495 496 return r; 497 } 498 499 uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg, 500 uint32_t xcc_id) 501 { 502 struct mes_misc_op_input op_input; 503 int r, val = 0; 504 uint32_t addr_offset = 0; 505 uint64_t read_val_gpu_addr; 506 uint32_t *read_val_ptr; 507 508 if (amdgpu_device_wb_get(adev, &addr_offset)) { 509 dev_err(adev->dev, "critical bug! too many mes readers\n"); 510 goto error; 511 } 512 read_val_gpu_addr = adev->wb.gpu_addr + (addr_offset * 4); 513 read_val_ptr = (uint32_t *)&adev->wb.wb[addr_offset]; 514 op_input.xcc_id = xcc_id; 515 op_input.op = MES_MISC_OP_READ_REG; 516 op_input.read_reg.reg_offset = reg; 517 op_input.read_reg.buffer_addr = read_val_gpu_addr; 518 519 if (!adev->mes.funcs->misc_op) { 520 dev_err(adev->dev, "mes rreg is not supported!\n"); 521 goto error; 522 } 523 524 amdgpu_mes_lock(&adev->mes); 525 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 526 amdgpu_mes_unlock(&adev->mes); 527 if (r) 528 dev_err(adev->dev, "failed to read reg (0x%x)\n", reg); 529 else 530 val = *(read_val_ptr); 531 532 error: 533 if (addr_offset) 534 amdgpu_device_wb_free(adev, addr_offset); 535 return val; 536 } 537 538 int amdgpu_mes_wreg(struct amdgpu_device *adev, uint32_t reg, 539 uint32_t val, uint32_t xcc_id) 540 { 541 struct mes_misc_op_input op_input; 542 int r; 543 544 op_input.xcc_id = xcc_id; 545 op_input.op = MES_MISC_OP_WRITE_REG; 546 op_input.write_reg.reg_offset = reg; 547 op_input.write_reg.reg_value = val; 548 549 if (!adev->mes.funcs->misc_op) { 550 dev_err(adev->dev, "mes wreg is not supported!\n"); 551 r = -EINVAL; 552 goto error; 553 } 554 555 amdgpu_mes_lock(&adev->mes); 556 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 557 amdgpu_mes_unlock(&adev->mes); 558 if (r) 559 dev_err(adev->dev, "failed to write reg (0x%x)\n", reg); 560 561 error: 562 return r; 563 } 564 565 int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, 566 uint32_t reg0, uint32_t reg1, 567 uint32_t ref, uint32_t mask, 568 uint32_t xcc_id) 569 { 570 struct mes_misc_op_input op_input; 571 int r; 572 573 op_input.xcc_id = xcc_id; 574 op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT; 575 op_input.wrm_reg.reg0 = reg0; 576 op_input.wrm_reg.reg1 = reg1; 577 op_input.wrm_reg.ref = ref; 578 op_input.wrm_reg.mask = mask; 579 580 if (!adev->mes.funcs->misc_op) { 581 dev_err(adev->dev, "mes reg_write_reg_wait is not supported!\n"); 582 r = -EINVAL; 583 goto error; 584 } 585 586 amdgpu_mes_lock(&adev->mes); 587 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 588 amdgpu_mes_unlock(&adev->mes); 589 if (r) 590 dev_err(adev->dev, "failed to reg_write_reg_wait\n"); 591 592 error: 593 return r; 594 } 595 596 int amdgpu_mes_hdp_flush(struct amdgpu_device *adev) 597 { 598 uint32_t hdp_flush_req_offset, hdp_flush_done_offset; 599 struct amdgpu_ring *mes_ring; 600 uint32_t ref_and_mask = 0, reg_mem_engine = 0; 601 602 if (!adev->gfx.funcs->get_hdp_flush_mask) { 603 dev_err(adev->dev, "mes hdp flush is not supported.\n"); 604 return -EINVAL; 605 } 606 607 mes_ring = &adev->mes.ring[0]; 608 hdp_flush_req_offset = adev->nbio.funcs->get_hdp_flush_req_offset(adev); 609 hdp_flush_done_offset = adev->nbio.funcs->get_hdp_flush_done_offset(adev); 610 611 adev->gfx.funcs->get_hdp_flush_mask(mes_ring, &ref_and_mask, ®_mem_engine); 612 613 return amdgpu_mes_reg_write_reg_wait(adev, hdp_flush_req_offset, hdp_flush_done_offset, 614 ref_and_mask, ref_and_mask, 0); 615 } 616 617 int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, 618 uint64_t process_context_addr, 619 uint32_t spi_gdbg_per_vmid_cntl, 620 const uint32_t *tcp_watch_cntl, 621 uint32_t flags, 622 bool trap_en, 623 uint32_t xcc_id) 624 { 625 struct mes_misc_op_input op_input = {0}; 626 int r; 627 628 if (!adev->mes.funcs->misc_op) { 629 dev_err(adev->dev, 630 "mes set shader debugger is not supported!\n"); 631 return -EINVAL; 632 } 633 634 op_input.xcc_id = xcc_id; 635 op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER; 636 op_input.set_shader_debugger.process_context_addr = process_context_addr; 637 op_input.set_shader_debugger.flags.u32all = flags; 638 639 /* use amdgpu mes_flush_shader_debugger instead */ 640 if (op_input.set_shader_debugger.flags.process_ctx_flush) 641 return -EINVAL; 642 643 op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl; 644 memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl, 645 sizeof(op_input.set_shader_debugger.tcp_watch_cntl)); 646 647 if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >> 648 AMDGPU_MES_API_VERSION_SHIFT) >= 14) 649 op_input.set_shader_debugger.trap_en = trap_en; 650 651 amdgpu_mes_lock(&adev->mes); 652 653 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 654 if (r) 655 dev_err(adev->dev, "failed to set_shader_debugger\n"); 656 657 amdgpu_mes_unlock(&adev->mes); 658 659 return r; 660 } 661 662 int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, 663 uint64_t process_context_addr, 664 uint32_t xcc_id) 665 { 666 struct mes_misc_op_input op_input = {0}; 667 int r; 668 669 if (!adev->mes.funcs->misc_op) { 670 dev_err(adev->dev, 671 "mes flush shader debugger is not supported!\n"); 672 return -EINVAL; 673 } 674 675 op_input.xcc_id = xcc_id; 676 op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER; 677 op_input.set_shader_debugger.process_context_addr = process_context_addr; 678 op_input.set_shader_debugger.flags.process_ctx_flush = true; 679 680 amdgpu_mes_lock(&adev->mes); 681 682 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 683 if (r) 684 dev_err(adev->dev, "failed to set_shader_debugger\n"); 685 686 amdgpu_mes_unlock(&adev->mes); 687 688 return r; 689 } 690 691 uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, 692 enum amdgpu_mes_priority_level prio) 693 { 694 return adev->mes.aggregated_doorbells[prio]; 695 } 696 697 int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) 698 { 699 const struct mes_firmware_header_v1_0 *mes_hdr; 700 struct amdgpu_firmware_info *info; 701 char ucode_prefix[30]; 702 char fw_name[50]; 703 bool need_retry = false; 704 u32 *ucode_ptr; 705 int r; 706 707 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 708 sizeof(ucode_prefix)); 709 if (adev->enable_uni_mes) { 710 snprintf(fw_name, sizeof(fw_name), 711 "amdgpu/%s_uni_mes.bin", ucode_prefix); 712 } else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && 713 amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0)) { 714 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", 715 ucode_prefix, 716 pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1"); 717 need_retry = true; 718 } else { 719 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", 720 ucode_prefix, 721 pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1"); 722 } 723 724 r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], AMDGPU_UCODE_REQUIRED, 725 "%s", fw_name); 726 if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) { 727 dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix); 728 r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], 729 AMDGPU_UCODE_REQUIRED, 730 "amdgpu/%s_mes.bin", ucode_prefix); 731 } 732 733 if (r) 734 goto out; 735 736 mes_hdr = (const struct mes_firmware_header_v1_0 *) 737 adev->mes.fw[pipe]->data; 738 adev->mes.uc_start_addr[pipe] = 739 le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) | 740 ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32); 741 adev->mes.data_start_addr[pipe] = 742 le32_to_cpu(mes_hdr->mes_data_start_addr_lo) | 743 ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32); 744 ucode_ptr = (u32 *)(adev->mes.fw[pipe]->data + 745 sizeof(union amdgpu_firmware_header)); 746 adev->mes.fw_version[pipe] = 747 le32_to_cpu(ucode_ptr[24]) & AMDGPU_MES_VERSION_MASK; 748 749 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 750 int ucode, ucode_data; 751 752 if (pipe == AMDGPU_MES_SCHED_PIPE) { 753 ucode = AMDGPU_UCODE_ID_CP_MES; 754 ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA; 755 } else { 756 ucode = AMDGPU_UCODE_ID_CP_MES1; 757 ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA; 758 } 759 760 info = &adev->firmware.ucode[ucode]; 761 info->ucode_id = ucode; 762 info->fw = adev->mes.fw[pipe]; 763 adev->firmware.fw_size += 764 ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes), 765 PAGE_SIZE); 766 767 info = &adev->firmware.ucode[ucode_data]; 768 info->ucode_id = ucode_data; 769 info->fw = adev->mes.fw[pipe]; 770 adev->firmware.fw_size += 771 ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes), 772 PAGE_SIZE); 773 } 774 775 return 0; 776 out: 777 amdgpu_ucode_release(&adev->mes.fw[pipe]); 778 return r; 779 } 780 781 bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) 782 { 783 uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; 784 785 return ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && 786 amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) && 787 mes_rev >= 0x63) || 788 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0)); 789 } 790 791 bool amdgpu_mes_queue_reset_by_mes_supported(struct amdgpu_device *adev) 792 { 793 return (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0) && 794 (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x73); 795 } 796 797 /* Fix me -- node_id is used to identify the correct MES instances in the future */ 798 static int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, 799 uint32_t node_id, bool enable) 800 { 801 struct mes_misc_op_input op_input = {0}; 802 int r; 803 804 op_input.op = MES_MISC_OP_CHANGE_CONFIG; 805 op_input.change_config.option.limit_single_process = enable ? 1 : 0; 806 807 if (!adev->mes.funcs->misc_op) { 808 dev_err(adev->dev, "mes change config is not supported!\n"); 809 r = -EINVAL; 810 goto error; 811 } 812 813 amdgpu_mes_lock(&adev->mes); 814 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 815 amdgpu_mes_unlock(&adev->mes); 816 if (r) 817 dev_err(adev->dev, "failed to change_config.\n"); 818 819 error: 820 return r; 821 } 822 823 int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev) 824 { 825 int i, r = 0; 826 827 if (adev->enable_mes && adev->gfx.enable_cleaner_shader) { 828 mutex_lock(&adev->enforce_isolation_mutex); 829 for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { 830 if (adev->enforce_isolation[i] == AMDGPU_ENFORCE_ISOLATION_ENABLE) 831 r |= amdgpu_mes_set_enforce_isolation(adev, i, true); 832 else 833 r |= amdgpu_mes_set_enforce_isolation(adev, i, false); 834 } 835 mutex_unlock(&adev->enforce_isolation_mutex); 836 } 837 return r; 838 } 839 840 #if defined(CONFIG_DEBUG_FS) 841 842 static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) 843 { 844 struct amdgpu_device *adev = m->private; 845 uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr); 846 847 seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4, 848 mem, adev->mes.event_log_size, false); 849 850 return 0; 851 } 852 853 DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log); 854 855 #endif 856 857 void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev) 858 { 859 860 #if defined(CONFIG_DEBUG_FS) 861 struct drm_minor *minor = adev_to_drm(adev)->primary; 862 struct dentry *root = minor->debugfs_root; 863 if (adev->enable_mes && amdgpu_mes_log_enable) 864 debugfs_create_file("amdgpu_mes_event_log", 0444, root, 865 adev, &amdgpu_debugfs_mes_event_log_fops); 866 867 #endif 868 } 869