// SPDX-License-Identifier: MIT
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "amdgpu_amdkfd.h"
#include "amd_pcie.h"
#include "amd_shared.h"

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_dma_buf.h"
#include <drm/ttm/ttm_tt.h>
#include <linux/module.h>
#include <linux/dma-buf.h>
#include "amdgpu_xgmi.h"
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu_reset.h"
#include "amdgpu_ras_mgr.h"

/* Total memory size in system memory and all GPU VRAM. Used to
 * estimate worst case amount of memory to reserve for page tables
 */
uint64_t amdgpu_amdkfd_total_mem_size;

static bool kfd_initialized;

int amdgpu_amdkfd_init(void)
{
	struct sysinfo si;
	int ret;

	si_meminfo(&si);
	amdgpu_amdkfd_total_mem_size = si.freeram - si.freehigh;
	amdgpu_amdkfd_total_mem_size *= si.mem_unit;

	ret = kgd2kfd_init();
	kfd_initialized = !ret;

	return ret;
}

void amdgpu_amdkfd_fini(void)
{
	if (kfd_initialized) {
		kgd2kfd_exit();
		kfd_initialized = false;
	}
}

void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
{
	bool vf = amdgpu_sriov_vf(adev);

	if (!kfd_initialized)
		return;

	adev->kfd.dev = kgd2kfd_probe(adev, vf);
}

/**
 * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to
 *                                setup amdkfd
 *
 * @adev: amdgpu_device pointer
 * @aperture_base: output returning doorbell aperture base physical address
 * @aperture_size: output returning doorbell aperture size in bytes
 * @start_offset: output returning # of doorbell bytes reserved for amdgpu.
 *
 * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up,
 * takes doorbells required for its own rings and reports the setup to amdkfd.
 * amdgpu reserved doorbells are at the start of the doorbell aperture.
 */
static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
					 phys_addr_t *aperture_base,
					 size_t *aperture_size,
					 size_t *start_offset)
{
	/*
	 * The first num_kernel_doorbells are used by amdgpu.
	 * amdkfd takes whatever's left in the aperture.
	 */
	if (adev->enable_mes) {
		/*
		 * With MES enabled, we only need to initialize
		 * the base address. The size and offset are
		 * not initialized as AMDGPU manages the whole
		 * doorbell space.
		 */
		*aperture_base = adev->doorbell.base;
		*aperture_size = 0;
		*start_offset = 0;
	} else if (adev->doorbell.size > adev->doorbell.num_kernel_doorbells *
						sizeof(u32)) {
		*aperture_base = adev->doorbell.base;
		*aperture_size = adev->doorbell.size;
		*start_offset = adev->doorbell.num_kernel_doorbells * sizeof(u32);
	} else {
		*aperture_base = 0;
		*aperture_size = 0;
		*start_offset = 0;
	}
}

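/*
 * Deferred reset handler for KFD-detected hangs. amdgpu_amdkfd_gpu_reset()
 * queues this work on the reset domain; it builds a reset context with the
 * source set to MES or HWS and hands it to the common GPU recovery path.
 */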
static void amdgpu_amdkfd_reset_work(struct work_struct *work)
{
	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
						  kfd.reset_work);

	struct amdgpu_reset_context reset_context;

	memset(&reset_context, 0, sizeof(reset_context));

	reset_context.method = AMD_RESET_METHOD_NONE;
	reset_context.reset_req_dev = adev;
	reset_context.src = adev->enable_mes ?
			    AMDGPU_RESET_SRC_MES :
			    AMDGPU_RESET_SRC_HWS;
	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}

static const struct drm_client_funcs kfd_client_funcs = {
	.unregister = drm_client_release,
};

int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev)
{
	int ret;

	if (!adev->kfd.init_complete || adev->kfd.client.dev)
		return 0;

	ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
			      &kfd_client_funcs);
	if (ret) {
		dev_err(adev->dev, "Failed to init DRM client: %d\n",
			ret);
		return ret;
	}

	drm_client_register(&adev->kfd.client);

	return 0;
}

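/*
 * Called during amdgpu device init. Collects the resources KFD is allowed
 * to use (compute VMIDs, CP queue bitmap, GPUVM aperture size, doorbell
 * range) and passes them to kgd2kfd_device_init() as shared resources.
 */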
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
	int i;
	int last_valid_bit;

	amdgpu_amdkfd_gpuvm_init_mem_limits();

	if (adev->kfd.dev) {
		struct kgd2kfd_shared_resources gpu_resources = {
			.compute_vmid_bitmap =
				((1 << AMDGPU_NUM_VMID) - 1) -
				((1 << adev->vm_manager.first_kfd_vmid) - 1),
			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
			.gpuvm_size = min(adev->vm_manager.max_pfn
					  << AMDGPU_GPU_PAGE_SHIFT,
					  AMDGPU_GMC_HOLE_START),
			.drm_render_minor = adev_to_drm(adev)->render->index,
			.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
			.enable_mes = adev->enable_mes,
		};

		/* this is going to have a few of the MSBs set that we need to
		 * clear
		 */
		bitmap_complement(gpu_resources.cp_queue_bitmap,
				  adev->gfx.mec_bitmap[0].queue_bitmap,
				  AMDGPU_MAX_QUEUES);

		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
		 * nbits is not a compile time constant
		 */
		last_valid_bit = 1 /* only first MEC can have compute queues */
				* adev->gfx.mec.num_pipe_per_mec
				* adev->gfx.mec.num_queue_per_pipe;
		for (i = last_valid_bit; i < AMDGPU_MAX_QUEUES; ++i)
			clear_bit(i, gpu_resources.cp_queue_bitmap);

		amdgpu_doorbell_get_kfd_info(adev,
				&gpu_resources.doorbell_physical_address,
				&gpu_resources.doorbell_aperture_size,
				&gpu_resources.doorbell_start_offset);

		/* Since SOC15, the BIF statically uses the
		 * lower 12 bits of doorbell addresses for routing
		 * based on settings in registers like
		 * SDMA0_DOORBELL_RANGE etc.
		 * In order to route a doorbell to the CP engine, the lower
		 * 12 bits of its address have to be outside the range
		 * set for the SDMA, VCN, and IH blocks.
		 */
		if (adev->asic_type >= CHIP_VEGA10) {
			gpu_resources.non_cp_doorbells_start =
					adev->doorbell_index.first_non_cp;
			gpu_resources.non_cp_doorbells_end =
					adev->doorbell_index.last_non_cp;
		}

		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
							      &gpu_resources);

		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;

		INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
	}
}

void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
{
	if (adev->kfd.dev) {
		kgd2kfd_device_exit(adev->kfd.dev);
		adev->kfd.dev = NULL;
		amdgpu_amdkfd_total_mem_size -= adev->gmc.real_vram_size;
	}
}

void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
			     const void *ih_ring_entry)
{
	if (adev->kfd.dev)
		kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
}

void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc)
{
	if (adev->kfd.dev) {
		if (adev->in_s0ix)
			kgd2kfd_stop_sched_all_nodes(adev->kfd.dev);
		else
			kgd2kfd_suspend(adev->kfd.dev, suspend_proc);
	}
}

int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc)
{
	int r = 0;

	if (adev->kfd.dev) {
		if (adev->in_s0ix)
			r = kgd2kfd_start_sched_all_nodes(adev->kfd.dev);
		else
			r = kgd2kfd_resume(adev->kfd.dev, resume_proc);
	}

	return r;
}

void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev)
{
	if (adev->kfd.dev)
		kgd2kfd_suspend_process(adev->kfd.dev);
}

int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_resume_process(adev->kfd.dev);

	return r;
}

int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
			    struct amdgpu_reset_context *reset_context)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_pre_reset(adev->kfd.dev, reset_context);

	return r;
}

int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_post_reset(adev->kfd.dev);

	return r;
}

void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
{
	if (amdgpu_device_should_recover_gpu(adev))
		amdgpu_reset_domain_schedule(adev->reset_domain,
					     &adev->kfd.reset_work);
}

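/*
 * Allocate a pinned, CPU-mapped GTT BO for KFD kernel objects (cp_mqd_gfx9
 * requests the GFX9 MQD placement flag). The BO is created, reserved,
 * pinned in GTT, bound to the GART and kmapped; the labels at the end
 * unwind those steps in reverse order on failure.
 */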
int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
				void **mem_obj, uint64_t *gpu_addr,
				void **cpu_ptr, bool cp_mqd_gfx9)
{
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_param bp;
	int r;
	void *cpu_ptr_tmp = NULL;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	if (cp_mqd_gfx9)
		bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;

	r = amdgpu_bo_create(adev, &bp, &bo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate BO for amdkfd (%d)\n", r);
		return r;
	}

	/* map the buffer */
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
		goto allocate_mem_reserve_bo_failed;
	}

	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
	if (r) {
		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
		goto allocate_mem_pin_bo_failed;
	}

	r = amdgpu_ttm_alloc_gart(&bo->tbo);
	if (r) {
		dev_err(adev->dev, "%p bind failed\n", bo);
		goto allocate_mem_kmap_bo_failed;
	}

	r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
	if (r) {
		dev_err(adev->dev,
			"(%d) failed to map bo to kernel for amdkfd\n", r);
		goto allocate_mem_kmap_bo_failed;
	}

	*mem_obj = bo;
	*gpu_addr = amdgpu_bo_gpu_offset(bo);
	*cpu_ptr = cpu_ptr_tmp;

	amdgpu_bo_unreserve(bo);

	return 0;

allocate_mem_kmap_bo_failed:
	amdgpu_bo_unpin(bo);
allocate_mem_pin_bo_failed:
	amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
	amdgpu_bo_unref(&bo);

	return r;
}

void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj)
{
	struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj;

	if (!bo || !*bo)
		return;

	(void)amdgpu_bo_reserve(*bo, true);
	amdgpu_bo_kunmap(*bo);
	amdgpu_bo_unpin(*bo);
	amdgpu_bo_unreserve(*bo);
	amdgpu_bo_unref(bo);
}

int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
			    void **mem_obj)
{
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_user *ubo;
	struct amdgpu_bo_param bp;
	int r;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = 1;
	bp.domain = AMDGPU_GEM_DOMAIN_GWS;
	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	bp.type = ttm_bo_type_device;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	r = amdgpu_bo_create_user(adev, &bp, &ubo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate gws BO for amdkfd (%d)\n", r);
		return r;
	}

	bo = &ubo->bo;
	*mem_obj = bo;
	return 0;
}

void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj)
{
	struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;

	amdgpu_bo_unref(&bo);
}

uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
				      enum kgd_engine_type type)
{
	switch (type) {
	case KGD_ENGINE_PFP:
		return adev->gfx.pfp_fw_version;

	case KGD_ENGINE_ME:
		return adev->gfx.me_fw_version;

	case KGD_ENGINE_CE:
		return adev->gfx.ce_fw_version;

	case KGD_ENGINE_MEC1:
		return adev->gfx.mec_fw_version;

	case KGD_ENGINE_MEC2:
		return adev->gfx.mec2_fw_version;

	case KGD_ENGINE_RLC:
		return adev->gfx.rlc_fw_version;

	case KGD_ENGINE_SDMA1:
		return adev->sdma.instance[0].fw_version;

	case KGD_ENGINE_SDMA2:
		return adev->sdma.instance[1].fw_version;

	default:
		return 0;
	}

	return 0;
}

void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
				      struct kfd_local_mem_info *mem_info,
				      struct amdgpu_xcp *xcp)
{
	memset(mem_info, 0, sizeof(*mem_info));

	if (xcp) {
		if (adev->gmc.real_vram_size == adev->gmc.visible_vram_size)
			mem_info->local_mem_size_public =
					KFD_XCP_MEMORY_SIZE(adev, xcp->id);
		else
			mem_info->local_mem_size_private =
					KFD_XCP_MEMORY_SIZE(adev, xcp->id);
	} else if (adev->apu_prefer_gtt) {
		mem_info->local_mem_size_public = (ttm_tt_pages_limit() << PAGE_SHIFT);
		mem_info->local_mem_size_private = 0;
	} else {
		mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
		mem_info->local_mem_size_private = adev->gmc.real_vram_size -
				adev->gmc.visible_vram_size;
	}
	mem_info->vram_width = adev->gmc.vram_width;

	pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",
			&adev->gmc.aper_base,
			mem_info->local_mem_size_public,
			mem_info->local_mem_size_private);

	if (adev->pm.dpm_enabled) {
		if (amdgpu_emu_mode == 1)
			mem_info->mem_clk_max = 0;
		else
			mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
	} else
		mem_info->mem_clk_max = 100;
}

uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	if (adev->gfx.funcs->get_gpu_clock_counter)
		return adev->gfx.funcs->get_gpu_clock_counter(adev);
	return 0;
}

uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
{
	/* the sclk is in quanta of 10 kHz */
	if (adev->pm.dpm_enabled)
		return amdgpu_dpm_get_sclk(adev, false) / 100;
	else
		return 100;
}

int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
				  struct amdgpu_device **dmabuf_adev,
				  uint64_t *bo_size, void *metadata_buffer,
				  size_t buffer_size, uint32_t *metadata_size,
				  uint32_t *flags, int8_t *xcp_id)
{
	struct dma_buf *dma_buf;
	struct drm_gem_object *obj;
	struct amdgpu_bo *bo;
	uint64_t metadata_flags;
	int r = -EINVAL;

	dma_buf = dma_buf_get(dma_buf_fd);
	if (IS_ERR(dma_buf))
		return PTR_ERR(dma_buf);

	if (dma_buf->ops != &amdgpu_dmabuf_ops)
		/* Can't handle non-graphics buffers */
		goto out_put;

	obj = dma_buf->priv;
	if (obj->dev->driver != adev_to_drm(adev)->driver)
		/* Can't handle buffers from different drivers */
		goto out_put;

	adev = drm_to_adev(obj->dev);
	bo = gem_to_amdgpu_bo(obj);
	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
				       AMDGPU_GEM_DOMAIN_GTT)))
		/* Only VRAM and GTT BOs are supported */
		goto out_put;

	r = 0;
	if (dmabuf_adev)
		*dmabuf_adev = adev;
	if (bo_size)
		*bo_size = amdgpu_bo_size(bo);
	if (metadata_buffer)
		r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
					   metadata_size, &metadata_flags);
	if (flags) {
		*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
				KFD_IOC_ALLOC_MEM_FLAGS_VRAM
				: KFD_IOC_ALLOC_MEM_FLAGS_GTT;

		if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
			*flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
	}
	if (xcp_id)
		*xcp_id = bo->xcp_id;

out_put:
	dma_buf_put(dma_buf);
	return r;
}

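/*
 * Rough PCIe bandwidth estimate in MB/s: lane count times the per-lane
 * rate in Mbit/s, divided by BITS_PER_BYTE, ignoring encoding overhead.
 * For example, a Gen4 x16 link yields 16 * 16000 / 8 = 32000 MB/s.
 * is_min selects the lowest supported lane count and link speed instead
 * of the highest.
 */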
int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min)
{
	int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) :
					fls(adev->pm.pcie_mlw_mask)) - 1;
	int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask &
					    CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) :
					fls(adev->pm.pcie_gen_mask &
					    CAIL_PCIE_LINK_SPEED_SUPPORT_MASK)) - 1;
	uint32_t num_lanes_mask = 1 << num_lanes_shift;
	uint32_t gen_speed_mask = 1 << gen_speed_shift;
	int num_lanes_factor = 0, gen_speed_mbits_factor = 0;

	switch (num_lanes_mask) {
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X1:
		num_lanes_factor = 1;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X2:
		num_lanes_factor = 2;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X4:
		num_lanes_factor = 4;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X8:
		num_lanes_factor = 8;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X12:
		num_lanes_factor = 12;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X16:
		num_lanes_factor = 16;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X32:
		num_lanes_factor = 32;
		break;
	}

	switch (gen_speed_mask) {
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1:
		gen_speed_mbits_factor = 2500;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2:
		gen_speed_mbits_factor = 5000;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3:
		gen_speed_mbits_factor = 8000;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4:
		gen_speed_mbits_factor = 16000;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5:
		gen_speed_mbits_factor = 32000;
		break;
	}

	return (num_lanes_factor * gen_speed_mbits_factor)/BITS_PER_BYTE;
}

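/*
 * Submit a KFD-built IB directly on the first compute or SDMA ring with an
 * explicit VMID. As noted below, this only works in the NO_HWS case where
 * the VMID is known up front; the job is freed once the submission fence
 * has signalled or scheduling failed.
 */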
int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
			    enum kgd_engine_type engine,
			    uint32_t vmid, uint64_t gpu_addr,
			    uint32_t *ib_cmd, uint32_t ib_len)
{
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct amdgpu_ring *ring;
	struct dma_fence *f = NULL;
	int ret;

	switch (engine) {
	case KGD_ENGINE_MEC1:
		ring = &adev->gfx.compute_ring[0];
		break;
	case KGD_ENGINE_SDMA1:
		ring = &adev->sdma.instance[0].ring;
		break;
	case KGD_ENGINE_SDMA2:
		ring = &adev->sdma.instance[1].ring;
		break;
	default:
		pr_err("Invalid engine in IB submission: %d\n", engine);
		ret = -EINVAL;
		goto err;
	}

	ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job, 0);
	if (ret)
		goto err;

	ib = &job->ibs[0];
	memset(ib, 0, sizeof(struct amdgpu_ib));

	ib->gpu_addr = gpu_addr;
	ib->ptr = ib_cmd;
	ib->length_dw = ib_len;
	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
	job->vmid = vmid;
	job->num_ibs = 1;

	ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);

	if (ret) {
		DRM_ERROR("amdgpu: failed to schedule IB.\n");
		goto err_ib_sched;
	}

	/* Drop the initial kref_init count (see drm_sched_main as example) */
	dma_fence_put(f);
	ret = dma_fence_wait(f, false);

err_ib_sched:
	amdgpu_job_free(job);
err:
	return ret;
}

void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
{
	enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE;

	if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 &&
	    ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) ||
	    (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 12)) {
		pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled");
		amdgpu_gfx_off_ctrl(adev, idle);
	} else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) &&
		   (adev->flags & AMD_IS_APU)) {
		/* Disable GFXOFF and PG. Temporary workaround
		 * to fix some compute application issues on GFX9.
		 */
		struct amdgpu_ip_block *gfx_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);

		if (gfx_block != NULL)
			gfx_block->version->funcs->set_powergating_state((void *)gfx_block, state);
	}
	amdgpu_dpm_switch_power_profile(adev,
					PP_SMC_POWER_PROFILE_COMPUTE,
					!idle);
}

bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
	if (adev->kfd.dev)
		return vmid >= adev->vm_manager.first_kfd_vmid;

	return false;
}

bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
{
	return adev->have_atomics_support;
}

void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev)
{
	amdgpu_device_flush_hdp(adev, NULL);
}

bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev)
{
	return amdgpu_ras_get_fed_status(adev);
}

void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
	enum amdgpu_ras_block block, uint16_t pasid,
	pasid_notify pasid_fn, void *data, uint32_t reset)
{
	if (amdgpu_uniras_enabled(adev)) {
		struct ras_ih_info ih_info;

		memset(&ih_info, 0, sizeof(ih_info));
		ih_info.block = block;
		ih_info.pasid = pasid;
		ih_info.reset = reset;
		ih_info.pasid_fn = pasid_fn;
		ih_info.data = data;
		amdgpu_ras_mgr_handle_consumer_interrupt(adev, &ih_info);
		return;
	}

	amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data, reset);
}

void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
	enum amdgpu_ras_block block, uint32_t reset)
{
	amdgpu_umc_pasid_poison_handler(adev, block, 0, NULL, NULL, reset);
}

int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
					     uint32_t *payload)
{
	int ret;

	/* Device or IH ring is not ready so bail. */
	ret = amdgpu_ih_wait_on_checkpoint_process_ts(adev, &adev->irq.ih);
	if (ret)
		return ret;

	/* Send payload to fence KFD interrupts */
	amdgpu_amdkfd_interrupt(adev, payload);

	return 0;
}

int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev)
{
	return kgd2kfd_check_and_lock_kfd(adev->kfd.dev);
}

void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev)
{
	kgd2kfd_unlock_kfd(adev->kfd.dev);
}

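/*
 * VRAM size reported to KFD for one compute partition (XCP). With memory
 * partitioning, the partition size is split across the XCPs that share it
 * and rounded down to a page; APUs that prefer GTT report the TTM page
 * limit instead, and everything else reports the real VRAM size.
 */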
u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
{
	s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id);
	u64 tmp;

	if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 0) {
		if (adev->gmc.is_app_apu && adev->gmc.num_mem_partitions == 1) {
			/* In NPS1 mode, restrict the vram reporting to the
			 * ttm_pages_limit, which is 1/2 of system memory by
			 * default. For other partition modes, the HBM is
			 * already divided uniformly per reported NUMA node.
			 * Users who want to go beyond the default ttm limit
			 * and maximize ROCm allocations can raise the ttm
			 * and sysmem limits.
			 */
			tmp = (ttm_tt_pages_limit() << PAGE_SHIFT) / num_online_nodes();
		} else {
			tmp = adev->gmc.mem_partitions[mem_id].size;
		}
		do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
		return ALIGN_DOWN(tmp, PAGE_SIZE);
	} else if (adev->apu_prefer_gtt) {
		return (ttm_tt_pages_limit() << PAGE_SHIFT);
	} else {
		return adev->gmc.real_vram_size;
	}
}

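/*
 * Unmap KFD's HIQ through the KIQ. A temporary compute ring descriptor is
 * filled in with just the doorbell offset so the KIQ packet manager can
 * address the queue; the ring test afterwards ensures the unmap packet has
 * been processed before returning.
 */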
int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
			    u32 inst)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	struct amdgpu_ring_funcs *ring_funcs;
	struct amdgpu_ring *ring;
	int r = 0;

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	if (!kiq_ring->sched.ready || amdgpu_in_reset(adev))
		return 0;

	ring_funcs = kzalloc(sizeof(*ring_funcs), GFP_KERNEL);
	if (!ring_funcs)
		return -ENOMEM;

	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
	if (!ring) {
		r = -ENOMEM;
		goto free_ring_funcs;
	}

	ring_funcs->type = AMDGPU_RING_TYPE_COMPUTE;
	ring->doorbell_index = doorbell_off;
	ring->funcs = ring_funcs;

	spin_lock(&kiq->ring_lock);

	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
		spin_unlock(&kiq->ring_lock);
		r = -ENOMEM;
		goto free_ring;
	}

	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 0, 0);

	/* Submit unmap queue packet */
	amdgpu_ring_commit(kiq_ring);
	/*
	 * The ring test does a basic scratch register change check. Run it
	 * here to make sure the unmap-queues packet submitted above has been
	 * processed before returning.
	 */
	r = amdgpu_ring_test_helper(kiq_ring);

	spin_unlock(&kiq->ring_lock);

free_ring:
	kfree(ring);

free_ring_funcs:
	kfree(ring_funcs);

	return r;
}

/* Stop scheduling on KFD */
int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id)
{
	if (!adev->kfd.init_complete)
		return 0;

	return kgd2kfd_stop_sched(adev->kfd.dev, node_id);
}

/* Start scheduling on KFD */
int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id)
{
	if (!adev->kfd.init_complete)
		return 0;

	return kgd2kfd_start_sched(adev->kfd.dev, node_id);
}

/* Check if there are KFD queues active */
bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id)
{
	if (!adev->kfd.init_complete)
		return false;

	return kgd2kfd_compute_active(adev->kfd.dev, node_id);
}

/* Config CGTT_SQ_CLK_CTRL */
int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id,
	bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable)
{
	int r;

	if (!adev->kfd.init_complete)
		return 0;

	r = psp_config_sq_perfmon(&adev->psp, xcp_id, core_override_enable,
				  reg_override_enable, perfmon_override_enable);

	return r;
}