/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "amdgpu_amdkfd.h"
#include "amd_shared.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include <linux/module.h>

const struct kgd2kfd_calls *kgd2kfd;
bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);

/* Bits 8..15 of this mask reserve VMIDs 8..15 for KFD compute contexts;
 * see amdgpu_amdkfd_is_kfd_vmid() below.
 */
static const unsigned int compute_vmid_bitmap = 0xFF00;

int amdgpu_amdkfd_init(void)
{
	int ret;

#if defined(CONFIG_HSA_AMD_MODULE)
	int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);

	kgd2kfd_init_p = symbol_request(kgd2kfd_init);

	if (kgd2kfd_init_p == NULL)
		return -ENOENT;

	ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd);
	if (ret) {
		symbol_put(kgd2kfd_init);
		kgd2kfd = NULL;
	}

#elif defined(CONFIG_HSA_AMD)

	ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
	if (ret)
		kgd2kfd = NULL;

#else
	kgd2kfd = NULL;
	ret = -ENOENT;
#endif

#if defined(CONFIG_HSA_AMD_MODULE) || defined(CONFIG_HSA_AMD)
	amdgpu_amdkfd_gpuvm_init_mem_limits();
#endif

	return ret;
}
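
/* Typical call sequence, for orientation only: the exact call sites live
 * elsewhere in amdgpu (driver and device init paths), so treat this as a
 * sketch rather than a contract:
 *
 *	amdgpu_amdkfd_init()          - once, at module load
 *	amdgpu_amdkfd_device_probe()  - per device, picks kfd2kgd callbacks
 *	amdgpu_amdkfd_device_init()   - per device, hands resources to KFD
 *	amdgpu_amdkfd_interrupt() / _suspend() / _resume() - at runtime
 *	amdgpu_amdkfd_device_fini()   - per device teardown
 *	amdgpu_amdkfd_fini()          - once, at module unload
 */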

void amdgpu_amdkfd_fini(void)
{
	if (kgd2kfd) {
		kgd2kfd->exit();
		symbol_put(kgd2kfd_init);
	}
}

void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
{
	const struct kfd2kgd_calls *kfd2kgd;

	if (!kgd2kfd)
		return;

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_KAVERI:
	case CHIP_HAWAII:
		kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
		break;
#endif
	case CHIP_CARRIZO:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
		kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
		break;
	case CHIP_VEGA10:
	case CHIP_RAVEN:
		kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
		break;
	default:
		dev_info(adev->dev, "kfd not supported on this ASIC\n");
		return;
	}

	adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
				   adev->pdev, kfd2kgd);
}

/**
 * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to
 * setup amdkfd
 *
 * @adev: amdgpu_device pointer
 * @aperture_base: output returning doorbell aperture base physical address
 * @aperture_size: output returning doorbell aperture size in bytes
 * @start_offset: output returning # of doorbell bytes reserved for amdgpu.
 *
 * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up,
 * takes doorbells required for its own rings and reports the setup to amdkfd.
 * amdgpu reserved doorbells are at the start of the doorbell aperture.
 */
static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
					 phys_addr_t *aperture_base,
					 size_t *aperture_size,
					 size_t *start_offset)
{
	/*
	 * The first num_doorbells are used by amdgpu.
	 * amdkfd takes whatever's left in the aperture.
	 */
	if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) {
		*aperture_base = adev->doorbell.base;
		*aperture_size = adev->doorbell.size;
		*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
	} else {
		*aperture_base = 0;
		*aperture_size = 0;
		*start_offset = 0;
	}
}
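
/* Worked example for the split above (illustrative values, not from any
 * specific ASIC): with a 2 MiB doorbell BAR and
 * adev->doorbell.num_doorbells == 1024, amdgpu keeps the first
 * 1024 * sizeof(u32) = 4096 bytes, so amdkfd sees
 * aperture_base == doorbell.base, aperture_size == 2 MiB and
 * start_offset == 4096.
 */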

void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
	int i;
	int last_valid_bit;

	if (adev->kfd) {
		struct kgd2kfd_shared_resources gpu_resources = {
			.compute_vmid_bitmap = compute_vmid_bitmap,
			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
			.gpuvm_size = min(adev->vm_manager.max_pfn
					  << AMDGPU_GPU_PAGE_SHIFT,
					  AMDGPU_VA_HOLE_START),
			.drm_render_minor = adev->ddev->render->index
		};

		/* this is going to have a few of the MSBs set that we need to
		 * clear
		 */
		bitmap_complement(gpu_resources.queue_bitmap,
				  adev->gfx.mec.queue_bitmap,
				  KGD_MAX_QUEUES);

		/* remove the KIQ bit as well */
		if (adev->gfx.kiq.ring.ready)
			clear_bit(amdgpu_gfx_queue_to_bit(adev,
					adev->gfx.kiq.ring.me - 1,
					adev->gfx.kiq.ring.pipe,
					adev->gfx.kiq.ring.queue),
				  gpu_resources.queue_bitmap);

		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
		 * nbits is not compile time constant
		 */
		last_valid_bit = 1 /* only first MEC can have compute queues */
				 * adev->gfx.mec.num_pipe_per_mec
				 * adev->gfx.mec.num_queue_per_pipe;
		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
			clear_bit(i, gpu_resources.queue_bitmap);

		amdgpu_doorbell_get_kfd_info(adev,
				&gpu_resources.doorbell_physical_address,
				&gpu_resources.doorbell_aperture_size,
				&gpu_resources.doorbell_start_offset);
		if (adev->asic_type >= CHIP_VEGA10) {
			/* On SOC15 the BIF is involved in routing
			 * doorbells using the low 12 bits of the
			 * address. Communicate the assignments to
			 * KFD. KFD uses two doorbell pages per
			 * process in case of 64-bit doorbells so we
			 * can use each doorbell assignment twice.
			 */
			gpu_resources.sdma_doorbell[0][0] =
				AMDGPU_DOORBELL64_sDMA_ENGINE0;
			gpu_resources.sdma_doorbell[0][1] =
				AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
			gpu_resources.sdma_doorbell[1][0] =
				AMDGPU_DOORBELL64_sDMA_ENGINE1;
			gpu_resources.sdma_doorbell[1][1] =
				AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
			/* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for
			 * SDMA, IH and VCN. So don't use them for the CP.
			 */
			gpu_resources.reserved_doorbell_mask = 0x1f0;
			gpu_resources.reserved_doorbell_val = 0x0f0;
		}

		kgd2kfd->device_init(adev->kfd, &gpu_resources);
	}
}
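
/* Note on the queue bitmap above: amdgpu_gfx_queue_to_bit() linearizes
 * (mec, pipe, queue) into a single bit index. Assuming the usual mec-major
 * ordering, with e.g. num_pipe_per_mec == 4 and num_queue_per_pipe == 8,
 * MEC0 owns bits 0..31, so last_valid_bit is 32 and every bit from 32 up
 * to KGD_MAX_QUEUES is cleared before the bitmap is handed to KFD.
 */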

void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
{
	if (adev->kfd) {
		kgd2kfd->device_exit(adev->kfd);
		adev->kfd = NULL;
	}
}

void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
			     const void *ih_ring_entry)
{
	if (adev->kfd)
		kgd2kfd->interrupt(adev->kfd, ih_ring_entry);
}

void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
{
	if (adev->kfd)
		kgd2kfd->suspend(adev->kfd);
}

int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd)
		r = kgd2kfd->resume(adev->kfd);

	return r;
}

int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
		  void **mem_obj, uint64_t *gpu_addr,
		  void **cpu_ptr)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_param bp;
	int r;
	void *cpu_ptr_tmp = NULL;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;
	r = amdgpu_bo_create(adev, &bp, &bo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate BO for amdkfd (%d)\n", r);
		return r;
	}

	/* map the buffer */
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
		goto allocate_mem_reserve_bo_failed;
	}

	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
	if (r) {
		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
		goto allocate_mem_pin_bo_failed;
	}

	r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
	if (r) {
		dev_err(adev->dev,
			"(%d) failed to map bo to kernel for amdkfd\n", r);
		goto allocate_mem_kmap_bo_failed;
	}

	*mem_obj = bo;
	*gpu_addr = amdgpu_bo_gpu_offset(bo);
	*cpu_ptr = cpu_ptr_tmp;

	amdgpu_bo_unreserve(bo);

	return 0;

allocate_mem_kmap_bo_failed:
	amdgpu_bo_unpin(bo);
allocate_mem_pin_bo_failed:
	amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
	amdgpu_bo_unref(&bo);

	return r;
}

void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
	struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;

	amdgpu_bo_reserve(bo, true);
	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unpin(bo);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
}
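
/* Example pairing of the two helpers above (hypothetical caller, sketch
 * only):
 *
 *	void *mem_obj, *cpu_ptr;
 *	uint64_t gpu_addr;
 *
 *	if (!alloc_gtt_mem(kgd, PAGE_SIZE, &mem_obj, &gpu_addr, &cpu_ptr)) {
 *		// fill the buffer through cpu_ptr, point HW at gpu_addr
 *		free_gtt_mem(kgd, mem_obj);
 *	}
 */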

void get_local_mem_info(struct kgd_dev *kgd,
			struct kfd_local_mem_info *mem_info)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask :
					     ~((1ULL << 32) - 1);
	resource_size_t aper_limit = adev->gmc.aper_base + adev->gmc.aper_size;

	memset(mem_info, 0, sizeof(*mem_info));
	if (!(adev->gmc.aper_base & address_mask || aper_limit & address_mask)) {
		mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
		mem_info->local_mem_size_private = adev->gmc.real_vram_size -
				adev->gmc.visible_vram_size;
	} else {
		mem_info->local_mem_size_public = 0;
		mem_info->local_mem_size_private = adev->gmc.real_vram_size;
	}
	mem_info->vram_width = adev->gmc.vram_width;

	pr_debug("Address base: %pap limit %pap public 0x%llx private 0x%llx\n",
			&adev->gmc.aper_base, &aper_limit,
			mem_info->local_mem_size_public,
			mem_info->local_mem_size_private);

	if (amdgpu_sriov_vf(adev))
		mem_info->mem_clk_max = adev->clock.default_mclk / 100;
	else if (adev->powerplay.pp_funcs)
		mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
	else
		mem_info->mem_clk_max = 100;
}
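
/* Worked example of the visibility check above (illustrative numbers):
 * with a 40-bit DMA mask, address_mask = ~DMA_BIT_MASK(40). An aperture
 * at base 0xE0000000 with size 256 MiB lies entirely below 1ULL << 40,
 * so both base and limit AND with the mask to zero and the visible VRAM
 * is reported as public; otherwise all of VRAM is reported as private.
 */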

uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	if (adev->gfx.funcs->get_gpu_clock_counter)
		return adev->gfx.funcs->get_gpu_clock_counter(adev);
	return 0;
}

uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	/* the sclk is in quanta of 10 kHz, so dividing by 100 yields MHz */
	if (amdgpu_sriov_vf(adev))
		return adev->clock.default_sclk / 100;
	else if (adev->powerplay.pp_funcs)
		return amdgpu_dpm_get_sclk(adev, false) / 100;
	else
		return 100;
}

void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_cu_info acu_info = adev->gfx.cu_info;

	memset(cu_info, 0, sizeof(*cu_info));
	if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
		return;

	cu_info->cu_active_number = acu_info.number;
	cu_info->cu_ao_mask = acu_info.ao_cu_mask;
	memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
	       sizeof(acu_info.bitmap));
	cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
	cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
	cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
	cu_info->simd_per_cu = acu_info.simd_per_cu;
	cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
	cu_info->wave_front_size = acu_info.wave_front_size;
	cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
	cu_info->lds_size = acu_info.lds_size;
}

uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
}

int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
			    uint32_t vmid, uint64_t gpu_addr,
			    uint32_t *ib_cmd, uint32_t ib_len)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct amdgpu_ring *ring;
	struct dma_fence *f = NULL;
	int ret;

	switch (engine) {
	case KGD_ENGINE_MEC1:
		ring = &adev->gfx.compute_ring[0];
		break;
	case KGD_ENGINE_SDMA1:
		ring = &adev->sdma.instance[0].ring;
		break;
	case KGD_ENGINE_SDMA2:
		ring = &adev->sdma.instance[1].ring;
		break;
	default:
		pr_err("Invalid engine in IB submission: %d\n", engine);
		ret = -EINVAL;
		goto err;
	}

	ret = amdgpu_job_alloc(adev, 1, &job, NULL);
	if (ret)
		goto err;

	ib = &job->ibs[0];
	memset(ib, 0, sizeof(struct amdgpu_ib));

	ib->gpu_addr = gpu_addr;
	ib->ptr = ib_cmd;
	ib->length_dw = ib_len;
	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
	job->vmid = vmid;

	ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
	if (ret) {
		DRM_ERROR("amdgpu: failed to schedule IB.\n");
		goto err_ib_sched;
	}

	ret = dma_fence_wait(f, false);

err_ib_sched:
	dma_fence_put(f);
	amdgpu_job_free(job);
err:
	return ret;
}
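
/* Illustrative use of amdgpu_amdkfd_submit_ib() (sketch only; assumes the
 * IB contents were already copied to a GPU-visible buffer at ib_gpu_addr,
 * e.g. one allocated with alloc_gtt_mem() above):
 *
 *	uint32_t *ib_cmd = cpu_ptr;	// CPU view of the IB
 *	ret = amdgpu_amdkfd_submit_ib(kgd, KGD_ENGINE_MEC1, vmid,
 *				      ib_gpu_addr, ib_cmd, ib_len_dw);
 */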

bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
	if (adev->kfd) {
		if ((1 << vmid) & compute_vmid_bitmap)
			return true;
	}

	return false;
}

#if !defined(CONFIG_HSA_AMD_MODULE) && !defined(CONFIG_HSA_AMD)
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
{
	return false;
}

void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
{
}

void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
				    struct amdgpu_vm *vm)
{
}

struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
{
	return NULL;
}

int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
{
	return 0;
}

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
{
	return NULL;
}

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
{
	return NULL;
}

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
{
	return NULL;
}
#endif