xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c (revision 6475ae2b742876aa9b2a0aff7ba60f5c81917614)
1130e0371SOded Gabbay /*
2130e0371SOded Gabbay  * Copyright 2014 Advanced Micro Devices, Inc.
3130e0371SOded Gabbay  *
4130e0371SOded Gabbay  * Permission is hereby granted, free of charge, to any person obtaining a
5130e0371SOded Gabbay  * copy of this software and associated documentation files (the "Software"),
6130e0371SOded Gabbay  * to deal in the Software without restriction, including without limitation
7130e0371SOded Gabbay  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8130e0371SOded Gabbay  * and/or sell copies of the Software, and to permit persons to whom the
9130e0371SOded Gabbay  * Software is furnished to do so, subject to the following conditions:
10130e0371SOded Gabbay  *
11130e0371SOded Gabbay  * The above copyright notice and this permission notice shall be included in
12130e0371SOded Gabbay  * all copies or substantial portions of the Software.
13130e0371SOded Gabbay  *
14130e0371SOded Gabbay  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15130e0371SOded Gabbay  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16130e0371SOded Gabbay  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17130e0371SOded Gabbay  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18130e0371SOded Gabbay  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19130e0371SOded Gabbay  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20130e0371SOded Gabbay  * OTHER DEALINGS IN THE SOFTWARE.
21130e0371SOded Gabbay  */
22130e0371SOded Gabbay 
23130e0371SOded Gabbay #include "amdgpu_amdkfd.h"
2493304810SJonathan Kim #include "amd_pcie.h"
252f7d10b3SJammy Zhou #include "amd_shared.h"
26fdf2f6c5SSam Ravnborg 
27130e0371SOded Gabbay #include "amdgpu.h"
282db0cdbeSAlex Deucher #include "amdgpu_gfx.h"
292fbd6f94SChristian König #include "amdgpu_dma_buf.h"
30130e0371SOded Gabbay #include <linux/module.h>
311dde0ea9SFelix Kuehling #include <linux/dma-buf.h>
32da361dd1Sshaoyunl #include "amdgpu_xgmi.h"
331d251d90SYong Zhao #include <uapi/linux/kfd_ioctl.h>
34c7490949STao Zhou #include "amdgpu_ras.h"
35c7490949STao Zhou #include "amdgpu_umc.h"
36130e0371SOded Gabbay 
/* Total memory size in system memory and all GPU VRAM. Used to
 * estimate worst case amount of memory to reserve for page tables
 */
uint64_t amdgpu_amdkfd_total_mem_size;

/* Set to true only when kgd2kfd_init() succeeded; gates all later
 * amdgpu<->KFD interactions (probe, fini).
 */
static bool kfd_initialized;
43c7651b73SFelix Kuehling 
44efb1c658SOded Gabbay int amdgpu_amdkfd_init(void)
45130e0371SOded Gabbay {
46611736d8SFelix Kuehling 	struct sysinfo si;
47efb1c658SOded Gabbay 	int ret;
48efb1c658SOded Gabbay 
49611736d8SFelix Kuehling 	si_meminfo(&si);
50df23d1bbSOak Zeng 	amdgpu_amdkfd_total_mem_size = si.freeram - si.freehigh;
51611736d8SFelix Kuehling 	amdgpu_amdkfd_total_mem_size *= si.mem_unit;
52611736d8SFelix Kuehling 
53308176d6SAmber Lin 	ret = kgd2kfd_init();
5482b7b619SAmber Lin 	amdgpu_amdkfd_gpuvm_init_mem_limits();
55c7651b73SFelix Kuehling 	kfd_initialized = !ret;
56fcdfa432SOded Gabbay 
57efb1c658SOded Gabbay 	return ret;
58130e0371SOded Gabbay }
59130e0371SOded Gabbay 
60130e0371SOded Gabbay void amdgpu_amdkfd_fini(void)
61130e0371SOded Gabbay {
62c7651b73SFelix Kuehling 	if (kfd_initialized) {
638e07e267SAmber Lin 		kgd2kfd_exit();
64c7651b73SFelix Kuehling 		kfd_initialized = false;
65c7651b73SFelix Kuehling 	}
66130e0371SOded Gabbay }
67130e0371SOded Gabbay 
68dc102c43SAndres Rodriguez void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
69130e0371SOded Gabbay {
70050091abSYong Zhao 	bool vf = amdgpu_sriov_vf(adev);
715c33f214SFelix Kuehling 
72c7651b73SFelix Kuehling 	if (!kfd_initialized)
73c7651b73SFelix Kuehling 		return;
74c7651b73SFelix Kuehling 
75b5d1d755SGraham Sider 	adev->kfd.dev = kgd2kfd_probe(adev, vf);
76611736d8SFelix Kuehling 
77611736d8SFelix Kuehling 	if (adev->kfd.dev)
78611736d8SFelix Kuehling 		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
79130e0371SOded Gabbay }
80130e0371SOded Gabbay 
8122cb0164SAlex Deucher /**
8222cb0164SAlex Deucher  * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to
8322cb0164SAlex Deucher  *                                setup amdkfd
8422cb0164SAlex Deucher  *
8522cb0164SAlex Deucher  * @adev: amdgpu_device pointer
8622cb0164SAlex Deucher  * @aperture_base: output returning doorbell aperture base physical address
8722cb0164SAlex Deucher  * @aperture_size: output returning doorbell aperture size in bytes
8822cb0164SAlex Deucher  * @start_offset: output returning # of doorbell bytes reserved for amdgpu.
8922cb0164SAlex Deucher  *
9022cb0164SAlex Deucher  * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up,
9122cb0164SAlex Deucher  * takes doorbells required for its own rings and reports the setup to amdkfd.
9222cb0164SAlex Deucher  * amdgpu reserved doorbells are at the start of the doorbell aperture.
9322cb0164SAlex Deucher  */
9422cb0164SAlex Deucher static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
9522cb0164SAlex Deucher 					 phys_addr_t *aperture_base,
9622cb0164SAlex Deucher 					 size_t *aperture_size,
9722cb0164SAlex Deucher 					 size_t *start_offset)
9822cb0164SAlex Deucher {
9922cb0164SAlex Deucher 	/*
10022cb0164SAlex Deucher 	 * The first num_doorbells are used by amdgpu.
10122cb0164SAlex Deucher 	 * amdkfd takes whatever's left in the aperture.
10222cb0164SAlex Deucher 	 */
10322cb0164SAlex Deucher 	if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) {
10422cb0164SAlex Deucher 		*aperture_base = adev->doorbell.base;
10522cb0164SAlex Deucher 		*aperture_size = adev->doorbell.size;
10622cb0164SAlex Deucher 		*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
10722cb0164SAlex Deucher 	} else {
10822cb0164SAlex Deucher 		*aperture_base = 0;
10922cb0164SAlex Deucher 		*aperture_size = 0;
11022cb0164SAlex Deucher 		*start_offset = 0;
11122cb0164SAlex Deucher 	}
11222cb0164SAlex Deucher }
11322cb0164SAlex Deucher 
/* Complete KFD-side initialization for @adev: describe the compute
 * resources amdgpu cedes to KFD (VMIDs, MEC queues, GPUVM aperture,
 * doorbells) and call kgd2kfd_device_init() with them.  No-op when
 * probe did not create a KFD device.
 */
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
	int i;
	int last_valid_bit;

	if (adev->kfd.dev) {
		struct kgd2kfd_shared_resources gpu_resources = {
			/* VMIDs from first_kfd_vmid up to AMDGPU_NUM_VMID-1
			 * belong to KFD: set exactly those bits.
			 */
			.compute_vmid_bitmap =
				((1 << AMDGPU_NUM_VMID) - 1) -
				((1 << adev->vm_manager.first_kfd_vmid) - 1),
			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
			/* GPUVM address space for KFD, capped at the GMC hole. */
			.gpuvm_size = min(adev->vm_manager.max_pfn
					  << AMDGPU_GPU_PAGE_SHIFT,
					  AMDGPU_GMC_HOLE_START),
			.drm_render_minor = adev_to_drm(adev)->render->index,
			.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,

		};

		/* this is going to have a few of the MSBs set that we need to
		 * clear
		 */
		bitmap_complement(gpu_resources.cp_queue_bitmap,
				  adev->gfx.mec.queue_bitmap,
				  KGD_MAX_QUEUES);

		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
		 * nbits is not compile time constant
		 */
		last_valid_bit = 1 /* only first MEC can have compute queues */
				* adev->gfx.mec.num_pipe_per_mec
				* adev->gfx.mec.num_queue_per_pipe;
		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
			clear_bit(i, gpu_resources.cp_queue_bitmap);

		/* Report the doorbell range left over for amdkfd's use. */
		amdgpu_doorbell_get_kfd_info(adev,
				&gpu_resources.doorbell_physical_address,
				&gpu_resources.doorbell_aperture_size,
				&gpu_resources.doorbell_start_offset);

		/* Since SOC15, BIF starts to statically use the
		 * lower 12 bits of doorbell addresses for routing
		 * based on settings in registers like
		 * SDMA0_DOORBELL_RANGE etc..
		 * In order to route a doorbell to CP engine, the lower
		 * 12 bits of its address has to be outside the range
		 * set for SDMA, VCN, and IH blocks.
		 */
		if (adev->asic_type >= CHIP_VEGA10) {
			gpu_resources.non_cp_doorbells_start =
					adev->doorbell_index.first_non_cp;
			gpu_resources.non_cp_doorbells_end =
					adev->doorbell_index.last_non_cp;
		}

		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
						adev_to_drm(adev), &gpu_resources);
	}
}
174130e0371SOded Gabbay 
175e9669fb7SAndrey Grodzovsky void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
176130e0371SOded Gabbay {
177611736d8SFelix Kuehling 	if (adev->kfd.dev) {
1788e07e267SAmber Lin 		kgd2kfd_device_exit(adev->kfd.dev);
179611736d8SFelix Kuehling 		adev->kfd.dev = NULL;
180130e0371SOded Gabbay 	}
181130e0371SOded Gabbay }
182130e0371SOded Gabbay 
183dc102c43SAndres Rodriguez void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
184130e0371SOded Gabbay 		const void *ih_ring_entry)
185130e0371SOded Gabbay {
186611736d8SFelix Kuehling 	if (adev->kfd.dev)
1878e07e267SAmber Lin 		kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
188130e0371SOded Gabbay }
189130e0371SOded Gabbay 
1909593f4d6SRajneesh Bhardwaj void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
191130e0371SOded Gabbay {
192611736d8SFelix Kuehling 	if (adev->kfd.dev)
1939593f4d6SRajneesh Bhardwaj 		kgd2kfd_suspend(adev->kfd.dev, run_pm);
194130e0371SOded Gabbay }
195130e0371SOded Gabbay 
19680660084SJames Zhu int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev)
19780660084SJames Zhu {
19880660084SJames Zhu 	int r = 0;
19980660084SJames Zhu 
20080660084SJames Zhu 	if (adev->kfd.dev)
20180660084SJames Zhu 		r = kgd2kfd_resume_iommu(adev->kfd.dev);
20280660084SJames Zhu 
20380660084SJames Zhu 	return r;
20480660084SJames Zhu }
20580660084SJames Zhu 
2069593f4d6SRajneesh Bhardwaj int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
207130e0371SOded Gabbay {
208130e0371SOded Gabbay 	int r = 0;
209130e0371SOded Gabbay 
210611736d8SFelix Kuehling 	if (adev->kfd.dev)
2119593f4d6SRajneesh Bhardwaj 		r = kgd2kfd_resume(adev->kfd.dev, run_pm);
212130e0371SOded Gabbay 
213130e0371SOded Gabbay 	return r;
214130e0371SOded Gabbay }
215130e0371SOded Gabbay 
2165c6dd71eSShaoyun Liu int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
2175c6dd71eSShaoyun Liu {
2185c6dd71eSShaoyun Liu 	int r = 0;
2195c6dd71eSShaoyun Liu 
220611736d8SFelix Kuehling 	if (adev->kfd.dev)
2218e07e267SAmber Lin 		r = kgd2kfd_pre_reset(adev->kfd.dev);
2225c6dd71eSShaoyun Liu 
2235c6dd71eSShaoyun Liu 	return r;
2245c6dd71eSShaoyun Liu }
2255c6dd71eSShaoyun Liu 
2265c6dd71eSShaoyun Liu int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
2275c6dd71eSShaoyun Liu {
2285c6dd71eSShaoyun Liu 	int r = 0;
2295c6dd71eSShaoyun Liu 
230611736d8SFelix Kuehling 	if (adev->kfd.dev)
2318e07e267SAmber Lin 		r = kgd2kfd_post_reset(adev->kfd.dev);
2325c6dd71eSShaoyun Liu 
2335c6dd71eSShaoyun Liu 	return r;
2345c6dd71eSShaoyun Liu }
2355c6dd71eSShaoyun Liu 
2366bfc7c7eSGraham Sider void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
23724da5a9cSShaoyun Liu {
23812938fadSChristian König 	if (amdgpu_device_should_recover_gpu(adev))
23912938fadSChristian König 		amdgpu_device_gpu_recover(adev, NULL);
24024da5a9cSShaoyun Liu }
24124da5a9cSShaoyun Liu 
/* Allocate a pinned, GART-bound, kernel-mapped GTT BO for KFD.
 *
 * On success returns 0 and fills:
 *   @mem_obj  - the amdgpu_bo handle (free with amdgpu_amdkfd_free_gtt_mem)
 *   @gpu_addr - GPU (GART) address of the buffer
 *   @cpu_ptr  - kernel CPU mapping of the buffer
 * @cp_mqd_gfx9 adds the AMDGPU_GEM_CREATE_CP_MQD_GFX9 placement flag.
 * On failure returns the negative error code with all resources undone.
 */
int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
				void **mem_obj, uint64_t *gpu_addr,
				void **cpu_ptr, bool cp_mqd_gfx9)
{
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_param bp;
	int r;
	void *cpu_ptr_tmp = NULL;

	/* Page-aligned, uncached write-combined GTT kernel BO. */
	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	if (cp_mqd_gfx9)
		bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;

	r = amdgpu_bo_create(adev, &bp, &bo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate BO for amdkfd (%d)\n", r);
		return r;
	}

	/* map the buffer */
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
		goto allocate_mem_reserve_bo_failed;
	}

	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
	if (r) {
		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
		goto allocate_mem_pin_bo_failed;
	}

	/* Bind into the GART so the BO has a valid GPU address. */
	r = amdgpu_ttm_alloc_gart(&bo->tbo);
	if (r) {
		dev_err(adev->dev, "%p bind failed\n", bo);
		goto allocate_mem_kmap_bo_failed;
	}

	r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
	if (r) {
		dev_err(adev->dev,
			"(%d) failed to map bo to kernel for amdkfd\n", r);
		goto allocate_mem_kmap_bo_failed;
	}

	*mem_obj = bo;
	*gpu_addr = amdgpu_bo_gpu_offset(bo);
	*cpu_ptr = cpu_ptr_tmp;

	amdgpu_bo_unreserve(bo);

	return 0;

/* Error unwinding, in reverse order of acquisition. */
allocate_mem_kmap_bo_failed:
	amdgpu_bo_unpin(bo);
allocate_mem_pin_bo_failed:
	amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
	amdgpu_bo_unref(&bo);

	return r;
}
313130e0371SOded Gabbay 
3146bfc7c7eSGraham Sider void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj)
315130e0371SOded Gabbay {
316473fee47SYong Zhao 	struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
317130e0371SOded Gabbay 
318473fee47SYong Zhao 	amdgpu_bo_reserve(bo, true);
319473fee47SYong Zhao 	amdgpu_bo_kunmap(bo);
320473fee47SYong Zhao 	amdgpu_bo_unpin(bo);
321473fee47SYong Zhao 	amdgpu_bo_unreserve(bo);
322473fee47SYong Zhao 	amdgpu_bo_unref(&(bo));
323130e0371SOded Gabbay }
324130e0371SOded Gabbay 
3256bfc7c7eSGraham Sider int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
326ca66fb8fSOak Zeng 				void **mem_obj)
327ca66fb8fSOak Zeng {
328ca66fb8fSOak Zeng 	struct amdgpu_bo *bo = NULL;
32922b40f7aSNirmoy Das 	struct amdgpu_bo_user *ubo;
330ca66fb8fSOak Zeng 	struct amdgpu_bo_param bp;
331ca66fb8fSOak Zeng 	int r;
332ca66fb8fSOak Zeng 
333ca66fb8fSOak Zeng 	memset(&bp, 0, sizeof(bp));
334ca66fb8fSOak Zeng 	bp.size = size;
335ca66fb8fSOak Zeng 	bp.byte_align = 1;
336ca66fb8fSOak Zeng 	bp.domain = AMDGPU_GEM_DOMAIN_GWS;
337ca66fb8fSOak Zeng 	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
338ca66fb8fSOak Zeng 	bp.type = ttm_bo_type_device;
339ca66fb8fSOak Zeng 	bp.resv = NULL;
3409fd5543eSNirmoy Das 	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
341ca66fb8fSOak Zeng 
34222b40f7aSNirmoy Das 	r = amdgpu_bo_create_user(adev, &bp, &ubo);
343ca66fb8fSOak Zeng 	if (r) {
344ca66fb8fSOak Zeng 		dev_err(adev->dev,
345ca66fb8fSOak Zeng 			"failed to allocate gws BO for amdkfd (%d)\n", r);
346ca66fb8fSOak Zeng 		return r;
347ca66fb8fSOak Zeng 	}
348ca66fb8fSOak Zeng 
34922b40f7aSNirmoy Das 	bo = &ubo->bo;
350ca66fb8fSOak Zeng 	*mem_obj = bo;
351ca66fb8fSOak Zeng 	return 0;
352ca66fb8fSOak Zeng }
353ca66fb8fSOak Zeng 
/* Release a GWS BO previously returned by amdgpu_amdkfd_alloc_gws(). */
void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj)
{
	struct amdgpu_bo *gws_bo = mem_obj;

	amdgpu_bo_unref(&gws_bo);
}
360ca66fb8fSOak Zeng 
361574c4183SGraham Sider uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
3620da8b10eSAmber Lin 				      enum kgd_engine_type type)
3630da8b10eSAmber Lin {
3640da8b10eSAmber Lin 	switch (type) {
3650da8b10eSAmber Lin 	case KGD_ENGINE_PFP:
3660da8b10eSAmber Lin 		return adev->gfx.pfp_fw_version;
3670da8b10eSAmber Lin 
3680da8b10eSAmber Lin 	case KGD_ENGINE_ME:
3690da8b10eSAmber Lin 		return adev->gfx.me_fw_version;
3700da8b10eSAmber Lin 
3710da8b10eSAmber Lin 	case KGD_ENGINE_CE:
3720da8b10eSAmber Lin 		return adev->gfx.ce_fw_version;
3730da8b10eSAmber Lin 
3740da8b10eSAmber Lin 	case KGD_ENGINE_MEC1:
3750da8b10eSAmber Lin 		return adev->gfx.mec_fw_version;
3760da8b10eSAmber Lin 
3770da8b10eSAmber Lin 	case KGD_ENGINE_MEC2:
3780da8b10eSAmber Lin 		return adev->gfx.mec2_fw_version;
3790da8b10eSAmber Lin 
3800da8b10eSAmber Lin 	case KGD_ENGINE_RLC:
3810da8b10eSAmber Lin 		return adev->gfx.rlc_fw_version;
3820da8b10eSAmber Lin 
3830da8b10eSAmber Lin 	case KGD_ENGINE_SDMA1:
3840da8b10eSAmber Lin 		return adev->sdma.instance[0].fw_version;
3850da8b10eSAmber Lin 
3860da8b10eSAmber Lin 	case KGD_ENGINE_SDMA2:
3870da8b10eSAmber Lin 		return adev->sdma.instance[1].fw_version;
3880da8b10eSAmber Lin 
3890da8b10eSAmber Lin 	default:
3900da8b10eSAmber Lin 		return 0;
3910da8b10eSAmber Lin 	}
3920da8b10eSAmber Lin 
3930da8b10eSAmber Lin 	return 0;
3940da8b10eSAmber Lin }
3950da8b10eSAmber Lin 
/* Fill @mem_info with @adev's local (VRAM) memory configuration:
 * the CPU-visible ("public") vs. private VRAM split, the VRAM bus
 * width and the maximum memory clock.
 */
void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
				      struct kfd_local_mem_info *mem_info)
{
	memset(mem_info, 0, sizeof(*mem_info));

	mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
	mem_info->local_mem_size_private = adev->gmc.real_vram_size -
						adev->gmc.visible_vram_size;

	mem_info->vram_width = adev->gmc.vram_width;

	pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",
			&adev->gmc.aper_base,
			mem_info->local_mem_size_public,
			mem_info->local_mem_size_private);

	/* Clock values are kept in 10 kHz quanta, so "/ 100" yields MHz
	 * (same convention as the sclk helper below).
	 */
	if (amdgpu_sriov_vf(adev))
		mem_info->mem_clk_max = adev->clock.default_mclk / 100;
	else if (adev->pm.dpm_enabled) {
		/* Emulation mode has no meaningful memory clock. */
		if (amdgpu_emu_mode == 1)
			mem_info->mem_clk_max = 0;
		else
			mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
	} else
		mem_info->mem_clk_max = 100;
}
42230f1c042SHarish Kasiviswanathan 
423574c4183SGraham Sider uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev)
424130e0371SOded Gabbay {
425dc102c43SAndres Rodriguez 	if (adev->gfx.funcs->get_gpu_clock_counter)
426dc102c43SAndres Rodriguez 		return adev->gfx.funcs->get_gpu_clock_counter(adev);
427130e0371SOded Gabbay 	return 0;
428130e0371SOded Gabbay }
429130e0371SOded Gabbay 
430574c4183SGraham Sider uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
431130e0371SOded Gabbay {
432a9efcc19SFelix Kuehling 	/* the sclk is in quantas of 10kHz */
433a9efcc19SFelix Kuehling 	if (amdgpu_sriov_vf(adev))
434a9efcc19SFelix Kuehling 		return adev->clock.default_sclk / 100;
435944effd3SKent Russell 	else if (adev->pm.dpm_enabled)
436a9efcc19SFelix Kuehling 		return amdgpu_dpm_get_sclk(adev, false) / 100;
4377ba01f9eSShaoyun Liu 	else
4387ba01f9eSShaoyun Liu 		return 100;
439130e0371SOded Gabbay }
440ebdebf42SFlora Cui 
441574c4183SGraham Sider void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *cu_info)
442ebdebf42SFlora Cui {
443ebdebf42SFlora Cui 	struct amdgpu_cu_info acu_info = adev->gfx.cu_info;
444ebdebf42SFlora Cui 
445ebdebf42SFlora Cui 	memset(cu_info, 0, sizeof(*cu_info));
446ebdebf42SFlora Cui 	if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
447ebdebf42SFlora Cui 		return;
448ebdebf42SFlora Cui 
449ebdebf42SFlora Cui 	cu_info->cu_active_number = acu_info.number;
450ebdebf42SFlora Cui 	cu_info->cu_ao_mask = acu_info.ao_cu_mask;
451ebdebf42SFlora Cui 	memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
452ebdebf42SFlora Cui 	       sizeof(acu_info.bitmap));
453ebdebf42SFlora Cui 	cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
454ebdebf42SFlora Cui 	cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
455ebdebf42SFlora Cui 	cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
456ebdebf42SFlora Cui 	cu_info->simd_per_cu = acu_info.simd_per_cu;
457ebdebf42SFlora Cui 	cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
458ebdebf42SFlora Cui 	cu_info->wave_front_size = acu_info.wave_front_size;
459ebdebf42SFlora Cui 	cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
460ebdebf42SFlora Cui 	cu_info->lds_size = acu_info.lds_size;
461ebdebf42SFlora Cui }
4629f0a0b41SKent Russell 
/* Look up KFD-relevant information about a DMA-buf from its fd.
 *
 * Only amdgpu GEM BOs (VRAM or GTT) exported by the same DRM driver are
 * accepted; anything else returns -EINVAL.  Each out parameter may be
 * NULL if the caller does not need it:
 *   @dmabuf_adev - the device that exported the buffer
 *   @bo_size     - buffer size in bytes
 *   @metadata_buffer / @buffer_size / @metadata_size - BO metadata copy-out
 *   @flags       - KFD_IOC_ALLOC_MEM_FLAGS_* describing the BO
 */
int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
				  struct amdgpu_device **dmabuf_adev,
				  uint64_t *bo_size, void *metadata_buffer,
				  size_t buffer_size, uint32_t *metadata_size,
				  uint32_t *flags)
{
	struct dma_buf *dma_buf;
	struct drm_gem_object *obj;
	struct amdgpu_bo *bo;
	uint64_t metadata_flags;
	int r = -EINVAL;

	/* Takes a reference on the dma-buf; dropped at out_put. */
	dma_buf = dma_buf_get(dma_buf_fd);
	if (IS_ERR(dma_buf))
		return PTR_ERR(dma_buf);

	if (dma_buf->ops != &amdgpu_dmabuf_ops)
		/* Can't handle non-graphics buffers */
		goto out_put;

	obj = dma_buf->priv;
	if (obj->dev->driver != adev_to_drm(adev)->driver)
		/* Can't handle buffers from different drivers */
		goto out_put;

	/* From here on, use the exporting device — it may differ from the
	 * @adev the fd was looked up on.
	 */
	adev = drm_to_adev(obj->dev);
	bo = gem_to_amdgpu_bo(obj);
	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
				    AMDGPU_GEM_DOMAIN_GTT)))
		/* Only VRAM and GTT BOs are supported */
		goto out_put;

	r = 0;
	if (dmabuf_adev)
		*dmabuf_adev = adev;
	if (bo_size)
		*bo_size = amdgpu_bo_size(bo);
	if (metadata_buffer)
		r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
					   metadata_size, &metadata_flags);
	if (flags) {
		*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
				KFD_IOC_ALLOC_MEM_FLAGS_VRAM
				: KFD_IOC_ALLOC_MEM_FLAGS_GTT;

		if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
			*flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
	}

out_put:
	dma_buf_put(dma_buf);
	return r;
}
5161dde0ea9SFelix Kuehling 
517574c4183SGraham Sider uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
518574c4183SGraham Sider 					  struct amdgpu_device *src)
519da361dd1Sshaoyunl {
520574c4183SGraham Sider 	struct amdgpu_device *peer_adev = src;
521574c4183SGraham Sider 	struct amdgpu_device *adev = dst;
522da361dd1Sshaoyunl 	int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
523da361dd1Sshaoyunl 
524da361dd1Sshaoyunl 	if (ret < 0) {
525da361dd1Sshaoyunl 		DRM_ERROR("amdgpu: failed to get  xgmi hops count between node %d and %d. ret = %d\n",
526da361dd1Sshaoyunl 			adev->gmc.xgmi.physical_node_id,
527da361dd1Sshaoyunl 			peer_adev->gmc.xgmi.physical_node_id, ret);
528da361dd1Sshaoyunl 		ret = 0;
529da361dd1Sshaoyunl 	}
530da361dd1Sshaoyunl 	return  (uint8_t)ret;
531da361dd1Sshaoyunl }
532db8b62c0SShaoyun Liu 
533574c4183SGraham Sider int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
534574c4183SGraham Sider 					    struct amdgpu_device *src,
535574c4183SGraham Sider 					    bool is_min)
5363f46c4e9SJonathan Kim {
537574c4183SGraham Sider 	struct amdgpu_device *adev = dst, *peer_adev;
5383f46c4e9SJonathan Kim 	int num_links;
5393f46c4e9SJonathan Kim 
5403f46c4e9SJonathan Kim 	if (adev->asic_type != CHIP_ALDEBARAN)
5413f46c4e9SJonathan Kim 		return 0;
5423f46c4e9SJonathan Kim 
5433f46c4e9SJonathan Kim 	if (src)
544574c4183SGraham Sider 		peer_adev = src;
5453f46c4e9SJonathan Kim 
5463f46c4e9SJonathan Kim 	/* num links returns 0 for indirect peers since indirect route is unknown. */
5473f46c4e9SJonathan Kim 	num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev);
5483f46c4e9SJonathan Kim 	if (num_links < 0) {
5493f46c4e9SJonathan Kim 		DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n",
5503f46c4e9SJonathan Kim 			adev->gmc.xgmi.physical_node_id,
5513f46c4e9SJonathan Kim 			peer_adev->gmc.xgmi.physical_node_id, num_links);
5523f46c4e9SJonathan Kim 		num_links = 0;
5533f46c4e9SJonathan Kim 	}
5543f46c4e9SJonathan Kim 
5553f46c4e9SJonathan Kim 	/* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */
5563f46c4e9SJonathan Kim 	return (num_links * 16 * 25000)/BITS_PER_BYTE;
5573f46c4e9SJonathan Kim }
5583f46c4e9SJonathan Kim 
/* Estimate PCIe bandwidth to the GPU in MB/s from the supported link
 * width and link speed capability masks.  @is_min picks the slowest
 * supported width/speed bit (ffs), otherwise the fastest (fls).
 */
int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min)
{
	int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) :
							fls(adev->pm.pcie_mlw_mask)) - 1;
	int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask &
						CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) :
					fls(adev->pm.pcie_gen_mask &
						CAIL_PCIE_LINK_SPEED_SUPPORT_MASK)) - 1;
	uint32_t num_lanes_mask = 1 << num_lanes_shift;
	uint32_t gen_speed_mask = 1 << gen_speed_shift;
	int num_lanes_factor = 0, gen_speed_mbits_factor = 0;

	/* Decode the selected lane-width capability bit into a lane count.
	 * Unknown bits leave the factor at 0 (reported bandwidth 0).
	 */
	switch (num_lanes_mask) {
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X1:
		num_lanes_factor = 1;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X2:
		num_lanes_factor = 2;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X4:
		num_lanes_factor = 4;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X8:
		num_lanes_factor = 8;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X12:
		num_lanes_factor = 12;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X16:
		num_lanes_factor = 16;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X32:
		num_lanes_factor = 32;
		break;
	}

	/* Decode the selected generation bit into a per-lane rate in Mbit/s. */
	switch (gen_speed_mask) {
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1:
		gen_speed_mbits_factor = 2500;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2:
		gen_speed_mbits_factor = 5000;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3:
		gen_speed_mbits_factor = 8000;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4:
		gen_speed_mbits_factor = 16000;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5:
		gen_speed_mbits_factor = 32000;
		break;
	}

	/* lanes x Mbit/s per lane, divided by bits-per-byte -> MB/s. */
	return (num_lanes_factor * gen_speed_mbits_factor)/BITS_PER_BYTE;
}
61593304810SJonathan Kim 
/**
 * amdgpu_amdkfd_submit_ib - synchronously submit a single IB for KFD
 * @adev: amdgpu device the IB is submitted on
 * @engine: which engine to use (MEC1 compute ring 0, SDMA instance 0 or 1)
 * @vmid: VMID the IB executes under (written directly into the job)
 * @gpu_addr: GPU virtual address of the IB
 * @ib_cmd: CPU pointer to the IB command dwords
 * @ib_len: IB length in dwords
 *
 * Schedules one IB on the selected ring and blocks until its fence
 * signals.  Returns 0 on success or a negative errno on failure
 * (-EINVAL for an unknown engine, or the error from job allocation,
 * scheduling, or the fence wait).
 */
int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
				enum kgd_engine_type engine,
				uint32_t vmid, uint64_t gpu_addr,
				uint32_t *ib_cmd, uint32_t ib_len)
{
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct amdgpu_ring *ring;
	struct dma_fence *f = NULL;
	int ret;

	/* Map the KFD engine type onto a concrete amdgpu ring. */
	switch (engine) {
	case KGD_ENGINE_MEC1:
		ring = &adev->gfx.compute_ring[0];
		break;
	case KGD_ENGINE_SDMA1:
		ring = &adev->sdma.instance[0].ring;
		break;
	case KGD_ENGINE_SDMA2:
		ring = &adev->sdma.instance[1].ring;
		break;
	default:
		pr_err("Invalid engine in IB submission: %d\n", engine);
		ret = -EINVAL;
		goto err;
	}

	/* One job carrying exactly one IB, no entity (direct submission). */
	ret = amdgpu_job_alloc(adev, 1, &job, NULL);
	if (ret)
		goto err;

	ib = &job->ibs[0];
	memset(ib, 0, sizeof(struct amdgpu_ib));

	ib->gpu_addr = gpu_addr;
	ib->ptr = ib_cmd;
	ib->length_dw = ib_len;
	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
	job->vmid = vmid;

	ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);

	if (ret) {
		DRM_ERROR("amdgpu: failed to schedule IB.\n");
		goto err_ib_sched;
	}

	/* Synchronous submission: block until the hardware fence signals.
	 * NOTE(review): f is waited on but never dma_fence_put() here —
	 * presumably amdgpu_job_free() drops the job's fence reference;
	 * confirm against the job lifecycle to rule out a refcount leak.
	 */
	ret = dma_fence_wait(f, false);

err_ib_sched:
	amdgpu_job_free(job);
err:
	return ret;
}
6704c660c8fSFelix Kuehling 
6716bfc7c7eSGraham Sider void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
67201c097dbSFelix Kuehling {
67301c097dbSFelix Kuehling 	amdgpu_dpm_switch_power_profile(adev,
674919a52fcSFelix Kuehling 					PP_SMC_POWER_PROFILE_COMPUTE,
675919a52fcSFelix Kuehling 					!idle);
67601c097dbSFelix Kuehling }
67701c097dbSFelix Kuehling 
678155494dbSFelix Kuehling bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
679155494dbSFelix Kuehling {
68040111ec2SFelix Kuehling 	if (adev->kfd.dev)
68140111ec2SFelix Kuehling 		return vmid >= adev->vm_manager.first_kfd_vmid;
682155494dbSFelix Kuehling 
683155494dbSFelix Kuehling 	return false;
684155494dbSFelix Kuehling }
685fcdfa432SOded Gabbay 
6866bfc7c7eSGraham Sider int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
6876bfc7c7eSGraham Sider 				     uint16_t vmid)
688ffa02269SAlex Sierra {
689ffa02269SAlex Sierra 	if (adev->family == AMDGPU_FAMILY_AI) {
690ffa02269SAlex Sierra 		int i;
691ffa02269SAlex Sierra 
692ffa02269SAlex Sierra 		for (i = 0; i < adev->num_vmhubs; i++)
693ffa02269SAlex Sierra 			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
694ffa02269SAlex Sierra 	} else {
695ffa02269SAlex Sierra 		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
696ffa02269SAlex Sierra 	}
697ffa02269SAlex Sierra 
698ffa02269SAlex Sierra 	return 0;
699ffa02269SAlex Sierra }
700ffa02269SAlex Sierra 
7016bfc7c7eSGraham Sider int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
7026bfc7c7eSGraham Sider 				      uint16_t pasid, enum TLB_FLUSH_TYPE flush_type)
703ffa02269SAlex Sierra {
704ffa02269SAlex Sierra 	bool all_hub = false;
705ffa02269SAlex Sierra 
706ffa02269SAlex Sierra 	if (adev->family == AMDGPU_FAMILY_AI)
707ffa02269SAlex Sierra 		all_hub = true;
708ffa02269SAlex Sierra 
709ffa02269SAlex Sierra 	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
710ffa02269SAlex Sierra }
711ffa02269SAlex Sierra 
/**
 * amdgpu_amdkfd_have_atomics_support - query atomics capability for KFD
 * @adev: amdgpu device to query
 *
 * Return: the device's cached have_atomics_support flag.  Presumably set
 * during device init from PCIe atomic-op capability — confirm where the
 * flag is populated before relying on its exact semantics.
 */
bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
{
	return adev->have_atomics_support;
}
716c7490949STao Zhou 
717b6485bedSTao Zhou void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset)
718c7490949STao Zhou {
719c7490949STao Zhou 	struct ras_err_data err_data = {0, 0, 0, NULL};
720c7490949STao Zhou 
721c7490949STao Zhou 	/* CPU MCA will handle page retirement if connected_to_cpu is 1 */
722c7490949STao Zhou 	if (!adev->gmc.xgmi.connected_to_cpu)
723fec8c524STao Zhou 		amdgpu_umc_poison_handler(adev, &err_data, reset);
724b6485bedSTao Zhou 	else if (reset)
7256bfc7c7eSGraham Sider 		amdgpu_amdkfd_gpu_reset(adev);
726c7490949STao Zhou }
727*6475ae2bSTao Zhou 
728*6475ae2bSTao Zhou bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
729*6475ae2bSTao Zhou {
730*6475ae2bSTao Zhou 	if (adev->gfx.ras->query_utcl2_poison_status)
731*6475ae2bSTao Zhou 		return adev->gfx.ras->query_utcl2_poison_status(adev);
732*6475ae2bSTao Zhou 	else
733*6475ae2bSTao Zhou 		return false;
734*6475ae2bSTao Zhou }
735