xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c (revision 2302d507149f0ae7cc697089ab5675a2d4cf9d2a)
12f4ca1baSJingyu Wang // SPDX-License-Identifier: MIT
2130e0371SOded Gabbay /*
3130e0371SOded Gabbay  * Copyright 2014 Advanced Micro Devices, Inc.
4130e0371SOded Gabbay  *
5130e0371SOded Gabbay  * Permission is hereby granted, free of charge, to any person obtaining a
6130e0371SOded Gabbay  * copy of this software and associated documentation files (the "Software"),
7130e0371SOded Gabbay  * to deal in the Software without restriction, including without limitation
8130e0371SOded Gabbay  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9130e0371SOded Gabbay  * and/or sell copies of the Software, and to permit persons to whom the
10130e0371SOded Gabbay  * Software is furnished to do so, subject to the following conditions:
11130e0371SOded Gabbay  *
12130e0371SOded Gabbay  * The above copyright notice and this permission notice shall be included in
13130e0371SOded Gabbay  * all copies or substantial portions of the Software.
14130e0371SOded Gabbay  *
15130e0371SOded Gabbay  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16130e0371SOded Gabbay  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17130e0371SOded Gabbay  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18130e0371SOded Gabbay  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19130e0371SOded Gabbay  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20130e0371SOded Gabbay  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21130e0371SOded Gabbay  * OTHER DEALINGS IN THE SOFTWARE.
22130e0371SOded Gabbay  */
23130e0371SOded Gabbay 
24130e0371SOded Gabbay #include "amdgpu_amdkfd.h"
2593304810SJonathan Kim #include "amd_pcie.h"
262f7d10b3SJammy Zhou #include "amd_shared.h"
27fdf2f6c5SSam Ravnborg 
28130e0371SOded Gabbay #include "amdgpu.h"
292db0cdbeSAlex Deucher #include "amdgpu_gfx.h"
302fbd6f94SChristian König #include "amdgpu_dma_buf.h"
31130e0371SOded Gabbay #include <linux/module.h>
321dde0ea9SFelix Kuehling #include <linux/dma-buf.h>
33da361dd1Sshaoyunl #include "amdgpu_xgmi.h"
341d251d90SYong Zhao #include <uapi/linux/kfd_ioctl.h>
35c7490949STao Zhou #include "amdgpu_ras.h"
36c7490949STao Zhou #include "amdgpu_umc.h"
37b5fd0cf3SAndrey Grodzovsky #include "amdgpu_reset.h"
38130e0371SOded Gabbay 
39611736d8SFelix Kuehling /* Total memory size in system memory and all GPU VRAM. Used to
40611736d8SFelix Kuehling  * estimate worst case amount of memory to reserve for page tables
41611736d8SFelix Kuehling  */
42611736d8SFelix Kuehling uint64_t amdgpu_amdkfd_total_mem_size;
43611736d8SFelix Kuehling 
44402bde58Skernel test robot static bool kfd_initialized;
45c7651b73SFelix Kuehling 
46efb1c658SOded Gabbay int amdgpu_amdkfd_init(void)
47130e0371SOded Gabbay {
48611736d8SFelix Kuehling 	struct sysinfo si;
49efb1c658SOded Gabbay 	int ret;
50efb1c658SOded Gabbay 
51611736d8SFelix Kuehling 	si_meminfo(&si);
52df23d1bbSOak Zeng 	amdgpu_amdkfd_total_mem_size = si.freeram - si.freehigh;
53611736d8SFelix Kuehling 	amdgpu_amdkfd_total_mem_size *= si.mem_unit;
54611736d8SFelix Kuehling 
55308176d6SAmber Lin 	ret = kgd2kfd_init();
5682b7b619SAmber Lin 	amdgpu_amdkfd_gpuvm_init_mem_limits();
57c7651b73SFelix Kuehling 	kfd_initialized = !ret;
58fcdfa432SOded Gabbay 
59efb1c658SOded Gabbay 	return ret;
60130e0371SOded Gabbay }
61130e0371SOded Gabbay 
62130e0371SOded Gabbay void amdgpu_amdkfd_fini(void)
63130e0371SOded Gabbay {
64c7651b73SFelix Kuehling 	if (kfd_initialized) {
658e07e267SAmber Lin 		kgd2kfd_exit();
66c7651b73SFelix Kuehling 		kfd_initialized = false;
67c7651b73SFelix Kuehling 	}
68130e0371SOded Gabbay }
69130e0371SOded Gabbay 
70dc102c43SAndres Rodriguez void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
71130e0371SOded Gabbay {
72050091abSYong Zhao 	bool vf = amdgpu_sriov_vf(adev);
735c33f214SFelix Kuehling 
74c7651b73SFelix Kuehling 	if (!kfd_initialized)
75c7651b73SFelix Kuehling 		return;
76c7651b73SFelix Kuehling 
77b5d1d755SGraham Sider 	adev->kfd.dev = kgd2kfd_probe(adev, vf);
78130e0371SOded Gabbay }
79130e0371SOded Gabbay 
8022cb0164SAlex Deucher /**
8122cb0164SAlex Deucher  * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to
8222cb0164SAlex Deucher  *                                setup amdkfd
8322cb0164SAlex Deucher  *
8422cb0164SAlex Deucher  * @adev: amdgpu_device pointer
8522cb0164SAlex Deucher  * @aperture_base: output returning doorbell aperture base physical address
8622cb0164SAlex Deucher  * @aperture_size: output returning doorbell aperture size in bytes
8722cb0164SAlex Deucher  * @start_offset: output returning # of doorbell bytes reserved for amdgpu.
8822cb0164SAlex Deucher  *
8922cb0164SAlex Deucher  * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up,
9022cb0164SAlex Deucher  * takes doorbells required for its own rings and reports the setup to amdkfd.
9122cb0164SAlex Deucher  * amdgpu reserved doorbells are at the start of the doorbell aperture.
9222cb0164SAlex Deucher  */
9322cb0164SAlex Deucher static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
9422cb0164SAlex Deucher 					 phys_addr_t *aperture_base,
9522cb0164SAlex Deucher 					 size_t *aperture_size,
9622cb0164SAlex Deucher 					 size_t *start_offset)
9722cb0164SAlex Deucher {
9822cb0164SAlex Deucher 	/*
9922cb0164SAlex Deucher 	 * The first num_doorbells are used by amdgpu.
10022cb0164SAlex Deucher 	 * amdkfd takes whatever's left in the aperture.
10122cb0164SAlex Deucher 	 */
102cc009e61SMukul Joshi 	if (adev->enable_mes) {
103cc009e61SMukul Joshi 		/*
104cc009e61SMukul Joshi 		 * With MES enabled, we only need to initialize
105cc009e61SMukul Joshi 		 * the base address. The size and offset are
106cc009e61SMukul Joshi 		 * not initialized as AMDGPU manages the whole
107cc009e61SMukul Joshi 		 * doorbell space.
108cc009e61SMukul Joshi 		 */
109cc009e61SMukul Joshi 		*aperture_base = adev->doorbell.base;
110cc009e61SMukul Joshi 		*aperture_size = 0;
111cc009e61SMukul Joshi 		*start_offset = 0;
112cc009e61SMukul Joshi 	} else if (adev->doorbell.size > adev->doorbell.num_doorbells *
113cc009e61SMukul Joshi 						sizeof(u32)) {
11422cb0164SAlex Deucher 		*aperture_base = adev->doorbell.base;
11522cb0164SAlex Deucher 		*aperture_size = adev->doorbell.size;
11622cb0164SAlex Deucher 		*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
11722cb0164SAlex Deucher 	} else {
11822cb0164SAlex Deucher 		*aperture_base = 0;
11922cb0164SAlex Deucher 		*aperture_size = 0;
12022cb0164SAlex Deucher 		*start_offset = 0;
12122cb0164SAlex Deucher 	}
12222cb0164SAlex Deucher }
12322cb0164SAlex Deucher 
124b5fd0cf3SAndrey Grodzovsky 
125b5fd0cf3SAndrey Grodzovsky static void amdgpu_amdkfd_reset_work(struct work_struct *work)
126b5fd0cf3SAndrey Grodzovsky {
127b5fd0cf3SAndrey Grodzovsky 	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
128b5fd0cf3SAndrey Grodzovsky 						  kfd.reset_work);
129b5fd0cf3SAndrey Grodzovsky 
130f1549c09SLikun Gao 	struct amdgpu_reset_context reset_context;
1312f4ca1baSJingyu Wang 
132f1549c09SLikun Gao 	memset(&reset_context, 0, sizeof(reset_context));
133f1549c09SLikun Gao 
134f1549c09SLikun Gao 	reset_context.method = AMD_RESET_METHOD_NONE;
135f1549c09SLikun Gao 	reset_context.reset_req_dev = adev;
136f1549c09SLikun Gao 	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
137dac6b808SVictor Zhao 	clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
138f1549c09SLikun Gao 
139f1549c09SLikun Gao 	amdgpu_device_gpu_recover(adev, NULL, &reset_context);
140b5fd0cf3SAndrey Grodzovsky }
141b5fd0cf3SAndrey Grodzovsky 
/* Finish per-device KFD bring-up: describe the resources amdgpu shares
 * with amdkfd (VMIDs, compute queues, doorbells) and hand them to
 * kgd2kfd_device_init(). No-op when kgd2kfd_probe() did not attach a
 * KFD device to this adev.
 */
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
	int i;
	int last_valid_bit;

	if (adev->kfd.dev) {
		struct kgd2kfd_shared_resources gpu_resources = {
			/* VMIDs from first_kfd_vmid upward belong to KFD. */
			.compute_vmid_bitmap =
				((1 << AMDGPU_NUM_VMID) - 1) -
				((1 << adev->vm_manager.first_kfd_vmid) - 1),
			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
			/* GPUVM space available to KFD, capped at the GMC hole. */
			.gpuvm_size = min(adev->vm_manager.max_pfn
					  << AMDGPU_GPU_PAGE_SHIFT,
					  AMDGPU_GMC_HOLE_START),
			.drm_render_minor = adev_to_drm(adev)->render->index,
			.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
			.enable_mes = adev->enable_mes,
		};

		/* this is going to have a few of the MSBs set that we need to
		 * clear
		 */
		bitmap_complement(gpu_resources.cp_queue_bitmap,
				  adev->gfx.mec.queue_bitmap,
				  KGD_MAX_QUEUES);

		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
		 * nbits is not compile time constant
		 */
		last_valid_bit = 1 /* only first MEC can have compute queues */
				* adev->gfx.mec.num_pipe_per_mec
				* adev->gfx.mec.num_queue_per_pipe;
		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
			clear_bit(i, gpu_resources.cp_queue_bitmap);

		/* Tell KFD which slice of the doorbell aperture it owns. */
		amdgpu_doorbell_get_kfd_info(adev,
				&gpu_resources.doorbell_physical_address,
				&gpu_resources.doorbell_aperture_size,
				&gpu_resources.doorbell_start_offset);

		/* Since SOC15, BIF starts to statically use the
		 * lower 12 bits of doorbell addresses for routing
		 * based on settings in registers like
		 * SDMA0_DOORBELL_RANGE etc..
		 * In order to route a doorbell to CP engine, the lower
		 * 12 bits of its address has to be outside the range
		 * set for SDMA, VCN, and IH blocks.
		 */
		if (adev->asic_type >= CHIP_VEGA10) {
			gpu_resources.non_cp_doorbells_start =
					adev->doorbell_index.first_non_cp;
			gpu_resources.non_cp_doorbells_end =
					adev->doorbell_index.last_non_cp;
		}

		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
						adev_to_drm(adev), &gpu_resources);

		/* Count this GPU's VRAM toward the page-table reserve estimate;
		 * undone in amdgpu_amdkfd_device_fini_sw().
		 */
		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;

		INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
	}
}
206130e0371SOded Gabbay 
207e9669fb7SAndrey Grodzovsky void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
208130e0371SOded Gabbay {
209611736d8SFelix Kuehling 	if (adev->kfd.dev) {
2108e07e267SAmber Lin 		kgd2kfd_device_exit(adev->kfd.dev);
211611736d8SFelix Kuehling 		adev->kfd.dev = NULL;
212*2302d507SPhilip Yang 		amdgpu_amdkfd_total_mem_size -= adev->gmc.real_vram_size;
213130e0371SOded Gabbay 	}
214130e0371SOded Gabbay }
215130e0371SOded Gabbay 
216dc102c43SAndres Rodriguez void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
217130e0371SOded Gabbay 		const void *ih_ring_entry)
218130e0371SOded Gabbay {
219611736d8SFelix Kuehling 	if (adev->kfd.dev)
2208e07e267SAmber Lin 		kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
221130e0371SOded Gabbay }
222130e0371SOded Gabbay 
2239593f4d6SRajneesh Bhardwaj void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
224130e0371SOded Gabbay {
225611736d8SFelix Kuehling 	if (adev->kfd.dev)
2269593f4d6SRajneesh Bhardwaj 		kgd2kfd_suspend(adev->kfd.dev, run_pm);
227130e0371SOded Gabbay }
228130e0371SOded Gabbay 
22980660084SJames Zhu int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev)
23080660084SJames Zhu {
23180660084SJames Zhu 	int r = 0;
23280660084SJames Zhu 
23380660084SJames Zhu 	if (adev->kfd.dev)
23480660084SJames Zhu 		r = kgd2kfd_resume_iommu(adev->kfd.dev);
23580660084SJames Zhu 
23680660084SJames Zhu 	return r;
23780660084SJames Zhu }
23880660084SJames Zhu 
2399593f4d6SRajneesh Bhardwaj int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
240130e0371SOded Gabbay {
241130e0371SOded Gabbay 	int r = 0;
242130e0371SOded Gabbay 
243611736d8SFelix Kuehling 	if (adev->kfd.dev)
2449593f4d6SRajneesh Bhardwaj 		r = kgd2kfd_resume(adev->kfd.dev, run_pm);
245130e0371SOded Gabbay 
246130e0371SOded Gabbay 	return r;
247130e0371SOded Gabbay }
248130e0371SOded Gabbay 
2495c6dd71eSShaoyun Liu int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
2505c6dd71eSShaoyun Liu {
2515c6dd71eSShaoyun Liu 	int r = 0;
2525c6dd71eSShaoyun Liu 
253611736d8SFelix Kuehling 	if (adev->kfd.dev)
2548e07e267SAmber Lin 		r = kgd2kfd_pre_reset(adev->kfd.dev);
2555c6dd71eSShaoyun Liu 
2565c6dd71eSShaoyun Liu 	return r;
2575c6dd71eSShaoyun Liu }
2585c6dd71eSShaoyun Liu 
2595c6dd71eSShaoyun Liu int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
2605c6dd71eSShaoyun Liu {
2615c6dd71eSShaoyun Liu 	int r = 0;
2625c6dd71eSShaoyun Liu 
263611736d8SFelix Kuehling 	if (adev->kfd.dev)
2648e07e267SAmber Lin 		r = kgd2kfd_post_reset(adev->kfd.dev);
2655c6dd71eSShaoyun Liu 
2665c6dd71eSShaoyun Liu 	return r;
2675c6dd71eSShaoyun Liu }
2685c6dd71eSShaoyun Liu 
2696bfc7c7eSGraham Sider void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
27024da5a9cSShaoyun Liu {
27112938fadSChristian König 	if (amdgpu_device_should_recover_gpu(adev))
272b5fd0cf3SAndrey Grodzovsky 		amdgpu_reset_domain_schedule(adev->reset_domain,
273b5fd0cf3SAndrey Grodzovsky 					     &adev->kfd.reset_work);
27424da5a9cSShaoyun Liu }
27524da5a9cSShaoyun Liu 
/* Allocate a kernel GTT BO for KFD and return it pinned, GART-bound and
 * CPU-mapped.
 *
 * @adev:        device to allocate from
 * @size:        allocation size in bytes (page aligned by the BO layer)
 * @mem_obj:     output, opaque handle (the amdgpu_bo) for later freeing
 * @gpu_addr:    output, GPU VA of the pinned BO
 * @cpu_ptr:     output, kernel CPU mapping of the BO
 * @cp_mqd_gfx9: request the GFX9 CP-MQD placement flag for the BO
 *
 * Returns 0 on success or a negative errno; on failure all intermediate
 * state is unwound via the goto cleanup chain below.
 */
int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
				void **mem_obj, uint64_t *gpu_addr,
				void **cpu_ptr, bool cp_mqd_gfx9)
{
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_param bp;
	int r;
	void *cpu_ptr_tmp = NULL;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	if (cp_mqd_gfx9)
		bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;

	r = amdgpu_bo_create(adev, &bp, &bo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate BO for amdkfd (%d)\n", r);
		return r;
	}

	/* map the buffer */
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
		goto allocate_mem_reserve_bo_failed;
	}

	/* Pin so the BO can't be evicted while KFD uses it. */
	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
	if (r) {
		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
		goto allocate_mem_pin_bo_failed;
	}

	/* Bind the pages into the GART so gpu_addr below is valid. */
	r = amdgpu_ttm_alloc_gart(&bo->tbo);
	if (r) {
		dev_err(adev->dev, "%p bind failed\n", bo);
		goto allocate_mem_kmap_bo_failed;
	}

	r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
	if (r) {
		dev_err(adev->dev,
			"(%d) failed to map bo to kernel for amdkfd\n", r);
		goto allocate_mem_kmap_bo_failed;
	}

	/* Publish results only after every step has succeeded. */
	*mem_obj = bo;
	*gpu_addr = amdgpu_bo_gpu_offset(bo);
	*cpu_ptr = cpu_ptr_tmp;

	amdgpu_bo_unreserve(bo);

	return 0;

/* Unwind in reverse order of acquisition. */
allocate_mem_kmap_bo_failed:
	amdgpu_bo_unpin(bo);
allocate_mem_pin_bo_failed:
	amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
	amdgpu_bo_unref(&bo);

	return r;
}
347130e0371SOded Gabbay 
3486bfc7c7eSGraham Sider void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj)
349130e0371SOded Gabbay {
350473fee47SYong Zhao 	struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
351130e0371SOded Gabbay 
352473fee47SYong Zhao 	amdgpu_bo_reserve(bo, true);
353473fee47SYong Zhao 	amdgpu_bo_kunmap(bo);
354473fee47SYong Zhao 	amdgpu_bo_unpin(bo);
355473fee47SYong Zhao 	amdgpu_bo_unreserve(bo);
356473fee47SYong Zhao 	amdgpu_bo_unref(&(bo));
357130e0371SOded Gabbay }
358130e0371SOded Gabbay 
3596bfc7c7eSGraham Sider int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
360ca66fb8fSOak Zeng 				void **mem_obj)
361ca66fb8fSOak Zeng {
362ca66fb8fSOak Zeng 	struct amdgpu_bo *bo = NULL;
36322b40f7aSNirmoy Das 	struct amdgpu_bo_user *ubo;
364ca66fb8fSOak Zeng 	struct amdgpu_bo_param bp;
365ca66fb8fSOak Zeng 	int r;
366ca66fb8fSOak Zeng 
367ca66fb8fSOak Zeng 	memset(&bp, 0, sizeof(bp));
368ca66fb8fSOak Zeng 	bp.size = size;
369ca66fb8fSOak Zeng 	bp.byte_align = 1;
370ca66fb8fSOak Zeng 	bp.domain = AMDGPU_GEM_DOMAIN_GWS;
371ca66fb8fSOak Zeng 	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
372ca66fb8fSOak Zeng 	bp.type = ttm_bo_type_device;
373ca66fb8fSOak Zeng 	bp.resv = NULL;
3749fd5543eSNirmoy Das 	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
375ca66fb8fSOak Zeng 
37622b40f7aSNirmoy Das 	r = amdgpu_bo_create_user(adev, &bp, &ubo);
377ca66fb8fSOak Zeng 	if (r) {
378ca66fb8fSOak Zeng 		dev_err(adev->dev,
379ca66fb8fSOak Zeng 			"failed to allocate gws BO for amdkfd (%d)\n", r);
380ca66fb8fSOak Zeng 		return r;
381ca66fb8fSOak Zeng 	}
382ca66fb8fSOak Zeng 
38322b40f7aSNirmoy Das 	bo = &ubo->bo;
384ca66fb8fSOak Zeng 	*mem_obj = bo;
385ca66fb8fSOak Zeng 	return 0;
386ca66fb8fSOak Zeng }
387ca66fb8fSOak Zeng 
/* Drop the reference taken by amdgpu_amdkfd_alloc_gws(). */
void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj)
{
	struct amdgpu_bo *bo = mem_obj;

	amdgpu_bo_unref(&bo);
}
394ca66fb8fSOak Zeng 
395574c4183SGraham Sider uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
3960da8b10eSAmber Lin 				      enum kgd_engine_type type)
3970da8b10eSAmber Lin {
3980da8b10eSAmber Lin 	switch (type) {
3990da8b10eSAmber Lin 	case KGD_ENGINE_PFP:
4000da8b10eSAmber Lin 		return adev->gfx.pfp_fw_version;
4010da8b10eSAmber Lin 
4020da8b10eSAmber Lin 	case KGD_ENGINE_ME:
4030da8b10eSAmber Lin 		return adev->gfx.me_fw_version;
4040da8b10eSAmber Lin 
4050da8b10eSAmber Lin 	case KGD_ENGINE_CE:
4060da8b10eSAmber Lin 		return adev->gfx.ce_fw_version;
4070da8b10eSAmber Lin 
4080da8b10eSAmber Lin 	case KGD_ENGINE_MEC1:
4090da8b10eSAmber Lin 		return adev->gfx.mec_fw_version;
4100da8b10eSAmber Lin 
4110da8b10eSAmber Lin 	case KGD_ENGINE_MEC2:
4120da8b10eSAmber Lin 		return adev->gfx.mec2_fw_version;
4130da8b10eSAmber Lin 
4140da8b10eSAmber Lin 	case KGD_ENGINE_RLC:
4150da8b10eSAmber Lin 		return adev->gfx.rlc_fw_version;
4160da8b10eSAmber Lin 
4170da8b10eSAmber Lin 	case KGD_ENGINE_SDMA1:
4180da8b10eSAmber Lin 		return adev->sdma.instance[0].fw_version;
4190da8b10eSAmber Lin 
4200da8b10eSAmber Lin 	case KGD_ENGINE_SDMA2:
4210da8b10eSAmber Lin 		return adev->sdma.instance[1].fw_version;
4220da8b10eSAmber Lin 
4230da8b10eSAmber Lin 	default:
4240da8b10eSAmber Lin 		return 0;
4250da8b10eSAmber Lin 	}
4260da8b10eSAmber Lin 
4270da8b10eSAmber Lin 	return 0;
4280da8b10eSAmber Lin }
4290da8b10eSAmber Lin 
430574c4183SGraham Sider void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
43130f1c042SHarish Kasiviswanathan 				      struct kfd_local_mem_info *mem_info)
43230f1c042SHarish Kasiviswanathan {
43330f1c042SHarish Kasiviswanathan 	memset(mem_info, 0, sizeof(*mem_info));
4344c7e8a9eSGang Ba 
435770d13b1SChristian König 	mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
436770d13b1SChristian König 	mem_info->local_mem_size_private = adev->gmc.real_vram_size -
437770d13b1SChristian König 						adev->gmc.visible_vram_size;
4384c7e8a9eSGang Ba 
439770d13b1SChristian König 	mem_info->vram_width = adev->gmc.vram_width;
44030f1c042SHarish Kasiviswanathan 
4414c7e8a9eSGang Ba 	pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",
4424c7e8a9eSGang Ba 			&adev->gmc.aper_base,
44330f1c042SHarish Kasiviswanathan 			mem_info->local_mem_size_public,
44430f1c042SHarish Kasiviswanathan 			mem_info->local_mem_size_private);
44530f1c042SHarish Kasiviswanathan 
44630f1c042SHarish Kasiviswanathan 	if (amdgpu_sriov_vf(adev))
44730f1c042SHarish Kasiviswanathan 		mem_info->mem_clk_max = adev->clock.default_mclk / 100;
448944effd3SKent Russell 	else if (adev->pm.dpm_enabled) {
4496bdadb20SHawking Zhang 		if (amdgpu_emu_mode == 1)
4506bdadb20SHawking Zhang 			mem_info->mem_clk_max = 0;
4517ba01f9eSShaoyun Liu 		else
4526bdadb20SHawking Zhang 			mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
4536bdadb20SHawking Zhang 	} else
4547ba01f9eSShaoyun Liu 		mem_info->mem_clk_max = 100;
45530f1c042SHarish Kasiviswanathan }
45630f1c042SHarish Kasiviswanathan 
457574c4183SGraham Sider uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev)
458130e0371SOded Gabbay {
459dc102c43SAndres Rodriguez 	if (adev->gfx.funcs->get_gpu_clock_counter)
460dc102c43SAndres Rodriguez 		return adev->gfx.funcs->get_gpu_clock_counter(adev);
461130e0371SOded Gabbay 	return 0;
462130e0371SOded Gabbay }
463130e0371SOded Gabbay 
464574c4183SGraham Sider uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
465130e0371SOded Gabbay {
466a9efcc19SFelix Kuehling 	/* the sclk is in quantas of 10kHz */
467a9efcc19SFelix Kuehling 	if (amdgpu_sriov_vf(adev))
468a9efcc19SFelix Kuehling 		return adev->clock.default_sclk / 100;
469944effd3SKent Russell 	else if (adev->pm.dpm_enabled)
470a9efcc19SFelix Kuehling 		return amdgpu_dpm_get_sclk(adev, false) / 100;
4717ba01f9eSShaoyun Liu 	else
4727ba01f9eSShaoyun Liu 		return 100;
473130e0371SOded Gabbay }
474ebdebf42SFlora Cui 
475574c4183SGraham Sider void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *cu_info)
476ebdebf42SFlora Cui {
477ebdebf42SFlora Cui 	struct amdgpu_cu_info acu_info = adev->gfx.cu_info;
478ebdebf42SFlora Cui 
479ebdebf42SFlora Cui 	memset(cu_info, 0, sizeof(*cu_info));
480ebdebf42SFlora Cui 	if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
481ebdebf42SFlora Cui 		return;
482ebdebf42SFlora Cui 
483ebdebf42SFlora Cui 	cu_info->cu_active_number = acu_info.number;
484ebdebf42SFlora Cui 	cu_info->cu_ao_mask = acu_info.ao_cu_mask;
485ebdebf42SFlora Cui 	memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
486ebdebf42SFlora Cui 	       sizeof(acu_info.bitmap));
487ebdebf42SFlora Cui 	cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
488ebdebf42SFlora Cui 	cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
489ebdebf42SFlora Cui 	cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
490ebdebf42SFlora Cui 	cu_info->simd_per_cu = acu_info.simd_per_cu;
491ebdebf42SFlora Cui 	cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
492ebdebf42SFlora Cui 	cu_info->wave_front_size = acu_info.wave_front_size;
493ebdebf42SFlora Cui 	cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
494ebdebf42SFlora Cui 	cu_info->lds_size = acu_info.lds_size;
495ebdebf42SFlora Cui }
4969f0a0b41SKent Russell 
/* Inspect a dma-buf fd and report the backing amdgpu BO's properties.
 *
 * @adev:            the importing device (only used to match the DRM driver)
 * @dma_buf_fd:      fd of the dma-buf to query
 * @dmabuf_adev:     optional output, the device that EXPORTED the buffer
 * @bo_size:         optional output, BO size in bytes
 * @metadata_buffer: optional output buffer for BO metadata
 * @buffer_size:     capacity of @metadata_buffer
 * @metadata_size:   optional output, actual metadata size
 * @flags:           optional output, KFD_IOC_ALLOC_MEM_FLAGS_* describing
 *                   the BO's domain and CPU visibility
 *
 * Returns 0 on success, -EINVAL for non-amdgpu or unsupported buffers,
 * or the error from dma_buf_get()/amdgpu_bo_get_metadata().
 */
int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
				  struct amdgpu_device **dmabuf_adev,
				  uint64_t *bo_size, void *metadata_buffer,
				  size_t buffer_size, uint32_t *metadata_size,
				  uint32_t *flags)
{
	struct dma_buf *dma_buf;
	struct drm_gem_object *obj;
	struct amdgpu_bo *bo;
	uint64_t metadata_flags;
	int r = -EINVAL;

	/* Takes a reference on the dma-buf; dropped at out_put. */
	dma_buf = dma_buf_get(dma_buf_fd);
	if (IS_ERR(dma_buf))
		return PTR_ERR(dma_buf);

	if (dma_buf->ops != &amdgpu_dmabuf_ops)
		/* Can't handle non-graphics buffers */
		goto out_put;

	obj = dma_buf->priv;
	if (obj->dev->driver != adev_to_drm(adev)->driver)
		/* Can't handle buffers from different drivers */
		goto out_put;

	/* From here on, adev refers to the EXPORTING device, which may
	 * differ from the caller's adev.
	 */
	adev = drm_to_adev(obj->dev);
	bo = gem_to_amdgpu_bo(obj);
	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
				    AMDGPU_GEM_DOMAIN_GTT)))
		/* Only VRAM and GTT BOs are supported */
		goto out_put;

	/* Every output parameter below is optional. */
	r = 0;
	if (dmabuf_adev)
		*dmabuf_adev = adev;
	if (bo_size)
		*bo_size = amdgpu_bo_size(bo);
	if (metadata_buffer)
		r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
					   metadata_size, &metadata_flags);
	if (flags) {
		*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
				KFD_IOC_ALLOC_MEM_FLAGS_VRAM
				: KFD_IOC_ALLOC_MEM_FLAGS_GTT;

		if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
			*flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
	}

out_put:
	dma_buf_put(dma_buf);
	return r;
}
5501dde0ea9SFelix Kuehling 
551574c4183SGraham Sider uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
552574c4183SGraham Sider 					  struct amdgpu_device *src)
553da361dd1Sshaoyunl {
554574c4183SGraham Sider 	struct amdgpu_device *peer_adev = src;
555574c4183SGraham Sider 	struct amdgpu_device *adev = dst;
556da361dd1Sshaoyunl 	int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
557da361dd1Sshaoyunl 
558da361dd1Sshaoyunl 	if (ret < 0) {
559da361dd1Sshaoyunl 		DRM_ERROR("amdgpu: failed to get  xgmi hops count between node %d and %d. ret = %d\n",
560da361dd1Sshaoyunl 			adev->gmc.xgmi.physical_node_id,
561da361dd1Sshaoyunl 			peer_adev->gmc.xgmi.physical_node_id, ret);
562da361dd1Sshaoyunl 		ret = 0;
563da361dd1Sshaoyunl 	}
564da361dd1Sshaoyunl 	return  (uint8_t)ret;
565da361dd1Sshaoyunl }
566db8b62c0SShaoyun Liu 
567574c4183SGraham Sider int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
568574c4183SGraham Sider 					    struct amdgpu_device *src,
569574c4183SGraham Sider 					    bool is_min)
5703f46c4e9SJonathan Kim {
571574c4183SGraham Sider 	struct amdgpu_device *adev = dst, *peer_adev;
5723f46c4e9SJonathan Kim 	int num_links;
5733f46c4e9SJonathan Kim 
5743f46c4e9SJonathan Kim 	if (adev->asic_type != CHIP_ALDEBARAN)
5753f46c4e9SJonathan Kim 		return 0;
5763f46c4e9SJonathan Kim 
5773f46c4e9SJonathan Kim 	if (src)
578574c4183SGraham Sider 		peer_adev = src;
5793f46c4e9SJonathan Kim 
5803f46c4e9SJonathan Kim 	/* num links returns 0 for indirect peers since indirect route is unknown. */
5813f46c4e9SJonathan Kim 	num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev);
5823f46c4e9SJonathan Kim 	if (num_links < 0) {
5833f46c4e9SJonathan Kim 		DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n",
5843f46c4e9SJonathan Kim 			adev->gmc.xgmi.physical_node_id,
5853f46c4e9SJonathan Kim 			peer_adev->gmc.xgmi.physical_node_id, num_links);
5863f46c4e9SJonathan Kim 		num_links = 0;
5873f46c4e9SJonathan Kim 	}
5883f46c4e9SJonathan Kim 
5893f46c4e9SJonathan Kim 	/* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */
5903f46c4e9SJonathan Kim 	return (num_links * 16 * 25000)/BITS_PER_BYTE;
5913f46c4e9SJonathan Kim }
5923f46c4e9SJonathan Kim 
593574c4183SGraham Sider int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min)
59493304810SJonathan Kim {
59593304810SJonathan Kim 	int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) :
59693304810SJonathan Kim 							fls(adev->pm.pcie_mlw_mask)) - 1;
59793304810SJonathan Kim 	int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask &
59893304810SJonathan Kim 						CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) :
59993304810SJonathan Kim 					fls(adev->pm.pcie_gen_mask &
60093304810SJonathan Kim 						CAIL_PCIE_LINK_SPEED_SUPPORT_MASK)) - 1;
60193304810SJonathan Kim 	uint32_t num_lanes_mask = 1 << num_lanes_shift;
60293304810SJonathan Kim 	uint32_t gen_speed_mask = 1 << gen_speed_shift;
60393304810SJonathan Kim 	int num_lanes_factor = 0, gen_speed_mbits_factor = 0;
60493304810SJonathan Kim 
60593304810SJonathan Kim 	switch (num_lanes_mask) {
60693304810SJonathan Kim 	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X1:
60793304810SJonathan Kim 		num_lanes_factor = 1;
60893304810SJonathan Kim 		break;
60993304810SJonathan Kim 	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X2:
61093304810SJonathan Kim 		num_lanes_factor = 2;
61193304810SJonathan Kim 		break;
61293304810SJonathan Kim 	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X4:
61393304810SJonathan Kim 		num_lanes_factor = 4;
61493304810SJonathan Kim 		break;
61593304810SJonathan Kim 	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X8:
61693304810SJonathan Kim 		num_lanes_factor = 8;
61793304810SJonathan Kim 		break;
61893304810SJonathan Kim 	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X12:
61993304810SJonathan Kim 		num_lanes_factor = 12;
62093304810SJonathan Kim 		break;
62193304810SJonathan Kim 	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X16:
62293304810SJonathan Kim 		num_lanes_factor = 16;
62393304810SJonathan Kim 		break;
62493304810SJonathan Kim 	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X32:
62593304810SJonathan Kim 		num_lanes_factor = 32;
62693304810SJonathan Kim 		break;
62793304810SJonathan Kim 	}
62893304810SJonathan Kim 
62993304810SJonathan Kim 	switch (gen_speed_mask) {
63093304810SJonathan Kim 	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1:
63193304810SJonathan Kim 		gen_speed_mbits_factor = 2500;
63293304810SJonathan Kim 		break;
63393304810SJonathan Kim 	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2:
63493304810SJonathan Kim 		gen_speed_mbits_factor = 5000;
63593304810SJonathan Kim 		break;
63693304810SJonathan Kim 	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3:
63793304810SJonathan Kim 		gen_speed_mbits_factor = 8000;
63893304810SJonathan Kim 		break;
63993304810SJonathan Kim 	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4:
64093304810SJonathan Kim 		gen_speed_mbits_factor = 16000;
64193304810SJonathan Kim 		break;
64293304810SJonathan Kim 	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5:
64393304810SJonathan Kim 		gen_speed_mbits_factor = 32000;
64493304810SJonathan Kim 		break;
64593304810SJonathan Kim 	}
64693304810SJonathan Kim 
64793304810SJonathan Kim 	return (num_lanes_factor * gen_speed_mbits_factor)/BITS_PER_BYTE;
64893304810SJonathan Kim }
64993304810SJonathan Kim 
/* amdgpu_amdkfd_submit_ib - synchronously submit an indirect buffer on
 * behalf of KFD and wait for it to complete.
 * @adev: device to submit on
 * @engine: which engine to use (MEC1 compute ring, or SDMA1/SDMA2)
 * @vmid: VMID to execute the IB under (NO_HWS path, see comment below)
 * @gpu_addr: GPU virtual address of the IB
 * @ib_cmd: CPU pointer to the IB contents
 * @ib_len: IB length in dwords
 *
 * Returns 0 on success or a negative errno on invalid engine, job
 * allocation failure, scheduling failure, or interrupted wait.
 */
int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
				enum kgd_engine_type engine,
				uint32_t vmid, uint64_t gpu_addr,
				uint32_t *ib_cmd, uint32_t ib_len)
{
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct amdgpu_ring *ring;
	struct dma_fence *f = NULL;
	int ret;

	/* Map the KFD engine selector onto a concrete amdgpu ring;
	 * only the first instance of each engine type is used here.
	 */
	switch (engine) {
	case KGD_ENGINE_MEC1:
		ring = &adev->gfx.compute_ring[0];
		break;
	case KGD_ENGINE_SDMA1:
		ring = &adev->sdma.instance[0].ring;
		break;
	case KGD_ENGINE_SDMA2:
		ring = &adev->sdma.instance[1].ring;
		break;
	default:
		pr_err("Invalid engine in IB submission: %d\n", engine);
		ret = -EINVAL;
		goto err;
	}

	/* Allocate a single-IB job with no entity (direct submission). */
	ret = amdgpu_job_alloc(adev, 1, &job, NULL);
	if (ret)
		goto err;

	ib = &job->ibs[0];
	memset(ib, 0, sizeof(struct amdgpu_ib));

	ib->gpu_addr = gpu_addr;
	ib->ptr = ib_cmd;
	ib->length_dw = ib_len;
	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
	job->vmid = vmid;
	job->num_ibs = 1;

	ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);

	if (ret) {
		DRM_ERROR("amdgpu: failed to schedule IB.\n");
		goto err_ib_sched;
	}

	/* Drop the initial kref_init count (see drm_sched_main as example) */
	dma_fence_put(f);
	/* Waiting on f after the put is safe here: the job still holds a
	 * reference to the fence until amdgpu_job_free() below.
	 */
	ret = dma_fence_wait(f, false);

err_ib_sched:
	amdgpu_job_free(job);
err:
	return ret;
}
7074c660c8fSFelix Kuehling 
7086bfc7c7eSGraham Sider void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
70901c097dbSFelix Kuehling {
71001c097dbSFelix Kuehling 	amdgpu_dpm_switch_power_profile(adev,
711919a52fcSFelix Kuehling 					PP_SMC_POWER_PROFILE_COMPUTE,
712919a52fcSFelix Kuehling 					!idle);
71301c097dbSFelix Kuehling }
71401c097dbSFelix Kuehling 
715155494dbSFelix Kuehling bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
716155494dbSFelix Kuehling {
71740111ec2SFelix Kuehling 	if (adev->kfd.dev)
71840111ec2SFelix Kuehling 		return vmid >= adev->vm_manager.first_kfd_vmid;
719155494dbSFelix Kuehling 
720155494dbSFelix Kuehling 	return false;
721155494dbSFelix Kuehling }
722fcdfa432SOded Gabbay 
7236bfc7c7eSGraham Sider int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
7246bfc7c7eSGraham Sider 				     uint16_t vmid)
725ffa02269SAlex Sierra {
726ffa02269SAlex Sierra 	if (adev->family == AMDGPU_FAMILY_AI) {
727ffa02269SAlex Sierra 		int i;
728ffa02269SAlex Sierra 
729ffa02269SAlex Sierra 		for (i = 0; i < adev->num_vmhubs; i++)
730ffa02269SAlex Sierra 			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
731ffa02269SAlex Sierra 	} else {
732ffa02269SAlex Sierra 		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
733ffa02269SAlex Sierra 	}
734ffa02269SAlex Sierra 
735ffa02269SAlex Sierra 	return 0;
736ffa02269SAlex Sierra }
737ffa02269SAlex Sierra 
7386bfc7c7eSGraham Sider int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
7396bfc7c7eSGraham Sider 				      uint16_t pasid, enum TLB_FLUSH_TYPE flush_type)
740ffa02269SAlex Sierra {
741ffa02269SAlex Sierra 	bool all_hub = false;
742ffa02269SAlex Sierra 
743508f748bSRuili Ji 	if (adev->family == AMDGPU_FAMILY_AI ||
744508f748bSRuili Ji 	    adev->family == AMDGPU_FAMILY_RV)
745ffa02269SAlex Sierra 		all_hub = true;
746ffa02269SAlex Sierra 
747ffa02269SAlex Sierra 	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
748ffa02269SAlex Sierra }
749ffa02269SAlex Sierra 
7506bfc7c7eSGraham Sider bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
751aabf3a95SJack Xiao {
752aabf3a95SJack Xiao 	return adev->have_atomics_support;
753aabf3a95SJack Xiao }
754c7490949STao Zhou 
755b6485bedSTao Zhou void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset)
756c7490949STao Zhou {
757c7490949STao Zhou 	struct ras_err_data err_data = {0, 0, 0, NULL};
758c7490949STao Zhou 
759fec8c524STao Zhou 	amdgpu_umc_poison_handler(adev, &err_data, reset);
760c7490949STao Zhou }
7616475ae2bSTao Zhou 
7626475ae2bSTao Zhou bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
7636475ae2bSTao Zhou {
7643cd3e731SFelix Kuehling 	if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status)
7656475ae2bSTao Zhou 		return adev->gfx.ras->query_utcl2_poison_status(adev);
7666475ae2bSTao Zhou 	else
7676475ae2bSTao Zhou 		return false;
7686475ae2bSTao Zhou }
769