// SPDX-License-Identifier: MIT
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "amdgpu_amdkfd.h"
#include "amd_pcie.h"
#include "amd_shared.h"

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_dma_buf.h"
#include <linux/module.h>
#include <linux/dma-buf.h>
#include "amdgpu_xgmi.h"
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu_reset.h"

/* Total memory size in system memory and all GPU VRAM. Used to
 * estimate worst case amount of memory to reserve for page tables
 */
uint64_t amdgpu_amdkfd_total_mem_size;

static bool kfd_initialized;

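/* One-time module init: record the amount of usable system memory that
 * si_meminfo() reports and bring up the KFD core via kgd2kfd_init().
 */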
int amdgpu_amdkfd_init(void)
{
	struct sysinfo si;
	int ret;

	si_meminfo(&si);
	amdgpu_amdkfd_total_mem_size = si.freeram - si.freehigh;
	amdgpu_amdkfd_total_mem_size *= si.mem_unit;

	ret = kgd2kfd_init();
	kfd_initialized = !ret;

	return ret;
}

void amdgpu_amdkfd_fini(void)
{
	if (kfd_initialized) {
		kgd2kfd_exit();
		kfd_initialized = false;
	}
}

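/* Per-device probe: create the KFD device for @adev (passing along
 * whether it is an SR-IOV VF), unless module init failed earlier.
 */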
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
{
	bool vf = amdgpu_sriov_vf(adev);

	if (!kfd_initialized)
		return;

	adev->kfd.dev = kgd2kfd_probe(adev, vf);
}

/**
 * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to
 *                                set up amdkfd
 *
 * @adev: amdgpu_device pointer
 * @aperture_base: output returning doorbell aperture base physical address
 * @aperture_size: output returning doorbell aperture size in bytes
 * @start_offset: output returning # of doorbell bytes reserved for amdgpu.
 *
 * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up,
 * takes doorbells required for its own rings and reports the setup to amdkfd.
 * amdgpu reserved doorbells are at the start of the doorbell aperture.
 */
static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
					 phys_addr_t *aperture_base,
					 size_t *aperture_size,
					 size_t *start_offset)
{
	/*
	 * The first num_kernel_doorbells are used by amdgpu.
	 * amdkfd takes whatever's left in the aperture.
	 */
	if (adev->enable_mes) {
		/*
		 * With MES enabled, we only need to initialize
		 * the base address. The size and offset are
		 * not initialized as AMDGPU manages the whole
		 * doorbell space.
		 */
		*aperture_base = adev->doorbell.base;
		*aperture_size = 0;
		*start_offset = 0;
	} else if (adev->doorbell.size > adev->doorbell.num_kernel_doorbells *
						sizeof(u32)) {
		*aperture_base = adev->doorbell.base;
		*aperture_size = adev->doorbell.size;
		*start_offset = adev->doorbell.num_kernel_doorbells * sizeof(u32);
	} else {
		*aperture_base = 0;
		*aperture_size = 0;
		*start_offset = 0;
	}
}

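/* Deferred GPU recovery, scheduled from amdgpu_amdkfd_gpu_reset() below. */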
static void amdgpu_amdkfd_reset_work(struct work_struct *work)
{
	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
						  kfd.reset_work);

	struct amdgpu_reset_context reset_context;

	memset(&reset_context, 0, sizeof(reset_context));

	reset_context.method = AMD_RESET_METHOD_NONE;
	reset_context.reset_req_dev = adev;
	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}

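/* Complete KFD setup for one device: describe the resources shared with
 * amdgpu (compute VMIDs, MEC queue bitmap, GPUVM aperture, doorbells) in a
 * kgd2kfd_shared_resources structure and hand it to kgd2kfd_device_init().
 * Also accounts this device's VRAM in amdgpu_amdkfd_total_mem_size and
 * initializes the reset worker.
 */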
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
	int i;
	int last_valid_bit;

	amdgpu_amdkfd_gpuvm_init_mem_limits();

	if (adev->kfd.dev) {
		struct kgd2kfd_shared_resources gpu_resources = {
			.compute_vmid_bitmap =
				((1 << AMDGPU_NUM_VMID) - 1) -
				((1 << adev->vm_manager.first_kfd_vmid) - 1),
			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
			.gpuvm_size = min(adev->vm_manager.max_pfn
					  << AMDGPU_GPU_PAGE_SHIFT,
					  AMDGPU_GMC_HOLE_START),
			.drm_render_minor = adev_to_drm(adev)->render->index,
			.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
			.enable_mes = adev->enable_mes,
		};

		/* this is going to have a few of the MSBs set that we need to
		 * clear
		 */
		bitmap_complement(gpu_resources.cp_queue_bitmap,
				  adev->gfx.mec_bitmap[0].queue_bitmap,
				  KGD_MAX_QUEUES);

		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
		 * nbits is not a compile time constant
		 */
		last_valid_bit = 1 /* only first MEC can have compute queues */
				* adev->gfx.mec.num_pipe_per_mec
				* adev->gfx.mec.num_queue_per_pipe;
		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
			clear_bit(i, gpu_resources.cp_queue_bitmap);

		amdgpu_doorbell_get_kfd_info(adev,
				&gpu_resources.doorbell_physical_address,
				&gpu_resources.doorbell_aperture_size,
				&gpu_resources.doorbell_start_offset);

		/* Since SOC15, BIF starts to statically use the
		 * lower 12 bits of doorbell addresses for routing
		 * based on settings in registers like
		 * SDMA0_DOORBELL_RANGE etc.
		 * In order to route a doorbell to CP engine, the lower
		 * 12 bits of its address have to be outside the range
		 * set for SDMA, VCN, and IH blocks.
		 */
		if (adev->asic_type >= CHIP_VEGA10) {
			gpu_resources.non_cp_doorbells_start =
					adev->doorbell_index.first_non_cp;
			gpu_resources.non_cp_doorbells_end =
					adev->doorbell_index.last_non_cp;
		}

		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
							&gpu_resources);

		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;

		INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
	}
}

void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
{
	if (adev->kfd.dev) {
		kgd2kfd_device_exit(adev->kfd.dev);
		adev->kfd.dev = NULL;
		amdgpu_amdkfd_total_mem_size -= adev->gmc.real_vram_size;
	}
}

void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
		const void *ih_ring_entry)
{
	if (adev->kfd.dev)
		kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
}

void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
{
	if (adev->kfd.dev)
		kgd2kfd_suspend(adev->kfd.dev, run_pm);
}

int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_resume(adev->kfd.dev, run_pm);

	return r;
}

int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_pre_reset(adev->kfd.dev);

	return r;
}

int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_post_reset(adev->kfd.dev);

	return r;
}

void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
{
	if (amdgpu_device_should_recover_gpu(adev))
		amdgpu_reset_domain_schedule(adev->reset_domain,
					     &adev->kfd.reset_work);
}

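/* Allocate, pin and kernel-map a GTT BO for KFD kernel-mode use (e.g.
 * MQDs, judging by the cp_mqd_gfx9 flag). Returns the BO handle in
 * @mem_obj along with its GPU address and CPU pointer; errors unwind
 * through the goto ladder at the end of the function.
 */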
int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
				void **mem_obj, uint64_t *gpu_addr,
				void **cpu_ptr, bool cp_mqd_gfx9)
{
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_param bp;
	int r;
	void *cpu_ptr_tmp = NULL;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	if (cp_mqd_gfx9)
		bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;

	r = amdgpu_bo_create(adev, &bp, &bo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate BO for amdkfd (%d)\n", r);
		return r;
	}

	/* map the buffer */
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
		goto allocate_mem_reserve_bo_failed;
	}

	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
	if (r) {
		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
		goto allocate_mem_pin_bo_failed;
	}

	r = amdgpu_ttm_alloc_gart(&bo->tbo);
	if (r) {
		dev_err(adev->dev, "%p bind failed\n", bo);
		goto allocate_mem_kmap_bo_failed;
	}

	r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
	if (r) {
		dev_err(adev->dev,
			"(%d) failed to map bo to kernel for amdkfd\n", r);
		goto allocate_mem_kmap_bo_failed;
	}

	*mem_obj = bo;
	*gpu_addr = amdgpu_bo_gpu_offset(bo);
	*cpu_ptr = cpu_ptr_tmp;

	amdgpu_bo_unreserve(bo);

	return 0;

allocate_mem_kmap_bo_failed:
	amdgpu_bo_unpin(bo);
allocate_mem_pin_bo_failed:
	amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
	amdgpu_bo_unref(&bo);

	return r;
}

void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj)
{
	struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;

	amdgpu_bo_reserve(bo, true);
	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unpin(bo);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
}

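/* Allocate a Global Wave Sync BO for KFD. GWS has no CPU access, so the
 * BO is created in the GWS domain with AMDGPU_GEM_CREATE_NO_CPU_ACCESS
 * and never mapped.
 */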
int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
				void **mem_obj)
{
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_user *ubo;
	struct amdgpu_bo_param bp;
	int r;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = 1;
	bp.domain = AMDGPU_GEM_DOMAIN_GWS;
	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	bp.type = ttm_bo_type_device;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	r = amdgpu_bo_create_user(adev, &bp, &ubo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate gws BO for amdkfd (%d)\n", r);
		return r;
	}

	bo = &ubo->bo;
	*mem_obj = bo;
	return 0;
}

void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj)
{
	struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;

	amdgpu_bo_unref(&bo);
}

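/* Return the loaded firmware version for the given engine, 0 if unknown. */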
uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
				      enum kgd_engine_type type)
{
	switch (type) {
	case KGD_ENGINE_PFP:
		return adev->gfx.pfp_fw_version;

	case KGD_ENGINE_ME:
		return adev->gfx.me_fw_version;

	case KGD_ENGINE_CE:
		return adev->gfx.ce_fw_version;

	case KGD_ENGINE_MEC1:
		return adev->gfx.mec_fw_version;

	case KGD_ENGINE_MEC2:
		return adev->gfx.mec2_fw_version;

	case KGD_ENGINE_RLC:
		return adev->gfx.rlc_fw_version;

	case KGD_ENGINE_SDMA1:
		return adev->sdma.instance[0].fw_version;

	case KGD_ENGINE_SDMA2:
		return adev->sdma.instance[1].fw_version;

	default:
		return 0;
	}

	return 0;
}

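/* Report VRAM sizes and memory clock. With a compute partition (@xcp),
 * the partition's share of VRAM is reported as public or private
 * depending on whether all VRAM is CPU-visible; otherwise the device's
 * visible/invisible split is used. mem_clk_max comes from DPM, with 0 in
 * emulation mode and 100 as a fallback when DPM is disabled.
 */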
void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
				      struct kfd_local_mem_info *mem_info,
				      struct amdgpu_xcp *xcp)
{
	memset(mem_info, 0, sizeof(*mem_info));

	if (xcp) {
		if (adev->gmc.real_vram_size == adev->gmc.visible_vram_size)
			mem_info->local_mem_size_public =
					KFD_XCP_MEMORY_SIZE(adev, xcp->id);
		else
			mem_info->local_mem_size_private =
					KFD_XCP_MEMORY_SIZE(adev, xcp->id);
	} else {
		mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
		mem_info->local_mem_size_private = adev->gmc.real_vram_size -
						adev->gmc.visible_vram_size;
	}
	mem_info->vram_width = adev->gmc.vram_width;

	pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",
			&adev->gmc.aper_base,
			mem_info->local_mem_size_public,
			mem_info->local_mem_size_private);

	if (adev->pm.dpm_enabled) {
		if (amdgpu_emu_mode == 1)
			mem_info->mem_clk_max = 0;
		else
			mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
	} else
		mem_info->mem_clk_max = 100;
}

uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	if (adev->gfx.funcs->get_gpu_clock_counter)
		return adev->gfx.funcs->get_gpu_clock_counter(adev);
	return 0;
}

uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
{
	/* the sclk is in quanta of 10kHz */
	if (adev->pm.dpm_enabled)
		return amdgpu_dpm_get_sclk(adev, false) / 100;
	else
		return 100;
}

void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *cu_info)
{
	struct amdgpu_cu_info acu_info = adev->gfx.cu_info;

	memset(cu_info, 0, sizeof(*cu_info));
	if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
		return;

	cu_info->cu_active_number = acu_info.number;
	cu_info->cu_ao_mask = acu_info.ao_cu_mask;
	memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
	       sizeof(acu_info.bitmap));
	cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
	cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
	cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
	cu_info->simd_per_cu = acu_info.simd_per_cu;
	cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
	cu_info->wave_front_size = acu_info.wave_front_size;
	cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
	cu_info->lds_size = acu_info.lds_size;
}

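/* Resolve a DMA-buf fd back to the amdgpu BO behind it and report the
 * owning device, BO size, metadata, KFD allocation flags and memory
 * partition id. Fails with -EINVAL for buffers that are not amdgpu GEM
 * objects or are not backed by VRAM or GTT.
 */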
int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
				  struct amdgpu_device **dmabuf_adev,
				  uint64_t *bo_size, void *metadata_buffer,
				  size_t buffer_size, uint32_t *metadata_size,
				  uint32_t *flags, int8_t *xcp_id)
{
	struct dma_buf *dma_buf;
	struct drm_gem_object *obj;
	struct amdgpu_bo *bo;
	uint64_t metadata_flags;
	int r = -EINVAL;

	dma_buf = dma_buf_get(dma_buf_fd);
	if (IS_ERR(dma_buf))
		return PTR_ERR(dma_buf);

	if (dma_buf->ops != &amdgpu_dmabuf_ops)
		/* Can't handle non-graphics buffers */
		goto out_put;

	obj = dma_buf->priv;
	if (obj->dev->driver != adev_to_drm(adev)->driver)
		/* Can't handle buffers from different drivers */
		goto out_put;

	adev = drm_to_adev(obj->dev);
	bo = gem_to_amdgpu_bo(obj);
	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
				    AMDGPU_GEM_DOMAIN_GTT)))
		/* Only VRAM and GTT BOs are supported */
		goto out_put;

	r = 0;
	if (dmabuf_adev)
		*dmabuf_adev = adev;
	if (bo_size)
		*bo_size = amdgpu_bo_size(bo);
	if (metadata_buffer)
		r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
					   metadata_size, &metadata_flags);
	if (flags) {
		*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
				KFD_IOC_ALLOC_MEM_FLAGS_VRAM
				: KFD_IOC_ALLOC_MEM_FLAGS_GTT;

		if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
			*flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
	}
	if (xcp_id)
		*xcp_id = bo->xcp_id;

out_put:
	dma_buf_put(dma_buf);
	return r;
}

uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
					  struct amdgpu_device *src)
{
	struct amdgpu_device *peer_adev = src;
	struct amdgpu_device *adev = dst;
	int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);

	if (ret < 0) {
		DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
			adev->gmc.xgmi.physical_node_id,
			peer_adev->gmc.xgmi.physical_node_id, ret);
		ret = 0;
	}
	return (uint8_t)ret;
}

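/* Estimate xGMI bandwidth in MB/s between two devices (Aldebaran only).
 * @is_min assumes a single link; otherwise the number of direct links to
 * @src is queried.
 */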
int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
					    struct amdgpu_device *src,
					    bool is_min)
{
	struct amdgpu_device *adev = dst, *peer_adev;
	int num_links;

	if (adev->asic_type != CHIP_ALDEBARAN)
		return 0;

	if (src)
		peer_adev = src;

	/* num links returns 0 for indirect peers since indirect route is unknown. */
	num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev);
	if (num_links < 0) {
		DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n",
			adev->gmc.xgmi.physical_node_id,
			peer_adev->gmc.xgmi.physical_node_id, num_links);
		num_links = 0;
	}

	/* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */
	return (num_links * 16 * 25000)/BITS_PER_BYTE;
}

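/* Estimate PCIe bandwidth in MB/s: pick the minimum or maximum supported
 * lane count and link speed from the pcie_mlw_mask/pcie_gen_mask
 * capability masks, then multiply lanes by the per-lane signaling rate of
 * that PCIe generation.
 */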
int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min)
{
	int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) :
							fls(adev->pm.pcie_mlw_mask)) - 1;
	int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask &
						CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) :
					fls(adev->pm.pcie_gen_mask &
						CAIL_PCIE_LINK_SPEED_SUPPORT_MASK)) - 1;
	uint32_t num_lanes_mask = 1 << num_lanes_shift;
	uint32_t gen_speed_mask = 1 << gen_speed_shift;
	int num_lanes_factor = 0, gen_speed_mbits_factor = 0;

	switch (num_lanes_mask) {
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X1:
		num_lanes_factor = 1;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X2:
		num_lanes_factor = 2;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X4:
		num_lanes_factor = 4;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X8:
		num_lanes_factor = 8;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X12:
		num_lanes_factor = 12;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X16:
		num_lanes_factor = 16;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X32:
		num_lanes_factor = 32;
		break;
	}

	switch (gen_speed_mask) {
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1:
		gen_speed_mbits_factor = 2500;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2:
		gen_speed_mbits_factor = 5000;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3:
		gen_speed_mbits_factor = 8000;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4:
		gen_speed_mbits_factor = 16000;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5:
		gen_speed_mbits_factor = 32000;
		break;
	}

	return (num_lanes_factor * gen_speed_mbits_factor)/BITS_PER_BYTE;
}

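/* Submit an IB from KFD on one of amdgpu's kernel rings: wrap the command
 * buffer in a single-IB job on the first compute or SDMA ring, schedule
 * it and wait synchronously for its fence.
 */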
int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
				enum kgd_engine_type engine,
				uint32_t vmid, uint64_t gpu_addr,
				uint32_t *ib_cmd, uint32_t ib_len)
{
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct amdgpu_ring *ring;
	struct dma_fence *f = NULL;
	int ret;

	switch (engine) {
	case KGD_ENGINE_MEC1:
		ring = &adev->gfx.compute_ring[0];
		break;
	case KGD_ENGINE_SDMA1:
		ring = &adev->sdma.instance[0].ring;
		break;
	case KGD_ENGINE_SDMA2:
		ring = &adev->sdma.instance[1].ring;
		break;
	default:
		pr_err("Invalid engine in IB submission: %d\n", engine);
		ret = -EINVAL;
		goto err;
	}

	ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job);
	if (ret)
		goto err;

	ib = &job->ibs[0];
	memset(ib, 0, sizeof(struct amdgpu_ib));

	ib->gpu_addr = gpu_addr;
	ib->ptr = ib_cmd;
	ib->length_dw = ib_len;
	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
	job->vmid = vmid;
	job->num_ibs = 1;

	ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);

	if (ret) {
		DRM_ERROR("amdgpu: failed to schedule IB.\n");
		goto err_ib_sched;
	}

	/* Drop the initial kref_init count (see drm_sched_main as example) */
	dma_fence_put(f);
	ret = dma_fence_wait(f, false);

err_ib_sched:
	amdgpu_job_free(job);
err:
	return ret;
}

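/* Switch the COMPUTE power profile on or off as compute work starts or
 * stops (with a GFXOFF workaround on GFX11).
 */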
void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
{
	/* Temporary workaround to fix issues observed in some
	 * compute applications when GFXOFF is enabled on GFX11.
	 */
	if (IP_VERSION_MAJ(adev->ip_versions[GC_HWIP][0]) == 11) {
		pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled");
		amdgpu_gfx_off_ctrl(adev, idle);
	}
	amdgpu_dpm_switch_power_profile(adev,
					PP_SMC_POWER_PROFILE_COMPUTE,
					!idle);
}

bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
	if (adev->kfd.dev)
		return vmid >= adev->vm_manager.first_kfd_vmid;

	return false;
}

int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
				     uint16_t vmid)
{
	if (adev->family == AMDGPU_FAMILY_AI) {
		int i;

		for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
	} else {
		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0);
	}

	return 0;
}

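/* Flush TLBs by PASID; on Vega (AI) and Raven (RV) families the flush is
 * requested on all VM hubs.
 */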
int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
				      uint16_t pasid,
				      enum TLB_FLUSH_TYPE flush_type,
				      uint32_t inst)
{
	bool all_hub = false;

	if (adev->family == AMDGPU_FAMILY_AI ||
	    adev->family == AMDGPU_FAMILY_RV)
		all_hub = true;

	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst);
}

bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
{
	return adev->have_atomics_support;
}

void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev)
{
	amdgpu_device_flush_hdp(adev, NULL);
}

void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset)
{
	amdgpu_umc_poison_handler(adev, reset);
}

int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
					uint32_t *payload)
{
	int ret;

	/* Device or IH ring is not ready so bail. */
	ret = amdgpu_ih_wait_on_checkpoint_process_ts(adev, &adev->irq.ih);
	if (ret)
		return ret;

	/* Send payload to fence KFD interrupts */
	amdgpu_amdkfd_interrupt(adev, payload);

	return 0;
}

bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
{
	if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status)
		return adev->gfx.ras->query_utcl2_poison_status(adev);
	else
		return false;
}

int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev)
{
	return kgd2kfd_check_and_lock_kfd();
}

void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev)
{
	kgd2kfd_unlock_kfd();
}

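/* VRAM available to one compute partition: the partition's share (rounded
 * down to page size) when memory partitioning is active, otherwise the
 * device's full VRAM size.
 */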
u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
{
	u64 tmp;
	s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id);

	if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 0) {
		tmp = adev->gmc.mem_partitions[mem_id].size;
		do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
		return ALIGN_DOWN(tmp, PAGE_SIZE);
	} else {
		return adev->gmc.real_vram_size;
	}
}

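/* Unmap the HIQ through the KIQ. A temporary compute-typed ring
 * descriptor carrying only the doorbell offset is built, because the KIQ
 * unmap-queues packet identifies the queue by its doorbell.
 */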
int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
			    u32 inst)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	struct amdgpu_ring_funcs *ring_funcs;
	struct amdgpu_ring *ring;
	int r = 0;

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	ring_funcs = kzalloc(sizeof(*ring_funcs), GFP_KERNEL);
	if (!ring_funcs)
		return -ENOMEM;

	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
	if (!ring) {
		r = -ENOMEM;
		goto free_ring_funcs;
	}

	ring_funcs->type = AMDGPU_RING_TYPE_COMPUTE;
	ring->doorbell_index = doorbell_off;
	ring->funcs = ring_funcs;

	spin_lock(&kiq->ring_lock);

	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
		spin_unlock(&kiq->ring_lock);
		r = -ENOMEM;
		goto free_ring;
	}

	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 0, 0);

	if (kiq_ring->sched.ready && !adev->job_hang)
		r = amdgpu_ring_test_helper(kiq_ring);

	spin_unlock(&kiq->ring_lock);

free_ring:
	kfree(ring);

free_ring_funcs:
	kfree(ring_funcs);

	return r;
}