xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c (revision 8c6a0234739e33c8be8830c2eee13a49acfd59ea)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright 2014 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "amdgpu_amdkfd.h"
25 #include "amd_pcie.h"
26 #include "amd_shared.h"
27 
28 #include "amdgpu.h"
29 #include "amdgpu_gfx.h"
30 #include "amdgpu_dma_buf.h"
31 #include <drm/ttm/ttm_tt.h>
32 #include <linux/module.h>
33 #include <linux/dma-buf.h>
34 #include "amdgpu_xgmi.h"
35 #include <uapi/linux/kfd_ioctl.h>
36 #include "amdgpu_ras.h"
37 #include "amdgpu_umc.h"
38 #include "amdgpu_reset.h"
39 #include "amdgpu_ras_mgr.h"
40 
41 /* Total memory size of system memory and all GPU VRAM. Used to
42  * estimate the worst-case amount of memory to reserve for page tables.
43  */
44 uint64_t amdgpu_amdkfd_total_mem_size;
45 
46 static bool kfd_initialized;
47 
48 int amdgpu_amdkfd_init(void)
49 {
50 	struct sysinfo si;
51 	int ret;
52 
53 	si_meminfo(&si);
54 	amdgpu_amdkfd_total_mem_size = si.freeram - si.freehigh;
55 	amdgpu_amdkfd_total_mem_size *= si.mem_unit;
56 
57 	ret = kgd2kfd_init();
58 	kfd_initialized = !ret;
59 
60 	return ret;
61 }
62 
63 void amdgpu_amdkfd_fini(void)
64 {
65 	if (kfd_initialized) {
66 		kgd2kfd_exit();
67 		kfd_initialized = false;
68 	}
69 }
70 
71 void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
72 {
73 	bool vf = amdgpu_sriov_vf(adev);
74 
75 	if (!kfd_initialized)
76 		return;
77 
78 	adev->kfd.dev = kgd2kfd_probe(adev, vf);
79 }
80 
81 /**
82  * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to
83  *                                setup amdkfd
84  *
85  * @adev: amdgpu_device pointer
86  * @aperture_base: output returning doorbell aperture base physical address
87  * @aperture_size: output returning doorbell aperture size in bytes
88  * @start_offset: output returning # of doorbell bytes reserved for amdgpu.
89  *
90  * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up,
91  * takes doorbells required for its own rings and reports the setup to amdkfd.
92  * amdgpu reserved doorbells are at the start of the doorbell aperture.
93  */
94 static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
95 					 phys_addr_t *aperture_base,
96 					 size_t *aperture_size,
97 					 size_t *start_offset)
98 {
99 	/*
100 	 * The first num_kernel_doorbells are used by amdgpu.
101 	 * amdkfd takes whatever's left in the aperture.
102 	 */
103 	if (adev->enable_mes) {
104 		/*
105 		 * With MES enabled, we only need to initialize
106 		 * the base address. The size and offset are
107 		 * not initialized as AMDGPU manages the whole
108 		 * doorbell space.
109 		 */
110 		*aperture_base = adev->doorbell.base;
111 		*aperture_size = 0;
112 		*start_offset = 0;
113 	} else if (adev->doorbell.size > adev->doorbell.num_kernel_doorbells *
114 						sizeof(u32)) {
115 		*aperture_base = adev->doorbell.base;
116 		*aperture_size = adev->doorbell.size;
117 		*start_offset = adev->doorbell.num_kernel_doorbells * sizeof(u32);
118 	} else {
119 		*aperture_base = 0;
120 		*aperture_size = 0;
121 		*start_offset = 0;
122 	}
123 }
124 
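/* Worked example of the split above, using hypothetical numbers rather than
 * values from any specific ASIC. Assume:
 *
 *	adev->doorbell.base                 = 0x90000000 (doorbell BAR)
 *	adev->doorbell.size                 = 0x200000   (2 MiB aperture)
 *	adev->doorbell.num_kernel_doorbells = 1024
 *
 * Without MES, amdgpu keeps the first 1024 * sizeof(u32) = 4096 bytes for
 * its own rings, so amdkfd is reported:
 *
 *	*aperture_base = 0x90000000;
 *	*aperture_size = 0x200000;
 *	*start_offset  = 0x1000;    amdkfd may use [0x1000, 0x200000)
 *
 * With MES enabled only the base is meaningful; size and offset are left at
 * zero because amdgpu manages the whole doorbell space.
 */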
125 
126 static void amdgpu_amdkfd_reset_work(struct work_struct *work)
127 {
128 	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
129 						  kfd.reset_work);
130 
131 	struct amdgpu_reset_context reset_context;
132 
133 	memset(&reset_context, 0, sizeof(reset_context));
134 
135 	reset_context.method = AMD_RESET_METHOD_NONE;
136 	reset_context.reset_req_dev = adev;
137 	reset_context.src = adev->enable_mes ?
138 			    AMDGPU_RESET_SRC_MES :
139 			    AMDGPU_RESET_SRC_HWS;
140 	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
141 
142 	amdgpu_device_gpu_recover(adev, NULL, &reset_context);
143 }
144 
145 static const struct drm_client_funcs kfd_client_funcs = {
146 	.unregister	= drm_client_release,
147 };
148 
149 int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev)
150 {
151 	int ret;
152 
153 	if (!adev->kfd.init_complete || adev->kfd.client.dev)
154 		return 0;
155 
156 	ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
157 			      &kfd_client_funcs);
158 	if (ret) {
159 		dev_err(adev->dev, "Failed to init DRM client: %d\n",
160 			ret);
161 		return ret;
162 	}
163 
164 	drm_client_register(&adev->kfd.client);
165 
166 	return 0;
167 }
168 
169 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
170 {
171 	int i;
172 	int last_valid_bit;
173 
174 	amdgpu_amdkfd_gpuvm_init_mem_limits();
175 
176 	if (adev->kfd.dev) {
177 		struct kgd2kfd_shared_resources gpu_resources = {
178 			.compute_vmid_bitmap =
179 				((1 << AMDGPU_NUM_VMID) - 1) -
180 				((1 << adev->vm_manager.first_kfd_vmid) - 1),
181 			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
182 			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
183 			.gpuvm_size = min(adev->vm_manager.max_pfn
184 					  << AMDGPU_GPU_PAGE_SHIFT,
185 					  AMDGPU_GMC_HOLE_START),
186 			.drm_render_minor = adev_to_drm(adev)->render->index,
187 			.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
188 			.enable_mes = adev->enable_mes,
189 		};
190 
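		/* Illustrative expansion of the compute_vmid_bitmap arithmetic
		 * above, assuming the common case of AMDGPU_NUM_VMID == 16 and
		 * first_kfd_vmid == 8 (example values, not guaranteed for
		 * every ASIC):
		 *
		 *	((1 << 16) - 1) - ((1 << 8) - 1) = 0xffff - 0xff = 0xff00
		 *
		 * i.e. VMIDs 8..15 go to KFD compute while VMIDs 0..7 stay
		 * with amdgpu graphics contexts.
		 */
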
191 		/* The complemented bitmap will have a few of the MSBs set
192 		 * that we need to clear below.
193 		 */
194 		bitmap_complement(gpu_resources.cp_queue_bitmap,
195 				  adev->gfx.mec_bitmap[0].queue_bitmap,
196 				  AMDGPU_MAX_QUEUES);
197 
198 		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
199 		 * nbits is not a compile-time constant
200 		 */
201 		last_valid_bit = 1 /* only first MEC can have compute queues */
202 				* adev->gfx.mec.num_pipe_per_mec
203 				* adev->gfx.mec.num_queue_per_pipe;
204 		for (i = last_valid_bit; i < AMDGPU_MAX_QUEUES; ++i)
205 			clear_bit(i, gpu_resources.cp_queue_bitmap);
206 
207 		amdgpu_doorbell_get_kfd_info(adev,
208 				&gpu_resources.doorbell_physical_address,
209 				&gpu_resources.doorbell_aperture_size,
210 				&gpu_resources.doorbell_start_offset);
211 
212 		/* Since SOC15, the BIF statically uses the lower
213 		 * 12 bits of doorbell addresses for routing,
214 		 * based on settings in registers like
215 		 * SDMA0_DOORBELL_RANGE etc.
216 		 * To route a doorbell to the CP engine, the lower
217 		 * 12 bits of its address have to be outside the ranges
218 		 * set for the SDMA, VCN, and IH blocks.
219 		 */
220 		if (adev->asic_type >= CHIP_VEGA10) {
221 			gpu_resources.non_cp_doorbells_start =
222 					adev->doorbell_index.first_non_cp;
223 			gpu_resources.non_cp_doorbells_end =
224 					adev->doorbell_index.last_non_cp;
225 		}
226 
227 		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
228 							&gpu_resources);
229 
230 		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
231 
232 		INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
233 	}
234 }
235 
236 void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
237 {
238 	if (adev->kfd.dev) {
239 		kgd2kfd_device_exit(adev->kfd.dev);
240 		adev->kfd.dev = NULL;
241 		amdgpu_amdkfd_total_mem_size -= adev->gmc.real_vram_size;
242 	}
243 }
244 
245 void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
246 		const void *ih_ring_entry)
247 {
248 	if (adev->kfd.dev)
249 		kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
250 }
251 
252 void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc)
253 {
254 	if (adev->kfd.dev) {
255 		if (adev->in_s0ix)
256 			kgd2kfd_stop_sched_all_nodes(adev->kfd.dev);
257 		else
258 			kgd2kfd_suspend(adev->kfd.dev, suspend_proc);
259 	}
260 }
261 
262 int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc)
263 {
264 	int r = 0;
265 
266 	if (adev->kfd.dev) {
267 		if (adev->in_s0ix)
268 			r = kgd2kfd_start_sched_all_nodes(adev->kfd.dev);
269 		else
270 			r = kgd2kfd_resume(adev->kfd.dev, resume_proc);
271 	}
272 
273 	return r;
274 }
275 
276 void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev)
277 {
278 	if (adev->kfd.dev)
279 		kgd2kfd_suspend_process(adev->kfd.dev);
280 }
281 
282 int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev)
283 {
284 	int r = 0;
285 
286 	if (adev->kfd.dev)
287 		r = kgd2kfd_resume_process(adev->kfd.dev);
288 
289 	return r;
290 }
291 
292 int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
293 			    struct amdgpu_reset_context *reset_context)
294 {
295 	int r = 0;
296 
297 	if (adev->kfd.dev)
298 		r = kgd2kfd_pre_reset(adev->kfd.dev, reset_context);
299 
300 	return r;
301 }
302 
303 int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
304 {
305 	int r = 0;
306 
307 	if (adev->kfd.dev)
308 		r = kgd2kfd_post_reset(adev->kfd.dev);
309 
310 	return r;
311 }
312 
313 void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
314 {
315 	if (amdgpu_device_should_recover_gpu(adev))
316 		amdgpu_reset_domain_schedule(adev->reset_domain,
317 					     &adev->kfd.reset_work);
318 }
319 
320 int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
321 				void **mem_obj, uint64_t *gpu_addr,
322 				void **cpu_ptr, bool cp_mqd_gfx9)
323 {
324 	struct amdgpu_bo *bo = NULL;
325 	struct amdgpu_bo_param bp;
326 	int r;
327 	void *cpu_ptr_tmp = NULL;
328 
329 	memset(&bp, 0, sizeof(bp));
330 	bp.size = size;
331 	bp.byte_align = PAGE_SIZE;
332 	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
333 	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
334 	bp.type = ttm_bo_type_kernel;
335 	bp.resv = NULL;
336 	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
337 
338 	if (cp_mqd_gfx9)
339 		bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;
340 
341 	r = amdgpu_bo_create(adev, &bp, &bo);
342 	if (r) {
343 		dev_err(adev->dev,
344 			"failed to allocate BO for amdkfd (%d)\n", r);
345 		return r;
346 	}
347 
348 	/* map the buffer */
349 	r = amdgpu_bo_reserve(bo, true);
350 	if (r) {
351 		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
352 		goto allocate_mem_reserve_bo_failed;
353 	}
354 
355 	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
356 	if (r) {
357 		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
358 		goto allocate_mem_pin_bo_failed;
359 	}
360 
361 	r = amdgpu_ttm_alloc_gart(&bo->tbo);
362 	if (r) {
363 		dev_err(adev->dev, "%p bind failed\n", bo);
364 		goto allocate_mem_kmap_bo_failed;
365 	}
366 
367 	r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
368 	if (r) {
369 		dev_err(adev->dev,
370 			"(%d) failed to map bo to kernel for amdkfd\n", r);
371 		goto allocate_mem_kmap_bo_failed;
372 	}
373 
374 	*mem_obj = bo;
375 	*gpu_addr = amdgpu_bo_gpu_offset(bo);
376 	*cpu_ptr = cpu_ptr_tmp;
377 
378 	amdgpu_bo_unreserve(bo);
379 
380 	return 0;
381 
382 allocate_mem_kmap_bo_failed:
383 	amdgpu_bo_unpin(bo);
384 allocate_mem_pin_bo_failed:
385 	amdgpu_bo_unreserve(bo);
386 allocate_mem_reserve_bo_failed:
387 	amdgpu_bo_unref(&bo);
388 
389 	return r;
390 }
391 
392 void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj)
393 {
394 	struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj;
395 
396 	if (!bo || !*bo)
397 		return;
398 
399 	(void)amdgpu_bo_reserve(*bo, true);
400 	amdgpu_bo_kunmap(*bo);
401 	amdgpu_bo_unpin(*bo);
402 	amdgpu_bo_unreserve(*bo);
403 	amdgpu_bo_unref(bo);
404 }
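
/* Minimal usage sketch for the two helpers above; the variable names are
 * illustrative only and error handling after the allocation is omitted:
 *
 *	void *mem_obj;
 *	uint64_t gpu_addr;
 *	void *cpu_ptr;
 *
 *	if (!amdgpu_amdkfd_alloc_gtt_mem(adev, PAGE_SIZE, &mem_obj,
 *					 &gpu_addr, &cpu_ptr, false)) {
 *		... fill the buffer through cpu_ptr, program HW with gpu_addr ...
 *		amdgpu_amdkfd_free_gtt_mem(adev, &mem_obj);
 *	}
 */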
405 
406 int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
407 				void **mem_obj)
408 {
409 	struct amdgpu_bo *bo = NULL;
410 	struct amdgpu_bo_user *ubo;
411 	struct amdgpu_bo_param bp;
412 	int r;
413 
414 	memset(&bp, 0, sizeof(bp));
415 	bp.size = size;
416 	bp.byte_align = 1;
417 	bp.domain = AMDGPU_GEM_DOMAIN_GWS;
418 	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
419 	bp.type = ttm_bo_type_device;
420 	bp.resv = NULL;
421 	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
422 
423 	r = amdgpu_bo_create_user(adev, &bp, &ubo);
424 	if (r) {
425 		dev_err(adev->dev,
426 			"failed to allocate gws BO for amdkfd (%d)\n", r);
427 		return r;
428 	}
429 
430 	bo = &ubo->bo;
431 	*mem_obj = bo;
432 	return 0;
433 }
434 
435 void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj)
436 {
437 	struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
438 
439 	amdgpu_bo_unref(&bo);
440 }
441 
442 uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
443 				      enum kgd_engine_type type)
444 {
445 	switch (type) {
446 	case KGD_ENGINE_PFP:
447 		return adev->gfx.pfp_fw_version;
448 
449 	case KGD_ENGINE_ME:
450 		return adev->gfx.me_fw_version;
451 
452 	case KGD_ENGINE_CE:
453 		return adev->gfx.ce_fw_version;
454 
455 	case KGD_ENGINE_MEC1:
456 		return adev->gfx.mec_fw_version;
457 
458 	case KGD_ENGINE_MEC2:
459 		return adev->gfx.mec2_fw_version;
460 
461 	case KGD_ENGINE_RLC:
462 		return adev->gfx.rlc_fw_version;
463 
464 	case KGD_ENGINE_SDMA1:
465 		return adev->sdma.instance[0].fw_version;
466 
467 	case KGD_ENGINE_SDMA2:
468 		return adev->sdma.instance[1].fw_version;
469 
470 	default:
471 		return 0;
472 	}
473 
474 	return 0;
475 }
476 
477 void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
478 				      struct kfd_local_mem_info *mem_info,
479 				      struct amdgpu_xcp *xcp)
480 {
481 	memset(mem_info, 0, sizeof(*mem_info));
482 
483 	if (xcp) {
484 		if (adev->gmc.real_vram_size == adev->gmc.visible_vram_size)
485 			mem_info->local_mem_size_public =
486 					KFD_XCP_MEMORY_SIZE(adev, xcp->id);
487 		else
488 			mem_info->local_mem_size_private =
489 					KFD_XCP_MEMORY_SIZE(adev, xcp->id);
490 	} else if (adev->apu_prefer_gtt) {
491 		mem_info->local_mem_size_public = (ttm_tt_pages_limit() << PAGE_SHIFT);
492 		mem_info->local_mem_size_private = 0;
493 	} else {
494 		mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
495 		mem_info->local_mem_size_private = adev->gmc.real_vram_size -
496 						adev->gmc.visible_vram_size;
497 	}
498 	mem_info->vram_width = adev->gmc.vram_width;
499 
500 	pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",
501 			&adev->gmc.aper_base,
502 			mem_info->local_mem_size_public,
503 			mem_info->local_mem_size_private);
504 
505 	if (adev->pm.dpm_enabled) {
506 		if (amdgpu_emu_mode == 1)
507 			mem_info->mem_clk_max = 0;
508 		else
509 			mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
510 	} else
511 		mem_info->mem_clk_max = 100;
512 }
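
/* Worked example with hypothetical sizes for the non-XCP, non-GTT case
 * above: a dGPU with real_vram_size = 16 GiB and a 256 MiB host-visible
 * BAR (visible_vram_size) reports 256 MiB as local_mem_size_public and
 * the remaining 16128 MiB as local_mem_size_private.
 */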
513 
514 uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev)
515 {
516 	if (adev->gfx.funcs->get_gpu_clock_counter)
517 		return adev->gfx.funcs->get_gpu_clock_counter(adev);
518 	return 0;
519 }
520 
521 uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
522 {
523 	/* the sclk is reported in quanta of 10 kHz */
524 	if (adev->pm.dpm_enabled)
525 		return amdgpu_dpm_get_sclk(adev, false) / 100;
526 	else
527 		return 100;
528 }
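
/* Hypothetical example of the unit conversion above: amdgpu_dpm_get_sclk()
 * returning 180000 (180000 * 10 kHz = 1.8 GHz) yields 180000 / 100 =
 * 1800 MHz from this helper.
 */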
529 
530 int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
531 				  struct amdgpu_device **dmabuf_adev,
532 				  uint64_t *bo_size, void *metadata_buffer,
533 				  size_t buffer_size, uint32_t *metadata_size,
534 				  uint32_t *flags, int8_t *xcp_id)
535 {
536 	struct dma_buf *dma_buf;
537 	struct drm_gem_object *obj;
538 	struct amdgpu_bo *bo;
539 	uint64_t metadata_flags;
540 	int r = -EINVAL;
541 
542 	dma_buf = dma_buf_get(dma_buf_fd);
543 	if (IS_ERR(dma_buf))
544 		return PTR_ERR(dma_buf);
545 
546 	if (dma_buf->ops != &amdgpu_dmabuf_ops)
547 		/* Can't handle non-graphics buffers */
548 		goto out_put;
549 
550 	obj = dma_buf->priv;
551 	if (obj->dev->driver != adev_to_drm(adev)->driver)
552 		/* Can't handle buffers from different drivers */
553 		goto out_put;
554 
555 	adev = drm_to_adev(obj->dev);
556 	bo = gem_to_amdgpu_bo(obj);
557 	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
558 				    AMDGPU_GEM_DOMAIN_GTT)))
559 		/* Only VRAM and GTT BOs are supported */
560 		goto out_put;
561 
562 	r = 0;
563 	if (dmabuf_adev)
564 		*dmabuf_adev = adev;
565 	if (bo_size)
566 		*bo_size = amdgpu_bo_size(bo);
567 	if (metadata_buffer)
568 		r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
569 					   metadata_size, &metadata_flags);
570 	if (flags) {
571 		*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
572 				KFD_IOC_ALLOC_MEM_FLAGS_VRAM
573 				: KFD_IOC_ALLOC_MEM_FLAGS_GTT;
574 
575 		if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
576 			*flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
577 	}
578 	if (xcp_id)
579 		*xcp_id = bo->xcp_id;
580 
581 out_put:
582 	dma_buf_put(dma_buf);
583 	return r;
584 }
585 
586 int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min)
587 {
588 	int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) :
589 							fls(adev->pm.pcie_mlw_mask)) - 1;
590 	int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask &
591 						CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) :
592 					fls(adev->pm.pcie_gen_mask &
593 						CAIL_PCIE_LINK_SPEED_SUPPORT_MASK)) - 1;
594 	uint32_t num_lanes_mask = 1 << num_lanes_shift;
595 	uint32_t gen_speed_mask = 1 << gen_speed_shift;
596 	int num_lanes_factor = 0, gen_speed_mbits_factor = 0;
597 
598 	switch (num_lanes_mask) {
599 	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X1:
600 		num_lanes_factor = 1;
601 		break;
602 	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X2:
603 		num_lanes_factor = 2;
604 		break;
605 	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X4:
606 		num_lanes_factor = 4;
607 		break;
608 	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X8:
609 		num_lanes_factor = 8;
610 		break;
611 	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X12:
612 		num_lanes_factor = 12;
613 		break;
614 	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X16:
615 		num_lanes_factor = 16;
616 		break;
617 	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X32:
618 		num_lanes_factor = 32;
619 		break;
620 	}
621 
622 	switch (gen_speed_mask) {
623 	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1:
624 		gen_speed_mbits_factor = 2500;
625 		break;
626 	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2:
627 		gen_speed_mbits_factor = 5000;
628 		break;
629 	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3:
630 		gen_speed_mbits_factor = 8000;
631 		break;
632 	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4:
633 		gen_speed_mbits_factor = 16000;
634 		break;
635 	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5:
636 		gen_speed_mbits_factor = 32000;
637 		break;
638 	}
639 
640 	return (num_lanes_factor * gen_speed_mbits_factor)/BITS_PER_BYTE;
641 }
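
/* Worked example of the calculation above, assuming hypothetical capability
 * masks: if fls() selects the x16 width bit and the Gen4 speed bit, the
 * result is 16 lanes * 16000 Mbit/s / 8 bits-per-byte = 32000 MB/s. This is
 * the raw signalling rate; link encoding and protocol overhead are ignored.
 */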
642 
643 int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
644 				enum kgd_engine_type engine,
645 				uint32_t vmid, uint64_t gpu_addr,
646 				uint32_t *ib_cmd, uint32_t ib_len)
647 {
648 	struct amdgpu_job *job;
649 	struct amdgpu_ib *ib;
650 	struct amdgpu_ring *ring;
651 	struct dma_fence *f = NULL;
652 	int ret;
653 
654 	switch (engine) {
655 	case KGD_ENGINE_MEC1:
656 		ring = &adev->gfx.compute_ring[0];
657 		break;
658 	case KGD_ENGINE_SDMA1:
659 		ring = &adev->sdma.instance[0].ring;
660 		break;
661 	case KGD_ENGINE_SDMA2:
662 		ring = &adev->sdma.instance[1].ring;
663 		break;
664 	default:
665 		pr_err("Invalid engine in IB submission: %d\n", engine);
666 		ret = -EINVAL;
667 		goto err;
668 	}
669 
670 	ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job, 0);
671 	if (ret)
672 		goto err;
673 
674 	ib = &job->ibs[0];
675 	memset(ib, 0, sizeof(struct amdgpu_ib));
676 
677 	ib->gpu_addr = gpu_addr;
678 	ib->ptr = ib_cmd;
679 	ib->length_dw = ib_len;
680 	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
681 	job->vmid = vmid;
682 	job->num_ibs = 1;
683 
684 	ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
685 
686 	if (ret) {
687 		DRM_ERROR("amdgpu: failed to schedule IB.\n");
688 		goto err_ib_sched;
689 	}
690 
691 	/* Drop the initial kref_init count (see drm_sched_main as example) */
692 	dma_fence_put(f);
693 	ret = dma_fence_wait(f, false);
694 
695 err_ib_sched:
696 	amdgpu_job_free(job);
697 err:
698 	return ret;
699 }
700 
701 void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
702 {
703 	enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE;
704 	if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 &&
705 	    ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) ||
706 		(IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 12)) {
707 		pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled");
708 		amdgpu_gfx_off_ctrl(adev, idle);
709 	} else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) &&
710 		(adev->flags & AMD_IS_APU)) {
711 		/* Disable GFXOFF and PG. Temporary workaround
712 		 * to fix some compute applications issue on GFX9.
713 		 */
714 		struct amdgpu_ip_block *gfx_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
715 		if (gfx_block != NULL)
716 			gfx_block->version->funcs->set_powergating_state((void *)gfx_block, state);
717 	}
718 	amdgpu_dpm_switch_power_profile(adev,
719 					PP_SMC_POWER_PROFILE_COMPUTE,
720 					!idle);
721 }
722 
723 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
724 {
725 	if (adev->kfd.dev)
726 		return vmid >= adev->vm_manager.first_kfd_vmid;
727 
728 	return false;
729 }
730 
731 bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
732 {
733 	return adev->have_atomics_support;
734 }
735 
736 void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev)
737 {
738 	amdgpu_device_flush_hdp(adev, NULL);
739 }
740 
741 bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev)
742 {
743 	return amdgpu_ras_get_fed_status(adev);
744 }
745 
746 void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
747 				enum amdgpu_ras_block block, uint16_t pasid,
748 				pasid_notify pasid_fn, void *data, uint32_t reset)
749 {
750 
751 	if (amdgpu_uniras_enabled(adev)) {
752 		struct ras_ih_info ih_info;
753 
754 		memset(&ih_info, 0, sizeof(ih_info));
755 		ih_info.block = block;
756 		ih_info.pasid = pasid;
757 		ih_info.reset = reset;
758 		ih_info.pasid_fn = pasid_fn;
759 		ih_info.data = data;
760 		amdgpu_ras_mgr_handle_consumer_interrupt(adev, &ih_info);
761 		return;
762 	}
763 
764 	amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data, reset);
765 }
766 
767 void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
768 	enum amdgpu_ras_block block, uint32_t reset)
769 {
770 	amdgpu_umc_pasid_poison_handler(adev, block, 0, NULL, NULL, reset);
771 }
772 
773 int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
774 					uint32_t *payload)
775 {
776 	int ret;
777 
778 	/* Bail if the device or IH ring is not ready. */
779 	ret = amdgpu_ih_wait_on_checkpoint_process_ts(adev, &adev->irq.ih);
780 	if (ret)
781 		return ret;
782 
783 	/* Send payload to fence KFD interrupts */
784 	amdgpu_amdkfd_interrupt(adev, payload);
785 
786 	return 0;
787 }
788 
789 int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev)
790 {
791 	return kgd2kfd_check_and_lock_kfd(adev->kfd.dev);
792 }
793 
794 void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev)
795 {
796 	kgd2kfd_unlock_kfd(adev->kfd.dev);
797 }
798 
799 
800 u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
801 {
802 	s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id);
803 	u64 tmp;
804 
805 	if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 0) {
806 		if (adev->gmc.is_app_apu && adev->gmc.num_mem_partitions == 1) {
807 			/* In NPS1 mode, restrict the reported VRAM to the
808 			 * ttm_pages_limit, which defaults to 1/2 of system
809 			 * memory. For other partition modes, the HBM is already
810 			 * divided uniformly per reported NUMA node. Users who
811 			 * want to go beyond the default TTM limit and maximize
812 			 * ROCm allocations can raise the TTM and sysmem limits.
813 			 */
814 
815 			tmp = (ttm_tt_pages_limit() << PAGE_SHIFT) / num_online_nodes();
816 		} else {
817 			tmp = adev->gmc.mem_partitions[mem_id].size;
818 		}
819 		do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
820 		return ALIGN_DOWN(tmp, PAGE_SIZE);
821 	} else if (adev->apu_prefer_gtt) {
822 		return (ttm_tt_pages_limit() << PAGE_SHIFT);
823 	} else {
824 		return adev->gmc.real_vram_size;
825 	}
826 }
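
/* Illustrative walk through the NPS1 branch above, with made-up numbers: on
 * an is_app_apu system with 512 GiB of system memory, ttm_tt_pages_limit()
 * << PAGE_SHIFT is roughly 256 GiB by default; with one online NUMA node
 * and num_xcp_per_mem_partition == 2, each XCP reports about 128 GiB,
 * rounded down to a PAGE_SIZE multiple.
 */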
827 
828 int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
829 			    u32 inst)
830 {
831 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
832 	struct amdgpu_ring *kiq_ring = &kiq->ring;
833 	struct amdgpu_ring_funcs *ring_funcs;
834 	struct amdgpu_ring *ring;
835 	int r = 0;
836 
837 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
838 		return -EINVAL;
839 
840 	if (!kiq_ring->sched.ready || amdgpu_in_reset(adev))
841 		return 0;
842 
843 	ring_funcs = kzalloc(sizeof(*ring_funcs), GFP_KERNEL);
844 	if (!ring_funcs)
845 		return -ENOMEM;
846 
847 	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
848 	if (!ring) {
849 		r = -ENOMEM;
850 		goto free_ring_funcs;
851 	}
852 
853 	ring_funcs->type = AMDGPU_RING_TYPE_COMPUTE;
854 	ring->doorbell_index = doorbell_off;
855 	ring->funcs = ring_funcs;
856 
857 	spin_lock(&kiq->ring_lock);
858 
859 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
860 		spin_unlock(&kiq->ring_lock);
861 		r = -ENOMEM;
862 		goto free_ring;
863 	}
864 
865 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 0, 0);
866 
867 	/* Submit unmap queue packet */
868 	amdgpu_ring_commit(kiq_ring);
869 	/*
870 	 * The ring test does a basic scratch register change check. Run it
871 	 * to make sure the unmap-queues packet submitted above has been
872 	 * processed successfully before returning.
873 	 */
874 	r = amdgpu_ring_test_helper(kiq_ring);
875 
876 	spin_unlock(&kiq->ring_lock);
877 
878 free_ring:
879 	kfree(ring);
880 
881 free_ring_funcs:
882 	kfree(ring_funcs);
883 
884 	return r;
885 }
886 
887 /* Stop scheduling on KFD */
888 int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id)
889 {
890 	if (!adev->kfd.init_complete)
891 		return 0;
892 
893 	return kgd2kfd_stop_sched(adev->kfd.dev, node_id);
894 }
895 
896 /* Start scheduling on KFD */
897 int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id)
898 {
899 	if (!adev->kfd.init_complete)
900 		return 0;
901 
902 	return kgd2kfd_start_sched(adev->kfd.dev, node_id);
903 }
904 
905 /* check if there are KFD queues active */
906 bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id)
907 {
908 	if (!adev->kfd.init_complete)
909 		return false;
910 
911 	return kgd2kfd_compute_active(adev->kfd.dev, node_id);
912 }
913 
914 /* Config CGTT_SQ_CLK_CTRL */
915 int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id,
916 	bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable)
917 {
918 	int r;
919 
920 	if (!adev->kfd.init_complete)
921 		return 0;
922 
923 	r = psp_config_sq_perfmon(&adev->psp, xcp_id, core_override_enable,
924 					reg_override_enable, perfmon_override_enable);
925 
926 	return r;
927 }
928