xref: /linux/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
2 /*
3  * Copyright 2016-2022 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include <linux/printk.h>
26 #include <linux/slab.h>
27 #include <linux/uaccess.h>
28 #include "kfd_priv.h"
29 #include "kfd_mqd_manager.h"
30 #include "v9_structs.h"
31 #include "gc/gc_9_0_offset.h"
32 #include "gc/gc_9_0_sh_mask.h"
33 #include "sdma0/sdma0_4_0_sh_mask.h"
34 #include "amdgpu_amdkfd.h"
35 #include "kfd_device_queue_manager.h"
36 
37 static void update_mqd(struct mqd_manager *mm, void *mqd,
38 		       struct queue_properties *q,
39 		       struct mqd_update_info *minfo);
40 
41 static uint64_t mqd_stride_v9(struct mqd_manager *mm,
42 				struct queue_properties *q)
43 {
44 	if (mm->dev->kfd->cwsr_enabled &&
45 	    q->type == KFD_QUEUE_TYPE_COMPUTE) {
46 
47 		/* On gfxv9, the MQD resides in the first 4K page,
48 		 * followed by the control stack. Align both to
49 		 * AMDGPU_GPU_PAGE_SIZE to maintain the required 4K boundary.
50 		 */
51 
52 		return ALIGN(ALIGN(q->ctl_stack_size, AMDGPU_GPU_PAGE_SIZE) +
53 			ALIGN(sizeof(struct v9_mqd), AMDGPU_GPU_PAGE_SIZE), PAGE_SIZE);
54 	}
55 
56 	return mm->mqd_size;
57 }
58 
/* Interpret an opaque MQD pointer as a GFX v9 compute MQD. */
static inline struct v9_mqd *get_mqd(void *mqd)
{
	struct v9_mqd *m = mqd;

	return m;
}
63 
/* Interpret an opaque MQD pointer as a v9 SDMA MQD. */
static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	struct v9_sdma_mqd *m = mqd;

	return m;
}
68 
69 static void update_cu_mask(struct mqd_manager *mm, void *mqd,
70 			struct mqd_update_info *minfo, uint32_t inst)
71 {
72 	struct v9_mqd *m;
73 	uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
74 
75 	if (!minfo || !minfo->cu_mask.ptr)
76 		return;
77 
78 	mqd_symmetrically_map_cu_mask(mm,
79 		minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, inst);
80 
81 	m = get_mqd(mqd);
82 
83 	m->compute_static_thread_mgmt_se0 = se_mask[0];
84 	m->compute_static_thread_mgmt_se1 = se_mask[1];
85 	m->compute_static_thread_mgmt_se2 = se_mask[2];
86 	m->compute_static_thread_mgmt_se3 = se_mask[3];
87 	if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
88 	    KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
89 	    KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0)) {
90 		m->compute_static_thread_mgmt_se4 = se_mask[4];
91 		m->compute_static_thread_mgmt_se5 = se_mask[5];
92 		m->compute_static_thread_mgmt_se6 = se_mask[6];
93 		m->compute_static_thread_mgmt_se7 = se_mask[7];
94 
95 		pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
96 			m->compute_static_thread_mgmt_se0,
97 			m->compute_static_thread_mgmt_se1,
98 			m->compute_static_thread_mgmt_se2,
99 			m->compute_static_thread_mgmt_se3,
100 			m->compute_static_thread_mgmt_se4,
101 			m->compute_static_thread_mgmt_se5,
102 			m->compute_static_thread_mgmt_se6,
103 			m->compute_static_thread_mgmt_se7);
104 	} else {
105 		pr_debug("inst: %u, update cu mask to %#x %#x %#x %#x\n",
106 			inst, m->compute_static_thread_mgmt_se0,
107 			m->compute_static_thread_mgmt_se1,
108 			m->compute_static_thread_mgmt_se2,
109 			m->compute_static_thread_mgmt_se3);
110 	}
111 }
112 
113 static void set_priority(struct v9_mqd *m, struct queue_properties *q)
114 {
115 	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
116 }
117 
118 static bool mqd_on_vram(struct amdgpu_device *adev)
119 {
120 	if (adev->apu_prefer_gtt)
121 		return false;
122 
123 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
124 	case IP_VERSION(9, 4, 3):
125 	case IP_VERSION(9, 5, 0):
126 		return true;
127 	default:
128 		return false;
129 	}
130 }
131 
132 static struct kfd_mem_obj *allocate_mqd(struct mqd_manager *mm,
133 		struct queue_properties *q)
134 {
135 	int retval;
136 	struct kfd_node *node = mm->dev;
137 	struct kfd_mem_obj *mqd_mem_obj = NULL;
138 
139 	/* For V9 only, due to a HW bug, the control stack of a user mode
140 	 * compute queue needs to be allocated just behind the page boundary
141 	 * of its regular MQD buffer. So we allocate an enlarged MQD buffer:
142 	 * the first page of the buffer serves as the regular MQD buffer
143 	 * purpose and the remaining is for control stack. Although the two
144 	 * parts are in the same buffer object, they need different memory
145 	 * types: MQD part needs UC (uncached) as usual, while control stack
146 	 * needs NC (non coherent), which is different from the UC type which
147 	 * is used when control stack is allocated in user space.
148 	 *
149 	 * Because of all those, we use the gtt allocation function instead
150 	 * of sub-allocation function for this enlarged MQD buffer. Moreover,
151 	 * in order to achieve two memory types in a single buffer object, we
152 	 * pass a special bo flag AMDGPU_GEM_CREATE_CP_MQD_GFX9 to instruct
153 	 * amdgpu memory functions to do so.
154 	 */
155 	if (node->kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
156 		mqd_mem_obj = kzalloc_obj(struct kfd_mem_obj);
157 		if (!mqd_mem_obj)
158 			return NULL;
159 		retval = amdgpu_amdkfd_alloc_kernel_mem(node->adev,
160 			(ALIGN(ALIGN(q->ctl_stack_size, AMDGPU_GPU_PAGE_SIZE) +
161 			ALIGN(sizeof(struct v9_mqd), AMDGPU_GPU_PAGE_SIZE), PAGE_SIZE)) *
162 			NUM_XCC(node->xcc_mask),
163 			mqd_on_vram(node->adev) ? AMDGPU_GEM_DOMAIN_VRAM :
164 						  AMDGPU_GEM_DOMAIN_GTT,
165 			&(mqd_mem_obj->mem),
166 			&(mqd_mem_obj->gpu_addr),
167 			(void *)&(mqd_mem_obj->cpu_ptr), true);
168 
169 		if (retval) {
170 			kfree(mqd_mem_obj);
171 			return NULL;
172 		}
173 	} else {
174 		retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
175 				&mqd_mem_obj);
176 		if (retval)
177 			return NULL;
178 	}
179 
180 	return mqd_mem_obj;
181 }
182 
183 static void init_mqd(struct mqd_manager *mm, void **mqd,
184 			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
185 			struct queue_properties *q)
186 {
187 	uint64_t addr;
188 	struct v9_mqd *m;
189 
190 	m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
191 	addr = mqd_mem_obj->gpu_addr;
192 
193 	memset(m, 0, sizeof(struct v9_mqd));
194 
195 	m->header = 0xC0310800;
196 	m->compute_pipelinestat_enable = 1;
197 	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
198 	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
199 	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
200 	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
201 	m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
202 	m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
203 	m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
204 	m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;
205 
206 	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
207 			0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
208 
209 	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
210 	m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
211 
212 	m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
213 
214 	m->cp_mqd_base_addr_lo        = lower_32_bits(addr);
215 	m->cp_mqd_base_addr_hi        = upper_32_bits(addr);
216 
217 	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
218 			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
219 			1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
220 
221 	/* Set cp_hqd_hq_scheduler0 bit 14 to 1 to have the CP set up the
222 	 * DISPATCH_PTR.  This is required for the kfd debugger
223 	 */
224 	m->cp_hqd_hq_status0 = 1 << 14;
225 
226 	if (q->format == KFD_QUEUE_FORMAT_AQL)
227 		m->cp_hqd_aql_control =
228 			1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
229 
230 	if (q->tba_addr)
231 		m->compute_pgm_rsrc2 |=
232 			(1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
233 
234 	if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address) {
235 		m->cp_hqd_persistent_state |=
236 			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
237 		m->cp_hqd_ctx_save_base_addr_lo =
238 			lower_32_bits(q->ctx_save_restore_area_address);
239 		m->cp_hqd_ctx_save_base_addr_hi =
240 			upper_32_bits(q->ctx_save_restore_area_address);
241 		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
242 		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
243 		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
244 		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
245 	}
246 
247 	mutex_lock(&mm->dev->kfd->profiler_lock);
248 	if (mm->dev->kfd->profiler_process != NULL)
249 		m->compute_perfcount_enable = 1;
250 	mutex_unlock(&mm->dev->kfd->profiler_lock);
251 
252 	*mqd = m;
253 	if (gart_addr)
254 		*gart_addr = addr;
255 	update_mqd(mm, m, q, NULL);
256 }
257 
258 static int load_mqd(struct mqd_manager *mm, void *mqd,
259 			uint32_t pipe_id, uint32_t queue_id,
260 			struct queue_properties *p, struct mm_struct *mms)
261 {
262 	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
263 	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
264 
265 	return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
266 					  (uint32_t __user *)p->write_ptr,
267 					  wptr_shift, 0, mms, 0);
268 }
269 
270 static void update_mqd(struct mqd_manager *mm, void *mqd,
271 			struct queue_properties *q,
272 			struct mqd_update_info *minfo)
273 {
274 	struct v9_mqd *m;
275 
276 	m = get_mqd(mqd);
277 
278 	m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
279 	m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
280 	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
281 
282 	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
283 	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
284 
285 	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
286 	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
287 	m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
288 	m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
289 
290 	m->cp_hqd_pq_doorbell_control =
291 		q->doorbell_off <<
292 			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
293 	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
294 			m->cp_hqd_pq_doorbell_control);
295 
296 	m->cp_hqd_ib_control =
297 		3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
298 		1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT;
299 
300 	/*
301 	 * HW does not clamp this field correctly. Maximum EOP queue size
302 	 * is constrained by per-SE EOP done signal count, which is 8-bit.
303 	 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
304 	 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
305 	 * is safe, giving a maximum field value of 0xA.
306 	 *
307 	 * Also, do calculation only if EOP is used (size > 0), otherwise
308 	 * the order_base_2 calculation provides incorrect result.
309 	 *
310 	 */
311 	m->cp_hqd_eop_control = q->eop_ring_buffer_size ?
312 		min(0xA, order_base_2(q->eop_ring_buffer_size / 4) - 1) : 0;
313 
314 	m->cp_hqd_eop_base_addr_lo =
315 			lower_32_bits(q->eop_ring_buffer_address >> 8);
316 	m->cp_hqd_eop_base_addr_hi =
317 			upper_32_bits(q->eop_ring_buffer_address >> 8);
318 
319 	m->cp_hqd_iq_timer = 0;
320 
321 	m->cp_hqd_vmid = q->vmid;
322 
323 	if (q->format == KFD_QUEUE_FORMAT_AQL) {
324 		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
325 				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
326 				1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT |
327 				1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT;
328 		m->cp_hqd_pq_doorbell_control |= 1 <<
329 			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
330 	}
331 	if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address)
332 		m->cp_hqd_ctx_save_control = 0;
333 
334 	if (minfo) {
335 		if (minfo->update_flag == UPDATE_FLAG_PERFCOUNT_ENABLE)
336 			m->compute_perfcount_enable = 1;
337 		else if (minfo->update_flag == UPDATE_FLAG_PERFCOUNT_DISABLE)
338 			m->compute_perfcount_enable = 0;
339 	}
340 
341 	if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
342 	    KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
343 	    KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0))
344 		update_cu_mask(mm, mqd, minfo, 0);
345 	set_priority(m, q);
346 
347 	if (minfo && KFD_GC_VERSION(mm->dev) >= IP_VERSION(9, 4, 2)) {
348 		if (minfo->update_flag & UPDATE_FLAG_IS_GWS)
349 			m->compute_resource_limits |=
350 				COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
351 		else
352 			m->compute_resource_limits &=
353 				~COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
354 	}
355 
356 	q->is_active = QUEUE_IS_ACTIVE(*q);
357 }
358 
359 
360 static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
361 {
362 	struct v9_mqd *m = (struct v9_mqd *)mqd;
363 	uint32_t doorbell_id = m->queue_doorbell_id0;
364 
365 	m->queue_doorbell_id0 = 0;
366 
367 	return kfd_check_hiq_mqd_doorbell_id(mm->dev, doorbell_id, 0);
368 }
369 
370 static int get_wave_state(struct mqd_manager *mm, void *mqd,
371 			  struct queue_properties *q,
372 			  void __user *ctl_stack,
373 			  u32 *ctl_stack_used_size,
374 			  u32 *save_area_used_size)
375 {
376 	struct v9_mqd *m;
377 	struct kfd_context_save_area_header header;
378 	u32 cntl_stack_size;
379 	u32 cntl_stack_offset;
380 
381 	/* Control stack is located one page after MQD. */
382 	void *mqd_ctl_stack = (void *)((uintptr_t)mqd + AMDGPU_GPU_PAGE_SIZE);
383 
384 	m = get_mqd(mqd);
385 	cntl_stack_size = min_t(u32, m->cp_hqd_cntl_stack_size,   q->ctl_stack_size);
386 	cntl_stack_offset = min_t(u32, m->cp_hqd_cntl_stack_offset, cntl_stack_size);
387 
388 	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
389 		m->cp_hqd_cntl_stack_offset;
390 	*save_area_used_size = m->cp_hqd_wg_state_offset -
391 		m->cp_hqd_cntl_stack_size;
392 
393 	header.wave_state.control_stack_size = *ctl_stack_used_size;
394 	header.wave_state.wave_state_size = *save_area_used_size;
395 
396 	header.wave_state.wave_state_offset = m->cp_hqd_wg_state_offset;
397 	header.wave_state.control_stack_offset = m->cp_hqd_cntl_stack_offset;
398 
399 	if (copy_to_user(ctl_stack, &header, sizeof(header.wave_state)))
400 		return -EFAULT;
401 
402 	*ctl_stack_used_size = cntl_stack_size - cntl_stack_offset;
403 
404 	if (copy_to_user(ctl_stack + cntl_stack_offset, mqd_ctl_stack + cntl_stack_offset,
405 					*ctl_stack_used_size))
406 		return -EFAULT;
407 
408 	return 0;
409 }
410 
411 static int get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
412 {
413 	struct v9_mqd *m = get_mqd(mqd);
414 
415 	if (check_mul_overflow(m->cp_hqd_cntl_stack_size, NUM_XCC(mm->dev->xcc_mask), ctl_stack_size))
416 		return -EINVAL;
417 
418 	return 0;
419 }
420 
421 static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
422 {
423 	struct v9_mqd *m;
424 	/* Control stack is located one page after MQD. */
425 	void *ctl_stack = (void *)((uintptr_t)mqd + AMDGPU_GPU_PAGE_SIZE);
426 
427 	m = get_mqd(mqd);
428 
429 	memcpy(mqd_dst, m, sizeof(struct v9_mqd));
430 	memcpy(ctl_stack_dst, ctl_stack, m->cp_hqd_cntl_stack_size);
431 }
432 
433 static void checkpoint_mqd_v9_4_3(struct mqd_manager *mm,
434 								  void *mqd,
435 								  void *mqd_dst,
436 								  void *ctl_stack_dst)
437 {
438 	struct v9_mqd *m;
439 	int xcc;
440 	uint64_t size = get_mqd(mqd)->cp_mqd_stride_size;
441 
442 	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
443 		m = get_mqd(mqd + size * xcc);
444 
445 		checkpoint_mqd(mm, m,
446 				(uint8_t *)mqd_dst + sizeof(*m) * xcc,
447 				(uint8_t *)ctl_stack_dst + m->cp_hqd_cntl_stack_size * xcc);
448 	}
449 }
450 
451 static void restore_mqd(struct mqd_manager *mm, void **mqd,
452 			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
453 			struct queue_properties *qp,
454 			const void *mqd_src,
455 			const void *ctl_stack_src, u32 ctl_stack_size)
456 {
457 	uint64_t addr;
458 	struct v9_mqd *m;
459 	void *ctl_stack;
460 
461 	m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
462 	addr = mqd_mem_obj->gpu_addr;
463 
464 	memcpy(m, mqd_src, sizeof(*m));
465 
466 	*mqd = m;
467 	if (gart_addr)
468 		*gart_addr = addr;
469 
470 	/* Control stack is located one page after MQD. */
471 	ctl_stack = (void *)((uintptr_t)*mqd + AMDGPU_GPU_PAGE_SIZE);
472 	memcpy(ctl_stack, ctl_stack_src, ctl_stack_size);
473 
474 	m->cp_hqd_pq_doorbell_control =
475 		qp->doorbell_off <<
476 			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
477 	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
478 				m->cp_hqd_pq_doorbell_control);
479 
480 	qp->is_active = 0;
481 }
482 
483 static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
484 			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
485 			struct queue_properties *q)
486 {
487 	struct v9_mqd *m;
488 
489 	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
490 
491 	m = get_mqd(*mqd);
492 
493 	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
494 			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
495 }
496 
497 static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd,
498 			enum kfd_preempt_type type, unsigned int timeout,
499 			uint32_t pipe_id, uint32_t queue_id)
500 {
501 	int err;
502 	struct v9_mqd *m;
503 	u32 doorbell_off;
504 
505 	m = get_mqd(mqd);
506 
507 	doorbell_off = m->cp_hqd_pq_doorbell_control >>
508 			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
509 	err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0);
510 	if (err)
511 		pr_debug("Destroy HIQ MQD failed: %d\n", err);
512 
513 	return err;
514 }
515 
516 static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
517 		struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
518 		struct queue_properties *q)
519 {
520 	struct v9_sdma_mqd *m;
521 
522 	m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;
523 
524 	memset(m, 0, sizeof(struct v9_sdma_mqd));
525 
526 	*mqd = m;
527 	if (gart_addr)
528 		*gart_addr = mqd_mem_obj->gpu_addr;
529 
530 	mm->update_mqd(mm, m, q, NULL);
531 }
532 
533 #define SDMA_RLC_DUMMY_DEFAULT 0xf
534 
535 static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
536 			struct queue_properties *q,
537 			struct mqd_update_info *minfo)
538 {
539 	struct v9_sdma_mqd *m;
540 
541 	m = get_sdma_mqd(mqd);
542 	m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
543 		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
544 		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
545 		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
546 		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
547 
548 	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
549 	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
550 	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
551 	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
552 	m->sdmax_rlcx_doorbell_offset =
553 		q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
554 
555 	m->sdma_engine_id = q->sdma_engine_id;
556 	m->sdma_queue_id = q->sdma_queue_id;
557 	m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
558 	/* Allow context switch so we don't cross-process starve with a massive
559 	 * command buffer of long-running SDMA commands
560 	 */
561 	m->sdmax_rlcx_ib_cntl |= SDMA0_GFX_IB_CNTL__SWITCH_INSIDE_IB_MASK;
562 
563 	q->is_active = QUEUE_IS_ACTIVE(*q);
564 }
565 
566 static void checkpoint_mqd_sdma(struct mqd_manager *mm,
567 				void *mqd,
568 				void *mqd_dst,
569 				void *ctl_stack_dst)
570 {
571 	struct v9_sdma_mqd *m;
572 
573 	m = get_sdma_mqd(mqd);
574 
575 	memcpy(mqd_dst, m, sizeof(struct v9_sdma_mqd));
576 }
577 
578 static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
579 			     struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
580 			     struct queue_properties *qp,
581 			     const void *mqd_src,
582 			     const void *ctl_stack_src, const u32 ctl_stack_size)
583 {
584 	uint64_t addr;
585 	struct v9_sdma_mqd *m;
586 
587 	m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;
588 	addr = mqd_mem_obj->gpu_addr;
589 
590 	memcpy(m, mqd_src, sizeof(*m));
591 
592 	m->sdmax_rlcx_doorbell_offset =
593 		qp->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
594 
595 	*mqd = m;
596 	if (gart_addr)
597 		*gart_addr = addr;
598 
599 	qp->is_active = 0;
600 }
601 
602 static void init_mqd_hiq_v9_4_3(struct mqd_manager *mm, void **mqd,
603 			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
604 			struct queue_properties *q)
605 {
606 	struct v9_mqd *m;
607 	int xcc = 0;
608 	struct kfd_mem_obj xcc_mqd_mem_obj;
609 	uint64_t xcc_gart_addr = 0;
610 
611 	memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
612 
613 	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
614 		kfd_get_hiq_xcc_mqd(mm->dev, &xcc_mqd_mem_obj, xcc);
615 
616 		init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
617 
618 		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
619 					1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
620 					1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
621 		if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
622 			m->cp_hqd_pq_doorbell_control |= 1 <<
623 				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
624 		m->cp_mqd_stride_size = kfd_hiq_mqd_stride(mm->dev);
625 		if (xcc == 0) {
626 			/* Set no_update_rptr = 0 in Master XCC */
627 			m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
628 
629 			/* Set the MQD pointer and gart address to XCC0 MQD */
630 			*mqd = m;
631 			*gart_addr = xcc_gart_addr;
632 		}
633 	}
634 }
635 
636 static int hiq_load_mqd_kiq_v9_4_3(struct mqd_manager *mm, void *mqd,
637 			uint32_t pipe_id, uint32_t queue_id,
638 			struct queue_properties *p, struct mm_struct *mms)
639 {
640 	uint32_t xcc_mask = mm->dev->xcc_mask;
641 	int xcc_id, err = 0, inst = 0;
642 	void *xcc_mqd;
643 	uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
644 
645 	for_each_inst(xcc_id, xcc_mask) {
646 		xcc_mqd = mqd + hiq_mqd_size * inst;
647 		err = mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, xcc_mqd,
648 						     pipe_id, queue_id,
649 						     p->doorbell_off, xcc_id);
650 		if (err) {
651 			pr_debug("Failed to load HIQ MQD for XCC: %d\n", inst);
652 			break;
653 		}
654 		++inst;
655 	}
656 
657 	return err;
658 }
659 
660 static int destroy_hiq_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
661 			enum kfd_preempt_type type, unsigned int timeout,
662 			uint32_t pipe_id, uint32_t queue_id)
663 {
664 	uint32_t xcc_mask = mm->dev->xcc_mask;
665 	int xcc_id, err = 0, inst = 0;
666 	uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
667 	struct v9_mqd *m;
668 	u32 doorbell_off;
669 
670 	for_each_inst(xcc_id, xcc_mask) {
671 		m = get_mqd(mqd + hiq_mqd_size * inst);
672 
673 		doorbell_off = m->cp_hqd_pq_doorbell_control >>
674 				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
675 
676 		err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, xcc_id);
677 		if (err) {
678 			pr_debug("Destroy HIQ MQD failed for xcc: %d\n", inst);
679 			break;
680 		}
681 		++inst;
682 	}
683 
684 	return err;
685 }
686 
687 static bool check_preemption_failed_v9_4_3(struct mqd_manager *mm, void *mqd)
688 {
689 	uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
690 	uint32_t xcc_mask = mm->dev->xcc_mask;
691 	int inst = 0, xcc_id;
692 	struct v9_mqd *m;
693 	bool ret = false;
694 
695 	for_each_inst(xcc_id, xcc_mask) {
696 		m = get_mqd(mqd + hiq_mqd_size * inst);
697 		ret |= kfd_check_hiq_mqd_doorbell_id(mm->dev,
698 					m->queue_doorbell_id0, inst);
699 		m->queue_doorbell_id0 = 0;
700 		++inst;
701 	}
702 
703 	return ret;
704 }
705 
706 static void get_xcc_mqd(struct kfd_mem_obj *mqd_mem_obj,
707 			       struct kfd_mem_obj *xcc_mqd_mem_obj,
708 			       uint64_t offset)
709 {
710 	xcc_mqd_mem_obj->mem = (offset == 0) ?
711 					mqd_mem_obj->mem : NULL;
712 	xcc_mqd_mem_obj->gpu_addr = mqd_mem_obj->gpu_addr + offset;
713 	xcc_mqd_mem_obj->cpu_ptr = (uint32_t *)((uintptr_t)mqd_mem_obj->cpu_ptr
714 						+ offset);
715 }
716 
717 static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
718 			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
719 			struct queue_properties *q)
720 {
721 	struct v9_mqd *m;
722 	int xcc = 0;
723 	struct kfd_mem_obj xcc_mqd_mem_obj;
724 	uint64_t xcc_gart_addr = 0;
725 	uint64_t xcc_ctx_save_restore_area_address;
726 	uint64_t offset = mm->mqd_stride(mm, q);
727 	uint32_t local_xcc_start = mm->dev->dqm->current_logical_xcc_start++;
728 
729 	memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
730 	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
731 		get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset*xcc);
732 
733 		init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
734 		if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
735 				m->cp_hqd_pq_doorbell_control |= 1 <<
736 					CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
737 		m->cp_mqd_stride_size = offset;
738 
739 		/*
740 		 * Update the CWSR address for each XCC if CWSR is enabled
741 		 * and CWSR area is allocated in thunk
742 		 */
743 		if (mm->dev->kfd->cwsr_enabled &&
744 		    q->ctx_save_restore_area_address) {
745 			xcc_ctx_save_restore_area_address =
746 				q->ctx_save_restore_area_address +
747 				(xcc * q->ctx_save_restore_area_size);
748 
749 			m->cp_hqd_ctx_save_base_addr_lo =
750 				lower_32_bits(xcc_ctx_save_restore_area_address);
751 			m->cp_hqd_ctx_save_base_addr_hi =
752 				upper_32_bits(xcc_ctx_save_restore_area_address);
753 		}
754 
755 		if (q->format == KFD_QUEUE_FORMAT_AQL) {
756 			m->compute_tg_chunk_size = 1;
757 			m->compute_current_logic_xcc_id =
758 					(local_xcc_start + xcc) %
759 					NUM_XCC(mm->dev->xcc_mask);
760 
761 			switch (xcc) {
762 			case 0:
763 				/* Master XCC */
764 				m->cp_hqd_pq_control &=
765 					~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
766 				break;
767 			default:
768 				break;
769 			}
770 		} else {
771 			/* PM4 Queue */
772 			m->compute_current_logic_xcc_id = 0;
773 			m->compute_tg_chunk_size = 0;
774 			m->pm4_target_xcc_in_xcp = q->pm4_target_xcc;
775 		}
776 
777 		if (xcc == 0) {
778 			/* Set the MQD pointer and gart address to XCC0 MQD */
779 			*mqd = m;
780 			*gart_addr = xcc_gart_addr;
781 		}
782 	}
783 
784 	if (mqd_on_vram(mm->dev->adev))
785 		amdgpu_device_flush_hdp(mm->dev->adev, NULL);
786 }
787 
788 static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
789 		      struct queue_properties *q, struct mqd_update_info *minfo)
790 {
791 	struct v9_mqd *m;
792 	int xcc = 0;
793 	uint64_t size = mm->mqd_stride(mm, q);
794 
795 	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
796 		m = get_mqd(mqd + size * xcc);
797 		update_mqd(mm, m, q, minfo);
798 
799 		if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
800 				m->cp_hqd_pq_doorbell_control |= 1 <<
801 					CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
802 		update_cu_mask(mm, m, minfo, xcc);
803 
804 		if (q->format == KFD_QUEUE_FORMAT_AQL) {
805 			switch (xcc) {
806 			case 0:
807 				/* Master XCC */
808 				m->cp_hqd_pq_control &=
809 					~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
810 				break;
811 			default:
812 				break;
813 			}
814 			m->compute_tg_chunk_size = 1;
815 		} else {
816 			/* PM4 Queue */
817 			m->compute_current_logic_xcc_id = 0;
818 			m->compute_tg_chunk_size = 0;
819 			m->pm4_target_xcc_in_xcp = q->pm4_target_xcc;
820 		}
821 	}
822 
823 	if (mqd_on_vram(mm->dev->adev))
824 		amdgpu_device_flush_hdp(mm->dev->adev, NULL);
825 }
826 
827 static void restore_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
828 			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
829 			struct queue_properties *qp,
830 			const void *mqd_src,
831 			const void *ctl_stack_src, u32 ctl_stack_size)
832 {
833 	struct kfd_mem_obj xcc_mqd_mem_obj;
834 	u32 mqd_ctl_stack_size;
835 	struct v9_mqd *m;
836 	u32 num_xcc;
837 	int xcc;
838 
839 	uint64_t offset = mm->mqd_stride(mm, qp);
840 
841 	mm->dev->dqm->current_logical_xcc_start++;
842 
843 	num_xcc = NUM_XCC(mm->dev->xcc_mask);
844 	mqd_ctl_stack_size = ctl_stack_size / num_xcc;
845 
846 	memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
847 
848 	/* Set the MQD pointer and gart address to XCC0 MQD */
849 	*mqd = mqd_mem_obj->cpu_ptr;
850 	if (gart_addr)
851 		*gart_addr = mqd_mem_obj->gpu_addr;
852 
853 	for (xcc = 0; xcc < num_xcc; xcc++) {
854 		get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset * xcc);
855 		restore_mqd(mm, (void **)&m,
856 					&xcc_mqd_mem_obj,
857 					NULL,
858 					qp,
859 					(uint8_t *)mqd_src + xcc * sizeof(*m),
860 					(uint8_t *)ctl_stack_src + xcc *  mqd_ctl_stack_size,
861 					mqd_ctl_stack_size);
862 	}
863 
864 	if (mqd_on_vram(mm->dev->adev))
865 		amdgpu_device_flush_hdp(mm->dev->adev, NULL);
866 }
867 static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
868 		   enum kfd_preempt_type type, unsigned int timeout,
869 		   uint32_t pipe_id, uint32_t queue_id)
870 {
871 	uint32_t xcc_mask = mm->dev->xcc_mask;
872 	int xcc_id, err = 0, inst = 0;
873 	void *xcc_mqd;
874 	struct v9_mqd *m;
875 	uint64_t mqd_offset;
876 
877 	m = get_mqd(mqd);
878 	mqd_offset = m->cp_mqd_stride_size;
879 
880 	for_each_inst(xcc_id, xcc_mask) {
881 		xcc_mqd = mqd + mqd_offset * inst;
882 		err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd,
883 						    type, timeout, pipe_id,
884 						    queue_id, xcc_id);
885 		if (err) {
886 			pr_debug("Destroy MQD failed for xcc: %d\n", inst);
887 			break;
888 		}
889 		++inst;
890 	}
891 
892 	return err;
893 }
894 
895 static int load_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
896 			uint32_t pipe_id, uint32_t queue_id,
897 			struct queue_properties *p, struct mm_struct *mms)
898 {
899 	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
900 	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
901 	uint32_t xcc_mask = mm->dev->xcc_mask;
902 	int xcc_id, err = 0, inst = 0;
903 	void *xcc_mqd;
904 	uint64_t mqd_stride_size = mm->mqd_stride(mm, p);
905 
906 	for_each_inst(xcc_id, xcc_mask) {
907 		xcc_mqd = mqd + mqd_stride_size * inst;
908 		err = mm->dev->kfd2kgd->hqd_load(
909 			mm->dev->adev, xcc_mqd, pipe_id, queue_id,
910 			(uint32_t __user *)p->write_ptr, wptr_shift, 0, mms,
911 			xcc_id);
912 		if (err) {
913 			pr_debug("Load MQD failed for xcc: %d\n", inst);
914 			break;
915 		}
916 		++inst;
917 	}
918 
919 	return err;
920 }
921 
922 static int get_wave_state_v9_4_3(struct mqd_manager *mm, void *mqd,
923 				 struct queue_properties *q,
924 				 void __user *ctl_stack,
925 				 u32 *ctl_stack_used_size,
926 				 u32 *save_area_used_size)
927 {
928 	int xcc, err = 0;
929 	void *xcc_mqd;
930 	void __user *xcc_ctl_stack;
931 	uint64_t mqd_stride_size = mm->mqd_stride(mm, q);
932 	u32 tmp_ctl_stack_used_size = 0, tmp_save_area_used_size = 0;
933 
934 	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
935 		xcc_mqd = mqd + mqd_stride_size * xcc;
936 		xcc_ctl_stack = (void __user *)((uintptr_t)ctl_stack +
937 					q->ctx_save_restore_area_size * xcc);
938 
939 		err = get_wave_state(mm, xcc_mqd, q, xcc_ctl_stack,
940 				     &tmp_ctl_stack_used_size,
941 				     &tmp_save_area_used_size);
942 		if (err)
943 			break;
944 
945 		/*
946 		 * Set the ctl_stack_used_size and save_area_used_size to
947 		 * ctl_stack_used_size and save_area_used_size of XCC 0 when
948 		 * passing the info the user-space.
949 		 * For multi XCC, user-space would have to look at the header
950 		 * info of each Control stack area to determine the control
951 		 * stack size and save area used.
952 		 */
953 		if (xcc == 0) {
954 			*ctl_stack_used_size = tmp_ctl_stack_used_size;
955 			*save_area_used_size = tmp_save_area_used_size;
956 		}
957 	}
958 
959 	return err;
960 }
961 
962 #if defined(CONFIG_DEBUG_FS)
963 
964 static int debugfs_show_mqd(struct seq_file *m, void *data)
965 {
966 	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
967 		     data, sizeof(struct v9_mqd), false);
968 	return 0;
969 }
970 
971 static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
972 {
973 	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
974 		     data, sizeof(struct v9_sdma_mqd), false);
975 	return 0;
976 }
977 
978 #endif
979 
980 struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
981 		struct kfd_node *dev)
982 {
983 	struct mqd_manager *mqd;
984 
985 	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
986 		return NULL;
987 
988 	mqd = kzalloc_obj(*mqd);
989 	if (!mqd)
990 		return NULL;
991 
992 	mqd->dev = dev;
993 
994 	switch (type) {
995 	case KFD_MQD_TYPE_CP:
996 		mqd->allocate_mqd = allocate_mqd;
997 		mqd->free_mqd = kfd_free_mqd_cp;
998 		mqd->is_occupied = kfd_is_occupied_cp;
999 		mqd->get_checkpoint_info = get_checkpoint_info;
1000 		mqd->mqd_size = sizeof(struct v9_mqd);
1001 		mqd->mqd_stride = mqd_stride_v9;
1002 #if defined(CONFIG_DEBUG_FS)
1003 		mqd->debugfs_show_mqd = debugfs_show_mqd;
1004 #endif
1005 		if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
1006 		    KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
1007 		    KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
1008 			mqd->init_mqd = init_mqd_v9_4_3;
1009 			mqd->load_mqd = load_mqd_v9_4_3;
1010 			mqd->update_mqd = update_mqd_v9_4_3;
1011 			mqd->destroy_mqd = destroy_mqd_v9_4_3;
1012 			mqd->get_wave_state = get_wave_state_v9_4_3;
1013 			mqd->checkpoint_mqd = checkpoint_mqd_v9_4_3;
1014 			mqd->restore_mqd = restore_mqd_v9_4_3;
1015 		} else {
1016 			mqd->init_mqd = init_mqd;
1017 			mqd->load_mqd = load_mqd;
1018 			mqd->update_mqd = update_mqd;
1019 			mqd->destroy_mqd = kfd_destroy_mqd_cp;
1020 			mqd->get_wave_state = get_wave_state;
1021 			mqd->checkpoint_mqd = checkpoint_mqd;
1022 			mqd->restore_mqd = restore_mqd;
1023 		}
1024 		break;
1025 	case KFD_MQD_TYPE_HIQ:
1026 		mqd->allocate_mqd = allocate_hiq_mqd;
1027 		mqd->free_mqd = free_mqd_hiq_sdma;
1028 		mqd->update_mqd = update_mqd;
1029 		mqd->is_occupied = kfd_is_occupied_cp;
1030 		mqd->mqd_size = sizeof(struct v9_mqd);
1031 		mqd->mqd_stride = kfd_mqd_stride;
1032 #if defined(CONFIG_DEBUG_FS)
1033 		mqd->debugfs_show_mqd = debugfs_show_mqd;
1034 #endif
1035 		mqd->check_preemption_failed = check_preemption_failed;
1036 		if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
1037 		    KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
1038 		    KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
1039 			mqd->init_mqd = init_mqd_hiq_v9_4_3;
1040 			mqd->load_mqd = hiq_load_mqd_kiq_v9_4_3;
1041 			mqd->destroy_mqd = destroy_hiq_mqd_v9_4_3;
1042 			mqd->check_preemption_failed = check_preemption_failed_v9_4_3;
1043 		} else {
1044 			mqd->init_mqd = init_mqd_hiq;
1045 			mqd->load_mqd = kfd_hiq_load_mqd_kiq;
1046 			mqd->destroy_mqd = destroy_hiq_mqd;
1047 			mqd->check_preemption_failed = check_preemption_failed;
1048 		}
1049 		break;
1050 	case KFD_MQD_TYPE_DIQ:
1051 		mqd->allocate_mqd = allocate_mqd;
1052 		mqd->init_mqd = init_mqd_hiq;
1053 		mqd->free_mqd = kfd_free_mqd_cp;
1054 		mqd->load_mqd = load_mqd;
1055 		mqd->update_mqd = update_mqd;
1056 		mqd->destroy_mqd = kfd_destroy_mqd_cp;
1057 		mqd->is_occupied = kfd_is_occupied_cp;
1058 		mqd->mqd_size = sizeof(struct v9_mqd);
1059 #if defined(CONFIG_DEBUG_FS)
1060 		mqd->debugfs_show_mqd = debugfs_show_mqd;
1061 #endif
1062 		break;
1063 	case KFD_MQD_TYPE_SDMA:
1064 		mqd->allocate_mqd = allocate_sdma_mqd;
1065 		mqd->init_mqd = init_mqd_sdma;
1066 		mqd->free_mqd = free_mqd_hiq_sdma;
1067 		mqd->load_mqd = kfd_load_mqd_sdma;
1068 		mqd->update_mqd = update_mqd_sdma;
1069 		mqd->destroy_mqd = kfd_destroy_mqd_sdma;
1070 		mqd->is_occupied = kfd_is_occupied_sdma;
1071 		mqd->checkpoint_mqd = checkpoint_mqd_sdma;
1072 		mqd->restore_mqd = restore_mqd_sdma;
1073 		mqd->mqd_size = sizeof(struct v9_sdma_mqd);
1074 		mqd->mqd_stride = kfd_mqd_stride;
1075 #if defined(CONFIG_DEBUG_FS)
1076 		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
1077 #endif
1078 		break;
1079 	default:
1080 		kfree(mqd);
1081 		return NULL;
1082 	}
1083 
1084 	return mqd;
1085 }
1086