xref: /linux/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c (revision bf4afc53b77aeaa48b5409da5c8da6bb4eff7f43)
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
2 /*
3  * Copyright 2016-2022 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include <linux/printk.h>
26 #include <linux/slab.h>
27 #include <linux/uaccess.h>
28 #include "kfd_priv.h"
29 #include "kfd_mqd_manager.h"
30 #include "v9_structs.h"
31 #include "gc/gc_9_0_offset.h"
32 #include "gc/gc_9_0_sh_mask.h"
33 #include "sdma0/sdma0_4_0_sh_mask.h"
34 #include "amdgpu_amdkfd.h"
35 #include "kfd_device_queue_manager.h"
36 
37 static void update_mqd(struct mqd_manager *mm, void *mqd,
38 		       struct queue_properties *q,
39 		       struct mqd_update_info *minfo);
40 
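/* Per-queue MQD allocation stride: when CWSR is enabled for a user compute
 * queue, each instance holds a page-aligned MQD followed by a page-aligned
 * control stack (see allocate_mqd() below); otherwise only the MQD itself.
 */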
41 static uint64_t mqd_stride_v9(struct mqd_manager *mm,
42 				struct queue_properties *q)
43 {
44 	if (mm->dev->kfd->cwsr_enabled &&
45 	    q->type == KFD_QUEUE_TYPE_COMPUTE)
46 		return ALIGN(q->ctl_stack_size, PAGE_SIZE) +
47 			ALIGN(sizeof(struct v9_mqd), PAGE_SIZE);
48 
49 	return mm->mqd_size;
50 }
51 
52 static inline struct v9_mqd *get_mqd(void *mqd)
53 {
54 	return (struct v9_mqd *)mqd;
55 }
56 
57 static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
58 {
59 	return (struct v9_sdma_mqd *)mqd;
60 }
61 
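/* Translate the user-supplied CU mask into the per-shader-engine static
 * thread management masks in the MQD. GC 9.4.3/9.4.4/9.5.0 program only
 * SE0-SE3 per XCC instance; all other GFX9 parts program SE0-SE7.
 */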
62 static void update_cu_mask(struct mqd_manager *mm, void *mqd,
63 			struct mqd_update_info *minfo, uint32_t inst)
64 {
65 	struct v9_mqd *m;
66 	uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
67 
68 	if (!minfo || !minfo->cu_mask.ptr)
69 		return;
70 
71 	mqd_symmetrically_map_cu_mask(mm,
72 		minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, inst);
73 
74 	m = get_mqd(mqd);
75 
76 	m->compute_static_thread_mgmt_se0 = se_mask[0];
77 	m->compute_static_thread_mgmt_se1 = se_mask[1];
78 	m->compute_static_thread_mgmt_se2 = se_mask[2];
79 	m->compute_static_thread_mgmt_se3 = se_mask[3];
80 	if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
81 	    KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
82 	    KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0)) {
83 		m->compute_static_thread_mgmt_se4 = se_mask[4];
84 		m->compute_static_thread_mgmt_se5 = se_mask[5];
85 		m->compute_static_thread_mgmt_se6 = se_mask[6];
86 		m->compute_static_thread_mgmt_se7 = se_mask[7];
87 
88 		pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
89 			m->compute_static_thread_mgmt_se0,
90 			m->compute_static_thread_mgmt_se1,
91 			m->compute_static_thread_mgmt_se2,
92 			m->compute_static_thread_mgmt_se3,
93 			m->compute_static_thread_mgmt_se4,
94 			m->compute_static_thread_mgmt_se5,
95 			m->compute_static_thread_mgmt_se6,
96 			m->compute_static_thread_mgmt_se7);
97 	} else {
98 		pr_debug("inst: %u, update cu mask to %#x %#x %#x %#x\n",
99 			inst, m->compute_static_thread_mgmt_se0,
100 			m->compute_static_thread_mgmt_se1,
101 			m->compute_static_thread_mgmt_se2,
102 			m->compute_static_thread_mgmt_se3);
103 	}
104 }
105 
106 static void set_priority(struct v9_mqd *m, struct queue_properties *q)
107 {
108 	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
109 	m->cp_hqd_queue_priority = q->priority;
110 }
111 
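/* MQDs are placed in VRAM on GC 9.4.3 and 9.5.0 unless the APU prefers
 * GTT; everything else keeps them in GTT.
 */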
112 static bool mqd_on_vram(struct amdgpu_device *adev)
113 {
114 	if (adev->apu_prefer_gtt)
115 		return false;
116 
117 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
118 	case IP_VERSION(9, 4, 3):
119 	case IP_VERSION(9, 5, 0):
120 		return true;
121 	default:
122 		return false;
123 	}
124 }
125 
126 static struct kfd_mem_obj *allocate_mqd(struct mqd_manager *mm,
127 		struct queue_properties *q)
128 {
129 	int retval;
130 	struct kfd_node *node = mm->dev;
131 	struct kfd_mem_obj *mqd_mem_obj = NULL;
132 
133 	/* For V9 only, due to a HW bug, the control stack of a user mode
134 	 * compute queue needs to be allocated just behind the page boundary
135 	 * of its regular MQD buffer. So we allocate an enlarged MQD buffer:
136 	 * the first page of the buffer serves as the regular MQD buffer
137 	 * purpose and the remaining is for control stack. Although the two
138 	 * parts are in the same buffer object, they need different memory
139 	 * types: MQD part needs UC (uncached) as usual, while control stack
140 	 * needs NC (non coherent), which is different from the UC type which
141 	 * is used when control stack is allocated in user space.
142 	 *
143 	 * Because of all those, we use the gtt allocation function instead
144 	 * of sub-allocation function for this enlarged MQD buffer. Moreover,
145 	 * in order to achieve two memory types in a single buffer object, we
146 	 * pass a special bo flag AMDGPU_GEM_CREATE_CP_MQD_GFX9 to instruct
147 	 * amdgpu memory functions to do so.
148 	 */
149 	if (node->kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
150 		mqd_mem_obj = kzalloc_obj(struct kfd_mem_obj);
151 		if (!mqd_mem_obj)
152 			return NULL;
153 		retval = amdgpu_amdkfd_alloc_kernel_mem(node->adev,
154 			(ALIGN(q->ctl_stack_size, PAGE_SIZE) +
155 			ALIGN(sizeof(struct v9_mqd), PAGE_SIZE)) *
156 			NUM_XCC(node->xcc_mask),
157 			mqd_on_vram(node->adev) ? AMDGPU_GEM_DOMAIN_VRAM :
158 						  AMDGPU_GEM_DOMAIN_GTT,
159 			&(mqd_mem_obj->mem),
160 			&(mqd_mem_obj->gpu_addr),
161 			(void *)&(mqd_mem_obj->cpu_ptr), true);
162 
163 		if (retval) {
164 			kfree(mqd_mem_obj);
165 			return NULL;
166 		}
167 	} else {
168 		retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
169 				&mqd_mem_obj);
170 		if (retval)
171 			return NULL;
172 	}
173 
174 	return mqd_mem_obj;
175 }
176 
177 static void init_mqd(struct mqd_manager *mm, void **mqd,
178 			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
179 			struct queue_properties *q)
180 {
181 	uint64_t addr;
182 	struct v9_mqd *m;
183 
184 	m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
185 	addr = mqd_mem_obj->gpu_addr;
186 
187 	memset(m, 0, sizeof(struct v9_mqd));
188 
189 	m->header = 0xC0310800;
190 	m->compute_pipelinestat_enable = 1;
191 	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
192 	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
193 	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
194 	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
195 	m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
196 	m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
197 	m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
198 	m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;
199 
200 	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
201 			0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
202 
203 	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
204 	m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
205 
206 	m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
207 
208 	m->cp_mqd_base_addr_lo        = lower_32_bits(addr);
209 	m->cp_mqd_base_addr_hi        = upper_32_bits(addr);
210 
211 	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
212 			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
213 			1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
214 
215 	/* Set cp_hqd_hq_scheduler0 bit 14 to 1 to have the CP set up the
216 	 * DISPATCH_PTR.  This is required for the kfd debugger
217 	 */
218 	m->cp_hqd_hq_status0 = 1 << 14;
219 
220 	if (q->format == KFD_QUEUE_FORMAT_AQL)
221 		m->cp_hqd_aql_control =
222 			1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
223 
224 	if (q->tba_addr) {
225 		m->compute_pgm_rsrc2 |=
226 			(1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
227 	}
228 
229 	if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address) {
230 		m->cp_hqd_persistent_state |=
231 			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
232 		m->cp_hqd_ctx_save_base_addr_lo =
233 			lower_32_bits(q->ctx_save_restore_area_address);
234 		m->cp_hqd_ctx_save_base_addr_hi =
235 			upper_32_bits(q->ctx_save_restore_area_address);
236 		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
237 		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
238 		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
239 		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
240 	}
241 
242 	*mqd = m;
243 	if (gart_addr)
244 		*gart_addr = addr;
245 	update_mqd(mm, m, q, NULL);
246 }
247 
248 static int load_mqd(struct mqd_manager *mm, void *mqd,
249 			uint32_t pipe_id, uint32_t queue_id,
250 			struct queue_properties *p, struct mm_struct *mms)
251 {
252 	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
253 	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
254 
255 	return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
256 					  (uint32_t __user *)p->write_ptr,
257 					  wptr_shift, 0, mms, 0);
258 }
259 
260 static void update_mqd(struct mqd_manager *mm, void *mqd,
261 			struct queue_properties *q,
262 			struct mqd_update_info *minfo)
263 {
264 	struct v9_mqd *m;
265 
266 	m = get_mqd(mqd);
267 
268 	m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
269 	m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
270 	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
271 
272 	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
273 	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
274 
275 	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
276 	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
277 	m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
278 	m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
279 
280 	m->cp_hqd_pq_doorbell_control =
281 		q->doorbell_off <<
282 			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
283 	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
284 			m->cp_hqd_pq_doorbell_control);
285 
286 	m->cp_hqd_ib_control =
287 		3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
288 		1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT;
289 
290 	/*
291 	 * HW does not clamp this field correctly. Maximum EOP queue size
292 	 * is constrained by per-SE EOP done signal count, which is 8-bit.
293 	 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
294 	 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
295 	 * is safe, giving a maximum field value of 0xA.
296 	 *
297 	 * Also, do calculation only if EOP is used (size > 0), otherwise
298 	 * the order_base_2 calculation provides an incorrect result.
299 	 *
300 	 */
301 	m->cp_hqd_eop_control = q->eop_ring_buffer_size ?
302 		min(0xA, order_base_2(q->eop_ring_buffer_size / 4) - 1) : 0;
303 
304 	m->cp_hqd_eop_base_addr_lo =
305 			lower_32_bits(q->eop_ring_buffer_address >> 8);
306 	m->cp_hqd_eop_base_addr_hi =
307 			upper_32_bits(q->eop_ring_buffer_address >> 8);
308 
309 	m->cp_hqd_iq_timer = 0;
310 
311 	m->cp_hqd_vmid = q->vmid;
312 
313 	if (q->format == KFD_QUEUE_FORMAT_AQL) {
314 		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
315 				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
316 				1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT |
317 				1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT;
318 		m->cp_hqd_pq_doorbell_control |= 1 <<
319 			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
320 	}
321 	if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address)
322 		m->cp_hqd_ctx_save_control = 0;
323 
324 	if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
325 	    KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
326 	    KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0))
327 		update_cu_mask(mm, mqd, minfo, 0);
328 	set_priority(m, q);
329 
330 	if (minfo && KFD_GC_VERSION(mm->dev) >= IP_VERSION(9, 4, 2)) {
331 		if (minfo->update_flag & UPDATE_FLAG_IS_GWS)
332 			m->compute_resource_limits |=
333 				COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
334 		else
335 			m->compute_resource_limits &=
336 				~COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
337 	}
338 
339 	q->is_active = QUEUE_IS_ACTIVE(*q);
340 }
341 
342 
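/* A non-zero queue_doorbell_id0 left in the HIQ MQD indicates a queue the
 * scheduler firmware failed to preempt; report it and clear the field so a
 * stale value is not flagged again on the next check.
 */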
343 static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
344 {
345 	struct v9_mqd *m = (struct v9_mqd *)mqd;
346 	uint32_t doorbell_id = m->queue_doorbell_id0;
347 
348 	m->queue_doorbell_id0 = 0;
349 
350 	return kfd_check_hiq_mqd_doorbell_id(mm->dev, doorbell_id, 0);
351 }
352 
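/* Copy the saved wave state layout to user space: a header describing the
 * control stack and save area sizes/offsets, followed by the used portion
 * of the control stack kept one page after the MQD in the same buffer.
 */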
353 static int get_wave_state(struct mqd_manager *mm, void *mqd,
354 			  struct queue_properties *q,
355 			  void __user *ctl_stack,
356 			  u32 *ctl_stack_used_size,
357 			  u32 *save_area_used_size)
358 {
359 	struct v9_mqd *m;
360 	struct kfd_context_save_area_header header;
361 
362 	/* Control stack is located one page after MQD. */
363 	void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
364 
365 	m = get_mqd(mqd);
366 
367 	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
368 		m->cp_hqd_cntl_stack_offset;
369 	*save_area_used_size = m->cp_hqd_wg_state_offset -
370 		m->cp_hqd_cntl_stack_size;
371 
372 	header.wave_state.control_stack_size = *ctl_stack_used_size;
373 	header.wave_state.wave_state_size = *save_area_used_size;
374 
375 	header.wave_state.wave_state_offset = m->cp_hqd_wg_state_offset;
376 	header.wave_state.control_stack_offset = m->cp_hqd_cntl_stack_offset;
377 
378 	if (copy_to_user(ctl_stack, &header, sizeof(header.wave_state)))
379 		return -EFAULT;
380 
381 	if (copy_to_user(ctl_stack + m->cp_hqd_cntl_stack_offset,
382 				mqd_ctl_stack + m->cp_hqd_cntl_stack_offset,
383 				*ctl_stack_used_size))
384 		return -EFAULT;
385 
386 	return 0;
387 }
388 
389 static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
390 {
391 	struct v9_mqd *m = get_mqd(mqd);
392 
393 	*ctl_stack_size = m->cp_hqd_cntl_stack_size * NUM_XCC(mm->dev->xcc_mask);
394 }
395 
396 static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
397 {
398 	struct v9_mqd *m;
399 	/* Control stack is located one page after MQD. */
400 	void *ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
401 
402 	m = get_mqd(mqd);
403 
404 	memcpy(mqd_dst, m, sizeof(struct v9_mqd));
405 	memcpy(ctl_stack_dst, ctl_stack, m->cp_hqd_cntl_stack_size);
406 }
407 
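/* Multi-XCC checkpoint: each XCC has its own MQD and control stack at
 * cp_mqd_stride_size intervals, so checkpoint_mqd() is applied per XCC and
 * the destination buffers are packed in XCC order.
 */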
408 static void checkpoint_mqd_v9_4_3(struct mqd_manager *mm,
409 								  void *mqd,
410 								  void *mqd_dst,
411 								  void *ctl_stack_dst)
412 {
413 	struct v9_mqd *m;
414 	int xcc;
415 	uint64_t size = get_mqd(mqd)->cp_mqd_stride_size;
416 
417 	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
418 		m = get_mqd(mqd + size * xcc);
419 
420 		checkpoint_mqd(mm, m,
421 				(uint8_t *)mqd_dst + sizeof(*m) * xcc,
422 				(uint8_t *)ctl_stack_dst + m->cp_hqd_cntl_stack_size * xcc);
423 	}
424 }
425 
426 static void restore_mqd(struct mqd_manager *mm, void **mqd,
427 			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
428 			struct queue_properties *qp,
429 			const void *mqd_src,
430 			const void *ctl_stack_src, u32 ctl_stack_size)
431 {
432 	uint64_t addr;
433 	struct v9_mqd *m;
434 	void *ctl_stack;
435 
436 	m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
437 	addr = mqd_mem_obj->gpu_addr;
438 
439 	memcpy(m, mqd_src, sizeof(*m));
440 
441 	*mqd = m;
442 	if (gart_addr)
443 		*gart_addr = addr;
444 
445 	/* Control stack is located one page after MQD. */
446 	ctl_stack = (void *)((uintptr_t)*mqd + PAGE_SIZE);
447 	memcpy(ctl_stack, ctl_stack_src, ctl_stack_size);
448 
449 	m->cp_hqd_pq_doorbell_control =
450 		qp->doorbell_off <<
451 			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
452 	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
453 				m->cp_hqd_pq_doorbell_control);
454 
455 	qp->is_active = 0;
456 }
457 
458 static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
459 			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
460 			struct queue_properties *q)
461 {
462 	struct v9_mqd *m;
463 
464 	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
465 
466 	m = get_mqd(*mqd);
467 
468 	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
469 			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
470 }
471 
472 static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd,
473 			enum kfd_preempt_type type, unsigned int timeout,
474 			uint32_t pipe_id, uint32_t queue_id)
475 {
476 	int err;
477 	struct v9_mqd *m;
478 	u32 doorbell_off;
479 
480 	m = get_mqd(mqd);
481 
482 	doorbell_off = m->cp_hqd_pq_doorbell_control >>
483 			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
484 	err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0);
485 	if (err)
486 		pr_debug("Destroy HIQ MQD failed: %d\n", err);
487 
488 	return err;
489 }
490 
491 static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
492 		struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
493 		struct queue_properties *q)
494 {
495 	struct v9_sdma_mqd *m;
496 
497 	m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;
498 
499 	memset(m, 0, sizeof(struct v9_sdma_mqd));
500 
501 	*mqd = m;
502 	if (gart_addr)
503 		*gart_addr = mqd_mem_obj->gpu_addr;
504 
505 	mm->update_mqd(mm, m, q, NULL);
506 }
507 
508 #define SDMA_RLC_DUMMY_DEFAULT 0xf
509 
510 static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
511 			struct queue_properties *q,
512 			struct mqd_update_info *minfo)
513 {
514 	struct v9_sdma_mqd *m;
515 
516 	m = get_sdma_mqd(mqd);
517 	m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
518 		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
519 		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
520 		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
521 		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
522 
523 	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
524 	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
525 	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
526 	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
527 	m->sdmax_rlcx_doorbell_offset =
528 		q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
529 
530 	m->sdma_engine_id = q->sdma_engine_id;
531 	m->sdma_queue_id = q->sdma_queue_id;
532 	m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
533 	/* Allow context switch so we don't cross-process starve with a massive
534 	 * command buffer of long-running SDMA commands
535 	 */
536 	m->sdmax_rlcx_ib_cntl |= SDMA0_GFX_IB_CNTL__SWITCH_INSIDE_IB_MASK;
537 
538 	q->is_active = QUEUE_IS_ACTIVE(*q);
539 }
540 
541 static void checkpoint_mqd_sdma(struct mqd_manager *mm,
542 				void *mqd,
543 				void *mqd_dst,
544 				void *ctl_stack_dst)
545 {
546 	struct v9_sdma_mqd *m;
547 
548 	m = get_sdma_mqd(mqd);
549 
550 	memcpy(mqd_dst, m, sizeof(struct v9_sdma_mqd));
551 }
552 
553 static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
554 			     struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
555 			     struct queue_properties *qp,
556 			     const void *mqd_src,
557 			     const void *ctl_stack_src, const u32 ctl_stack_size)
558 {
559 	uint64_t addr;
560 	struct v9_sdma_mqd *m;
561 
562 	m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;
563 	addr = mqd_mem_obj->gpu_addr;
564 
565 	memcpy(m, mqd_src, sizeof(*m));
566 
567 	m->sdmax_rlcx_doorbell_offset =
568 		qp->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
569 
570 	*mqd = m;
571 	if (gart_addr)
572 		*gart_addr = addr;
573 
574 	qp->is_active = 0;
575 }
576 
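/* HIQ init for multi-XCC parts: one HIQ MQD per XCC, all marked as
 * kernel-mode queues; only the master XCC (XCC0) keeps read-pointer
 * updates enabled, and its MQD/GART address is what the caller sees.
 */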
577 static void init_mqd_hiq_v9_4_3(struct mqd_manager *mm, void **mqd,
578 			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
579 			struct queue_properties *q)
580 {
581 	struct v9_mqd *m;
582 	int xcc = 0;
583 	struct kfd_mem_obj xcc_mqd_mem_obj;
584 	uint64_t xcc_gart_addr = 0;
585 
586 	memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
587 
588 	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
589 		kfd_get_hiq_xcc_mqd(mm->dev, &xcc_mqd_mem_obj, xcc);
590 
591 		init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
592 
593 		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
594 					1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
595 					1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
596 		if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
597 			m->cp_hqd_pq_doorbell_control |= 1 <<
598 				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
599 		m->cp_mqd_stride_size = kfd_hiq_mqd_stride(mm->dev);
600 		if (xcc == 0) {
601 			/* Set no_update_rptr = 0 in Master XCC */
602 			m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
603 
604 			/* Set the MQD pointer and gart address to XCC0 MQD */
605 			*mqd = m;
606 			*gart_addr = xcc_gart_addr;
607 		}
608 	}
609 }
610 
611 static int hiq_load_mqd_kiq_v9_4_3(struct mqd_manager *mm, void *mqd,
612 			uint32_t pipe_id, uint32_t queue_id,
613 			struct queue_properties *p, struct mm_struct *mms)
614 {
615 	uint32_t xcc_mask = mm->dev->xcc_mask;
616 	int xcc_id, err = 0, inst = 0;
617 	void *xcc_mqd;
618 	uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
619 
620 	for_each_inst(xcc_id, xcc_mask) {
621 		xcc_mqd = mqd + hiq_mqd_size * inst;
622 		err = mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, xcc_mqd,
623 						     pipe_id, queue_id,
624 						     p->doorbell_off, xcc_id);
625 		if (err) {
626 			pr_debug("Failed to load HIQ MQD for XCC: %d\n", inst);
627 			break;
628 		}
629 		++inst;
630 	}
631 
632 	return err;
633 }
634 
635 static int destroy_hiq_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
636 			enum kfd_preempt_type type, unsigned int timeout,
637 			uint32_t pipe_id, uint32_t queue_id)
638 {
639 	uint32_t xcc_mask = mm->dev->xcc_mask;
640 	int xcc_id, err = 0, inst = 0;
641 	uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
642 	struct v9_mqd *m;
643 	u32 doorbell_off;
644 
645 	for_each_inst(xcc_id, xcc_mask) {
646 		m = get_mqd(mqd + hiq_mqd_size * inst);
647 
648 		doorbell_off = m->cp_hqd_pq_doorbell_control >>
649 				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
650 
651 		err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, xcc_id);
652 		if (err) {
653 			pr_debug("Destroy HIQ MQD failed for xcc: %d\n", inst);
654 			break;
655 		}
656 		++inst;
657 	}
658 
659 	return err;
660 }
661 
662 static bool check_preemption_failed_v9_4_3(struct mqd_manager *mm, void *mqd)
663 {
664 	uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
665 	uint32_t xcc_mask = mm->dev->xcc_mask;
666 	int inst = 0, xcc_id;
667 	struct v9_mqd *m;
668 	bool ret = false;
669 
670 	for_each_inst(xcc_id, xcc_mask) {
671 		m = get_mqd(mqd + hiq_mqd_size * inst);
672 		ret |= kfd_check_hiq_mqd_doorbell_id(mm->dev,
673 					m->queue_doorbell_id0, inst);
674 		m->queue_doorbell_id0 = 0;
675 		++inst;
676 	}
677 
678 	return ret;
679 }
680 
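/* Build a kfd_mem_obj view of one XCC's slice inside the shared MQD buffer;
 * only the XCC0 view carries the backing BO pointer.
 */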
681 static void get_xcc_mqd(struct kfd_mem_obj *mqd_mem_obj,
682 			       struct kfd_mem_obj *xcc_mqd_mem_obj,
683 			       uint64_t offset)
684 {
685 	xcc_mqd_mem_obj->mem = (offset == 0) ?
686 					mqd_mem_obj->mem : NULL;
687 	xcc_mqd_mem_obj->gpu_addr = mqd_mem_obj->gpu_addr + offset;
688 	xcc_mqd_mem_obj->cpu_ptr = (uint32_t *)((uintptr_t)mqd_mem_obj->cpu_ptr
689 						+ offset);
690 }
691 
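/* Multi-XCC compute queue init: initialize one MQD per XCC at mqd_stride()
 * intervals, point each XCC at its own slice of the CWSR save area, and for
 * AQL queues stagger compute_current_logic_xcc_id from a per-device rotating
 * start value (presumably to spread initial dispatch across XCCs).
 */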
692 static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
693 			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
694 			struct queue_properties *q)
695 {
696 	struct v9_mqd *m;
697 	int xcc = 0;
698 	struct kfd_mem_obj xcc_mqd_mem_obj;
699 	uint64_t xcc_gart_addr = 0;
700 	uint64_t xcc_ctx_save_restore_area_address;
701 	uint64_t offset = mm->mqd_stride(mm, q);
702 	uint32_t local_xcc_start = mm->dev->dqm->current_logical_xcc_start++;
703 
704 	memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
705 	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
706 		get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset*xcc);
707 
708 		init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
709 		if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
710 				m->cp_hqd_pq_doorbell_control |= 1 <<
711 					CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
712 		m->cp_mqd_stride_size = offset;
713 
714 		/*
715 		 * Update the CWSR address for each XCC if CWSR is enabled
716 		 * and CWSR area is allocated in thunk
717 		 */
718 		if (mm->dev->kfd->cwsr_enabled &&
719 		    q->ctx_save_restore_area_address) {
720 			xcc_ctx_save_restore_area_address =
721 				q->ctx_save_restore_area_address +
722 				(xcc * q->ctx_save_restore_area_size);
723 
724 			m->cp_hqd_ctx_save_base_addr_lo =
725 				lower_32_bits(xcc_ctx_save_restore_area_address);
726 			m->cp_hqd_ctx_save_base_addr_hi =
727 				upper_32_bits(xcc_ctx_save_restore_area_address);
728 		}
729 
730 		if (q->format == KFD_QUEUE_FORMAT_AQL) {
731 			m->compute_tg_chunk_size = 1;
732 			m->compute_current_logic_xcc_id =
733 					(local_xcc_start + xcc) %
734 					NUM_XCC(mm->dev->xcc_mask);
735 
736 			switch (xcc) {
737 			case 0:
738 				/* Master XCC */
739 				m->cp_hqd_pq_control &=
740 					~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
741 				break;
742 			default:
743 				break;
744 			}
745 		} else {
746 			/* PM4 Queue */
747 			m->compute_current_logic_xcc_id = 0;
748 			m->compute_tg_chunk_size = 0;
749 			m->pm4_target_xcc_in_xcp = q->pm4_target_xcc;
750 		}
751 
752 		if (xcc == 0) {
753 			/* Set the MQD pointer and gart address to XCC0 MQD */
754 			*mqd = m;
755 			*gart_addr = xcc_gart_addr;
756 		}
757 	}
758 
759 	if (mqd_on_vram(mm->dev->adev))
760 		amdgpu_device_flush_hdp(mm->dev->adev, NULL);
761 }
762 
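/* Multi-XCC variant of update_mqd(): apply the common update and the CU mask
 * to every XCC's MQD, keeping read-pointer updates enabled only on the
 * master XCC for AQL queues.
 */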
763 static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
764 		      struct queue_properties *q, struct mqd_update_info *minfo)
765 {
766 	struct v9_mqd *m;
767 	int xcc = 0;
768 	uint64_t size = mm->mqd_stride(mm, q);
769 
770 	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
771 		m = get_mqd(mqd + size * xcc);
772 		update_mqd(mm, m, q, minfo);
773 
774 		if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
775 				m->cp_hqd_pq_doorbell_control |= 1 <<
776 					CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
777 		update_cu_mask(mm, m, minfo, xcc);
778 
779 		if (q->format == KFD_QUEUE_FORMAT_AQL) {
780 			switch (xcc) {
781 			case 0:
782 				/* Master XCC */
783 				m->cp_hqd_pq_control &=
784 					~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
785 				break;
786 			default:
787 				break;
788 			}
789 			m->compute_tg_chunk_size = 1;
790 		} else {
791 			/* PM4 Queue */
792 			m->compute_current_logic_xcc_id = 0;
793 			m->compute_tg_chunk_size = 0;
794 			m->pm4_target_xcc_in_xcp = q->pm4_target_xcc;
795 		}
796 	}
797 
798 	if (mqd_on_vram(mm->dev->adev))
799 		amdgpu_device_flush_hdp(mm->dev->adev, NULL);
800 }
801 
802 static void restore_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
803 			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
804 			struct queue_properties *qp,
805 			const void *mqd_src,
806 			const void *ctl_stack_src, u32 ctl_stack_size)
807 {
808 	struct kfd_mem_obj xcc_mqd_mem_obj;
809 	u32 mqd_ctl_stack_size;
810 	struct v9_mqd *m;
811 	u32 num_xcc;
812 	int xcc;
813 
814 	uint64_t offset = mm->mqd_stride(mm, qp);
815 
816 	mm->dev->dqm->current_logical_xcc_start++;
817 
818 	num_xcc = NUM_XCC(mm->dev->xcc_mask);
819 	mqd_ctl_stack_size = ctl_stack_size / num_xcc;
820 
821 	memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
822 
823 	/* Set the MQD pointer and gart address to XCC0 MQD */
824 	*mqd = mqd_mem_obj->cpu_ptr;
825 	if (gart_addr)
826 		*gart_addr = mqd_mem_obj->gpu_addr;
827 
828 	for (xcc = 0; xcc < num_xcc; xcc++) {
829 		get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset * xcc);
830 		restore_mqd(mm, (void **)&m,
831 					&xcc_mqd_mem_obj,
832 					NULL,
833 					qp,
834 					(uint8_t *)mqd_src + xcc * sizeof(*m),
835 					(uint8_t *)ctl_stack_src + xcc *  mqd_ctl_stack_size,
836 					mqd_ctl_stack_size);
837 	}
838 
839 	if (mqd_on_vram(mm->dev->adev))
840 		amdgpu_device_flush_hdp(mm->dev->adev, NULL);
841 }
842 static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
843 		   enum kfd_preempt_type type, unsigned int timeout,
844 		   uint32_t pipe_id, uint32_t queue_id)
845 {
846 	uint32_t xcc_mask = mm->dev->xcc_mask;
847 	int xcc_id, err = 0, inst = 0;
848 	void *xcc_mqd;
849 	struct v9_mqd *m;
850 	uint64_t mqd_offset;
851 
852 	m = get_mqd(mqd);
853 	mqd_offset = m->cp_mqd_stride_size;
854 
855 	for_each_inst(xcc_id, xcc_mask) {
856 		xcc_mqd = mqd + mqd_offset * inst;
857 		err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd,
858 						    type, timeout, pipe_id,
859 						    queue_id, xcc_id);
860 		if (err) {
861 			pr_debug("Destroy MQD failed for xcc: %d\n", inst);
862 			break;
863 		}
864 		++inst;
865 	}
866 
867 	return err;
868 }
869 
870 static int load_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
871 			uint32_t pipe_id, uint32_t queue_id,
872 			struct queue_properties *p, struct mm_struct *mms)
873 {
874 	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
875 	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
876 	uint32_t xcc_mask = mm->dev->xcc_mask;
877 	int xcc_id, err = 0, inst = 0;
878 	void *xcc_mqd;
879 	uint64_t mqd_stride_size = mm->mqd_stride(mm, p);
880 
881 	for_each_inst(xcc_id, xcc_mask) {
882 		xcc_mqd = mqd + mqd_stride_size * inst;
883 		err = mm->dev->kfd2kgd->hqd_load(
884 			mm->dev->adev, xcc_mqd, pipe_id, queue_id,
885 			(uint32_t __user *)p->write_ptr, wptr_shift, 0, mms,
886 			xcc_id);
887 		if (err) {
888 			pr_debug("Load MQD failed for xcc: %d\n", inst);
889 			break;
890 		}
891 		++inst;
892 	}
893 
894 	return err;
895 }
896 
897 static int get_wave_state_v9_4_3(struct mqd_manager *mm, void *mqd,
898 				 struct queue_properties *q,
899 				 void __user *ctl_stack,
900 				 u32 *ctl_stack_used_size,
901 				 u32 *save_area_used_size)
902 {
903 	int xcc, err = 0;
904 	void *xcc_mqd;
905 	void __user *xcc_ctl_stack;
906 	uint64_t mqd_stride_size = mm->mqd_stride(mm, q);
907 	u32 tmp_ctl_stack_used_size = 0, tmp_save_area_used_size = 0;
908 
909 	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
910 		xcc_mqd = mqd + mqd_stride_size * xcc;
911 		xcc_ctl_stack = (void __user *)((uintptr_t)ctl_stack +
912 					q->ctx_save_restore_area_size * xcc);
913 
914 		err = get_wave_state(mm, xcc_mqd, q, xcc_ctl_stack,
915 				     &tmp_ctl_stack_used_size,
916 				     &tmp_save_area_used_size);
917 		if (err)
918 			break;
919 
920 		/*
921 		 * Set the ctl_stack_used_size and save_area_used_size to
922 		 * ctl_stack_used_size and save_area_used_size of XCC 0 when
923 		 * passing the info to user-space.
924 		 * For multi XCC, user-space would have to look at the header
925 		 * info of each Control stack area to determine the control
926 		 * stack size and save area used.
927 		 */
928 		if (xcc == 0) {
929 			*ctl_stack_used_size = tmp_ctl_stack_used_size;
930 			*save_area_used_size = tmp_save_area_used_size;
931 		}
932 	}
933 
934 	return err;
935 }
936 
937 #if defined(CONFIG_DEBUG_FS)
938 
939 static int debugfs_show_mqd(struct seq_file *m, void *data)
940 {
941 	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
942 		     data, sizeof(struct v9_mqd), false);
943 	return 0;
944 }
945 
946 static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
947 {
948 	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
949 		     data, sizeof(struct v9_sdma_mqd), false);
950 	return 0;
951 }
952 
953 #endif
954 
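/* Build the MQD manager ops table for one GFX9 KFD node. The CP and HIQ
 * cases select the multi-XCC (GC 9.4.3/9.4.4/9.5.0) callbacks where needed;
 * DIQ and SDMA use the common single-instance paths.
 *
 * Illustrative use (sketch, not taken from this file): the device queue
 * manager keeps one manager per queue type and calls through it, e.g.
 *
 *	struct mqd_manager *mgr = mqd_manager_init_v9(KFD_MQD_TYPE_CP, dev);
 *	struct kfd_mem_obj *obj = mgr->allocate_mqd(mgr, &q->properties);
 *	mgr->init_mqd(mgr, &q->mqd, obj, &q->gart_mqd_addr, &q->properties);
 */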
955 struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
956 		struct kfd_node *dev)
957 {
958 	struct mqd_manager *mqd;
959 
960 	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
961 		return NULL;
962 
963 	mqd = kzalloc_obj(*mqd);
964 	if (!mqd)
965 		return NULL;
966 
967 	mqd->dev = dev;
968 
969 	switch (type) {
970 	case KFD_MQD_TYPE_CP:
971 		mqd->allocate_mqd = allocate_mqd;
972 		mqd->free_mqd = kfd_free_mqd_cp;
973 		mqd->is_occupied = kfd_is_occupied_cp;
974 		mqd->get_checkpoint_info = get_checkpoint_info;
975 		mqd->mqd_size = sizeof(struct v9_mqd);
976 		mqd->mqd_stride = mqd_stride_v9;
977 #if defined(CONFIG_DEBUG_FS)
978 		mqd->debugfs_show_mqd = debugfs_show_mqd;
979 #endif
980 		if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
981 		    KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
982 		    KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
983 			mqd->init_mqd = init_mqd_v9_4_3;
984 			mqd->load_mqd = load_mqd_v9_4_3;
985 			mqd->update_mqd = update_mqd_v9_4_3;
986 			mqd->destroy_mqd = destroy_mqd_v9_4_3;
987 			mqd->get_wave_state = get_wave_state_v9_4_3;
988 			mqd->checkpoint_mqd = checkpoint_mqd_v9_4_3;
989 			mqd->restore_mqd = restore_mqd_v9_4_3;
990 		} else {
991 			mqd->init_mqd = init_mqd;
992 			mqd->load_mqd = load_mqd;
993 			mqd->update_mqd = update_mqd;
994 			mqd->destroy_mqd = kfd_destroy_mqd_cp;
995 			mqd->get_wave_state = get_wave_state;
996 			mqd->checkpoint_mqd = checkpoint_mqd;
997 			mqd->restore_mqd = restore_mqd;
998 		}
999 		break;
1000 	case KFD_MQD_TYPE_HIQ:
1001 		mqd->allocate_mqd = allocate_hiq_mqd;
1002 		mqd->free_mqd = free_mqd_hiq_sdma;
1003 		mqd->update_mqd = update_mqd;
1004 		mqd->is_occupied = kfd_is_occupied_cp;
1005 		mqd->mqd_size = sizeof(struct v9_mqd);
1006 		mqd->mqd_stride = kfd_mqd_stride;
1007 #if defined(CONFIG_DEBUG_FS)
1008 		mqd->debugfs_show_mqd = debugfs_show_mqd;
1009 #endif
1010 		mqd->check_preemption_failed = check_preemption_failed;
1011 		if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
1012 		    KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
1013 		    KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
1014 			mqd->init_mqd = init_mqd_hiq_v9_4_3;
1015 			mqd->load_mqd = hiq_load_mqd_kiq_v9_4_3;
1016 			mqd->destroy_mqd = destroy_hiq_mqd_v9_4_3;
1017 			mqd->check_preemption_failed = check_preemption_failed_v9_4_3;
1018 		} else {
1019 			mqd->init_mqd = init_mqd_hiq;
1020 			mqd->load_mqd = kfd_hiq_load_mqd_kiq;
1021 			mqd->destroy_mqd = destroy_hiq_mqd;
1022 			mqd->check_preemption_failed = check_preemption_failed;
1023 		}
1024 		break;
1025 	case KFD_MQD_TYPE_DIQ:
1026 		mqd->allocate_mqd = allocate_mqd;
1027 		mqd->init_mqd = init_mqd_hiq;
1028 		mqd->free_mqd = kfd_free_mqd_cp;
1029 		mqd->load_mqd = load_mqd;
1030 		mqd->update_mqd = update_mqd;
1031 		mqd->destroy_mqd = kfd_destroy_mqd_cp;
1032 		mqd->is_occupied = kfd_is_occupied_cp;
1033 		mqd->mqd_size = sizeof(struct v9_mqd);
1034 #if defined(CONFIG_DEBUG_FS)
1035 		mqd->debugfs_show_mqd = debugfs_show_mqd;
1036 #endif
1037 		break;
1038 	case KFD_MQD_TYPE_SDMA:
1039 		mqd->allocate_mqd = allocate_sdma_mqd;
1040 		mqd->init_mqd = init_mqd_sdma;
1041 		mqd->free_mqd = free_mqd_hiq_sdma;
1042 		mqd->load_mqd = kfd_load_mqd_sdma;
1043 		mqd->update_mqd = update_mqd_sdma;
1044 		mqd->destroy_mqd = kfd_destroy_mqd_sdma;
1045 		mqd->is_occupied = kfd_is_occupied_sdma;
1046 		mqd->checkpoint_mqd = checkpoint_mqd_sdma;
1047 		mqd->restore_mqd = restore_mqd_sdma;
1048 		mqd->mqd_size = sizeof(struct v9_sdma_mqd);
1049 		mqd->mqd_stride = kfd_mqd_stride;
1050 #if defined(CONFIG_DEBUG_FS)
1051 		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
1052 #endif
1053 		break;
1054 	default:
1055 		kfree(mqd);
1056 		return NULL;
1057 	}
1058 
1059 	return mqd;
1060 }
1061