// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2016-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include "kfd_priv.h"
#include "kfd_mqd_manager.h"
#include "v9_structs.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "sdma0/sdma0_4_0_sh_mask.h"
#include "amdgpu_amdkfd.h"
#include "kfd_device_queue_manager.h"

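/*
 * An MQD (Memory Queue Descriptor) is the in-memory image of a hardware
 * queue's register state. The CP firmware loads it into a hardware queue
 * descriptor (HQD) slot when the queue is mapped and writes the state back
 * on preemption. The helpers below manage the GFX9 flavors of this
 * structure for compute, HIQ/DIQ and SDMA queues.
 */
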
static void update_mqd(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q,
			struct mqd_update_info *minfo);

static uint64_t mqd_stride_v9(struct mqd_manager *mm,
				struct queue_properties *q)
{
	if (mm->dev->kfd->cwsr_enabled &&
	    q->type == KFD_QUEUE_TYPE_COMPUTE)
		return ALIGN(q->ctl_stack_size, PAGE_SIZE) +
			ALIGN(sizeof(struct v9_mqd), PAGE_SIZE);

	return mm->mqd_size;
}
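
/*
 * Worked example (assuming 4 KiB pages): with CWSR enabled and a 12 KiB
 * control stack, a compute queue's per-XCC stride is
 * ALIGN(12K, 4K) + ALIGN(sizeof(struct v9_mqd), 4K) = 12K + 4K = 16 KiB,
 * i.e. one page of MQD followed by the control stack pages.
 */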

static inline struct v9_mqd *get_mqd(void *mqd)
{
	return (struct v9_mqd *)mqd;
}

static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v9_sdma_mqd *)mqd;
}

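/*
 * Distribute the user-supplied CU bitmask symmetrically across the shader
 * engines and program the per-SE static thread-management masks; a queue
 * only dispatches work to CUs whose bits remain set.
 */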
static void update_cu_mask(struct mqd_manager *mm, void *mqd,
			struct mqd_update_info *minfo, uint32_t inst)
{
	struct v9_mqd *m;
	uint32_t se_mask[KFD_MAX_NUM_SE] = {0};

	if (!minfo || !minfo->cu_mask.ptr)
		return;

	mqd_symmetrically_map_cu_mask(mm,
		minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, inst);

	m = get_mqd(mqd);

	m->compute_static_thread_mgmt_se0 = se_mask[0];
	m->compute_static_thread_mgmt_se1 = se_mask[1];
	m->compute_static_thread_mgmt_se2 = se_mask[2];
	m->compute_static_thread_mgmt_se3 = se_mask[3];
	if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
	    KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
	    KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0)) {
		m->compute_static_thread_mgmt_se4 = se_mask[4];
		m->compute_static_thread_mgmt_se5 = se_mask[5];
		m->compute_static_thread_mgmt_se6 = se_mask[6];
		m->compute_static_thread_mgmt_se7 = se_mask[7];

		pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
			m->compute_static_thread_mgmt_se0,
			m->compute_static_thread_mgmt_se1,
			m->compute_static_thread_mgmt_se2,
			m->compute_static_thread_mgmt_se3,
			m->compute_static_thread_mgmt_se4,
			m->compute_static_thread_mgmt_se5,
			m->compute_static_thread_mgmt_se6,
			m->compute_static_thread_mgmt_se7);
	} else {
		pr_debug("inst: %u, update cu mask to %#x %#x %#x %#x\n",
			inst, m->compute_static_thread_mgmt_se0,
			m->compute_static_thread_mgmt_se1,
			m->compute_static_thread_mgmt_se2,
			m->compute_static_thread_mgmt_se3);
	}
}

static void set_priority(struct v9_mqd *m, struct queue_properties *q)
{
	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
	m->cp_hqd_queue_priority = q->priority;
}

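/*
 * GC 9.4.3 and 9.5.0 keep their MQDs in VRAM; everything else, and APUs
 * that prefer GTT, use system memory.
 */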
static bool mqd_on_vram(struct amdgpu_device *adev)
{
	if (adev->apu_prefer_gtt)
		return false;

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 4, 3):
	case IP_VERSION(9, 5, 0):
		return true;
	default:
		return false;
	}
}

static struct kfd_mem_obj *allocate_mqd(struct mqd_manager *mm,
		struct queue_properties *q)
{
	int retval;
	struct kfd_node *node = mm->dev;
	struct kfd_mem_obj *mqd_mem_obj = NULL;

	/* For V9 only, due to a HW bug, the control stack of a user mode
	 * compute queue needs to be allocated just behind the page boundary
	 * of its regular MQD buffer. So we allocate an enlarged MQD buffer:
	 * the first page of the buffer serves as the regular MQD buffer
	 * and the remainder holds the control stack. Although the two parts
	 * are in the same buffer object, they need different memory types:
	 * the MQD part needs UC (uncached) as usual, while the control stack
	 * needs NC (non coherent), unlike the UC type used when the control
	 * stack is allocated in user space.
	 *
	 * Because of this, we use the kernel-memory allocation function
	 * instead of the sub-allocation function for this enlarged MQD
	 * buffer. Moreover, in order to achieve two memory types in a single
	 * buffer object, we pass the special bo flag
	 * AMDGPU_GEM_CREATE_CP_MQD_GFX9 to instruct the amdgpu memory
	 * functions to do so.
	 */
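	/* Resulting layout of the enlarged buffer, repeated once per XCC:
	 *
	 *   +----------------------+  <- mqd_mem_obj->gpu_addr
	 *   | struct v9_mqd        |
	 *   | (page aligned)       |
	 *   +----------------------+
	 *   | control stack        |
	 *   | (ctl_stack_size,     |
	 *   |  page aligned)       |
	 *   +----------------------+
	 */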
	if (node->kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
		mqd_mem_obj = kzalloc_obj(struct kfd_mem_obj);
		if (!mqd_mem_obj)
			return NULL;
		retval = amdgpu_amdkfd_alloc_kernel_mem(node->adev,
			(ALIGN(q->ctl_stack_size, PAGE_SIZE) +
			ALIGN(sizeof(struct v9_mqd), PAGE_SIZE)) *
			NUM_XCC(node->xcc_mask),
			mqd_on_vram(node->adev) ? AMDGPU_GEM_DOMAIN_VRAM :
			AMDGPU_GEM_DOMAIN_GTT,
			&(mqd_mem_obj->mem),
			&(mqd_mem_obj->gpu_addr),
			(void *)&(mqd_mem_obj->cpu_ptr), true);

		if (retval) {
			kfree(mqd_mem_obj);
			return NULL;
		}
	} else {
		retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
				&mqd_mem_obj);
		if (retval)
			return NULL;
	}

	return mqd_mem_obj;
}

static void init_mqd(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	uint64_t addr;
	struct v9_mqd *m;

	m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
	addr = mqd_mem_obj->gpu_addr;

	memset(m, 0, sizeof(struct v9_mqd));

	m->header = 0xC0310800;
	m->compute_pipelinestat_enable = 1;
	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
	m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;

	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
			0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;

	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
	m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;

	m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;

	m->cp_mqd_base_addr_lo = lower_32_bits(addr);
	m->cp_mqd_base_addr_hi = upper_32_bits(addr);

	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
			1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;

	/* Set cp_hqd_hq_scheduler0 bit 14 to 1 to have the CP set up the
	 * DISPATCH_PTR. This is required for the kfd debugger.
	 */
	m->cp_hqd_hq_status0 = 1 << 14;

	if (q->format == KFD_QUEUE_FORMAT_AQL)
		m->cp_hqd_aql_control =
			1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;

	if (q->tba_addr) {
		m->compute_pgm_rsrc2 |=
			(1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
	}

	if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address) {
		m->cp_hqd_persistent_state |=
			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
		m->cp_hqd_ctx_save_base_addr_lo =
			lower_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_base_addr_hi =
			upper_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
	}

	*mqd = m;
	if (gart_addr)
		*gart_addr = addr;
	update_mqd(mm, m, q, NULL);
}

static int load_mqd(struct mqd_manager *mm, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			struct queue_properties *p, struct mm_struct *mms)
{
	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
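	/* 64 B = 16 dwords, so a shift of 4 converts the AQL packet count
	 * into the dword count the CP expects (16 == 1 << 4).
	 */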
	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);

	return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
					  (uint32_t __user *)p->write_ptr,
					  wptr_shift, 0, mms, 0);
}

static void update_mqd(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q,
			struct mqd_update_info *minfo)
{
	struct v9_mqd *m;

	m = get_mqd(mqd);

	m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
	m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);

	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);

	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
	m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);

	m->cp_hqd_pq_doorbell_control =
		q->doorbell_off <<
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
			m->cp_hqd_pq_doorbell_control);

	m->cp_hqd_ib_control =
		3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
		1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT;

	/*
	 * HW does not clamp this field correctly. The maximum EOP queue size
	 * is constrained by the per-SE EOP done signal count, which is 8-bit.
	 * The limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
	 * more than (EOP entry count - 1), so a queue size of 0x800 dwords
	 * is safe, giving a maximum field value of 0xA.
	 *
	 * Also, do the calculation only if EOP is used (size > 0); otherwise
	 * the order_base_2 calculation yields an incorrect result.
	 */
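	/* Example: an 8 KiB EOP ring is 0x800 dwords, order_base_2(0x800)
	 * = 11, so the field value is min(0xA, 11 - 1) = 0xA, the cap.
	 */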
	m->cp_hqd_eop_control = q->eop_ring_buffer_size ?
		min(0xA, order_base_2(q->eop_ring_buffer_size / 4) - 1) : 0;

	m->cp_hqd_eop_base_addr_lo =
			lower_32_bits(q->eop_ring_buffer_address >> 8);
	m->cp_hqd_eop_base_addr_hi =
			upper_32_bits(q->eop_ring_buffer_address >> 8);

	m->cp_hqd_iq_timer = 0;

	m->cp_hqd_vmid = q->vmid;

	if (q->format == KFD_QUEUE_FORMAT_AQL) {
		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
				1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT |
				1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT;
		m->cp_hqd_pq_doorbell_control |= 1 <<
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
	}
	if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address)
		m->cp_hqd_ctx_save_control = 0;

	if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
	    KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
	    KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0))
		update_cu_mask(mm, mqd, minfo, 0);
	set_priority(m, q);

	if (minfo && KFD_GC_VERSION(mm->dev) >= IP_VERSION(9, 4, 2)) {
		if (minfo->update_flag & UPDATE_FLAG_IS_GWS)
			m->compute_resource_limits |=
				COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
		else
			m->compute_resource_limits &=
				~COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
	}

	q->is_active = QUEUE_IS_ACTIVE(*q);
}

static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
{
	struct v9_mqd *m = (struct v9_mqd *)mqd;
	uint32_t doorbell_id = m->queue_doorbell_id0;

	m->queue_doorbell_id0 = 0;

	return kfd_check_hiq_mqd_doorbell_id(mm->dev, doorbell_id, 0);
}

static int get_wave_state(struct mqd_manager *mm, void *mqd,
			  struct queue_properties *q,
			  void __user *ctl_stack,
			  u32 *ctl_stack_used_size,
			  u32 *save_area_used_size)
{
	struct v9_mqd *m;
	struct kfd_context_save_area_header header;

	/* Control stack is located one page after MQD. */
	void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
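	/* The control stack fills downward from cp_hqd_cntl_stack_size
	 * toward cp_hqd_cntl_stack_offset, while the wave save area grows
	 * upward past the control stack region; hence the two differences
	 * computed below give the used sizes.
	 */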

	m = get_mqd(mqd);

	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
		m->cp_hqd_cntl_stack_offset;
	*save_area_used_size = m->cp_hqd_wg_state_offset -
		m->cp_hqd_cntl_stack_size;

	header.wave_state.control_stack_size = *ctl_stack_used_size;
	header.wave_state.wave_state_size = *save_area_used_size;

	header.wave_state.wave_state_offset = m->cp_hqd_wg_state_offset;
	header.wave_state.control_stack_offset = m->cp_hqd_cntl_stack_offset;

	if (copy_to_user(ctl_stack, &header, sizeof(header.wave_state)))
		return -EFAULT;

	if (copy_to_user(ctl_stack + m->cp_hqd_cntl_stack_offset,
			 mqd_ctl_stack + m->cp_hqd_cntl_stack_offset,
			 *ctl_stack_used_size))
		return -EFAULT;

	return 0;
}

static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
{
	struct v9_mqd *m = get_mqd(mqd);

	*ctl_stack_size = m->cp_hqd_cntl_stack_size * NUM_XCC(mm->dev->xcc_mask);
}
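
/*
 * Checkpoint/restore (used by the CRIU path): a checkpoint is the raw MQD
 * image plus the kernel's copy of the control stack; restore rebuilds both
 * from those snapshots and re-points the doorbell.
 */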
static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
{
	struct v9_mqd *m;
	/* Control stack is located one page after MQD. */
	void *ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);

	m = get_mqd(mqd);

	memcpy(mqd_dst, m, sizeof(struct v9_mqd));
	memcpy(ctl_stack_dst, ctl_stack, m->cp_hqd_cntl_stack_size);
}

static void checkpoint_mqd_v9_4_3(struct mqd_manager *mm,
				  void *mqd,
				  void *mqd_dst,
				  void *ctl_stack_dst)
{
	struct v9_mqd *m;
	int xcc;
	uint64_t size = get_mqd(mqd)->cp_mqd_stride_size;

	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
		m = get_mqd(mqd + size * xcc);

		checkpoint_mqd(mm, m,
			       (uint8_t *)mqd_dst + sizeof(*m) * xcc,
			       (uint8_t *)ctl_stack_dst + m->cp_hqd_cntl_stack_size * xcc);
	}
}

static void restore_mqd(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *qp,
			const void *mqd_src,
			const void *ctl_stack_src, u32 ctl_stack_size)
{
	uint64_t addr;
	struct v9_mqd *m;
	void *ctl_stack;

	m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
	addr = mqd_mem_obj->gpu_addr;

	memcpy(m, mqd_src, sizeof(*m));

	*mqd = m;
	if (gart_addr)
		*gart_addr = addr;

	/* Control stack is located one page after MQD. */
	ctl_stack = (void *)((uintptr_t)*mqd + PAGE_SIZE);
	memcpy(ctl_stack, ctl_stack_src, ctl_stack_size);

	m->cp_hqd_pq_doorbell_control =
		qp->doorbell_off <<
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
			m->cp_hqd_pq_doorbell_control);

	qp->is_active = 0;
}

static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	struct v9_mqd *m;

	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);

	m = get_mqd(*mqd);

	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}

static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd,
			enum kfd_preempt_type type, unsigned int timeout,
			uint32_t pipe_id, uint32_t queue_id)
{
	int err;
	struct v9_mqd *m;
	u32 doorbell_off;

	m = get_mqd(mqd);

	doorbell_off = m->cp_hqd_pq_doorbell_control >>
			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
	err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0);
	if (err)
		pr_debug("Destroy HIQ MQD failed: %d\n", err);

	return err;
}

static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
		struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
		struct queue_properties *q)
{
	struct v9_sdma_mqd *m;

	m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;

	memset(m, 0, sizeof(struct v9_sdma_mqd));

	*mqd = m;
	if (gart_addr)
		*gart_addr = mqd_mem_obj->gpu_addr;

	mm->update_mqd(mm, m, q, NULL);
}

#define SDMA_RLC_DUMMY_DEFAULT 0xf

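/*
 * Program the SDMA RLC ring registers from the queue properties. RB_SIZE
 * is log2 of the ring size in dwords; e.g. an 8 KiB ring is 0x800 dwords,
 * so order_base_2(0x2000 / 4) = 11 goes into the RB_SIZE field.
 */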
static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q,
			struct mqd_update_info *minfo)
{
	struct v9_sdma_mqd *m;

	m = get_sdma_mqd(mqd);
	m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;

	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->sdmax_rlcx_doorbell_offset =
		q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;

	m->sdma_engine_id = q->sdma_engine_id;
	m->sdma_queue_id = q->sdma_queue_id;
	m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
	/* Allow context switching inside the IB so that a massive command
	 * buffer of long-running SDMA commands can't starve other processes.
	 */
	m->sdmax_rlcx_ib_cntl |= SDMA0_GFX_IB_CNTL__SWITCH_INSIDE_IB_MASK;

	q->is_active = QUEUE_IS_ACTIVE(*q);
}

static void checkpoint_mqd_sdma(struct mqd_manager *mm,
				void *mqd,
				void *mqd_dst,
				void *ctl_stack_dst)
{
	struct v9_sdma_mqd *m;

	m = get_sdma_mqd(mqd);

	memcpy(mqd_dst, m, sizeof(struct v9_sdma_mqd));
}

static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *qp,
			const void *mqd_src,
			const void *ctl_stack_src, const u32 ctl_stack_size)
{
	uint64_t addr;
	struct v9_sdma_mqd *m;

	m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;
	addr = mqd_mem_obj->gpu_addr;

	memcpy(m, mqd_src, sizeof(*m));

	m->sdmax_rlcx_doorbell_offset =
		qp->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;

	*mqd = m;
	if (gart_addr)
		*gart_addr = addr;

	qp->is_active = 0;
}

static void init_mqd_hiq_v9_4_3(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	struct v9_mqd *m;
	int xcc = 0;
	struct kfd_mem_obj xcc_mqd_mem_obj;
	uint64_t xcc_gart_addr = 0;

	memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));

	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
		kfd_get_hiq_xcc_mqd(mm->dev, &xcc_mqd_mem_obj, xcc);

		init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);

		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
					1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
					1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
		if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
			m->cp_hqd_pq_doorbell_control |= 1 <<
				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
		m->cp_mqd_stride_size = kfd_hiq_mqd_stride(mm->dev);
		if (xcc == 0) {
			/* Set no_update_rptr = 0 in Master XCC */
			m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;

			/* Set the MQD pointer and gart address to XCC0 MQD */
			*mqd = m;
			*gart_addr = xcc_gart_addr;
		}
	}
}

static int hiq_load_mqd_kiq_v9_4_3(struct mqd_manager *mm, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			struct queue_properties *p, struct mm_struct *mms)
{
	uint32_t xcc_mask = mm->dev->xcc_mask;
	int xcc_id, err = 0, inst = 0;
	void *xcc_mqd;
	uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);

	for_each_inst(xcc_id, xcc_mask) {
		xcc_mqd = mqd + hiq_mqd_size * inst;
		err = mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, xcc_mqd,
						     pipe_id, queue_id,
						     p->doorbell_off, xcc_id);
		if (err) {
			pr_debug("Failed to load HIQ MQD for XCC: %d\n", inst);
			break;
		}
		++inst;
	}

	return err;
}

static int destroy_hiq_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
			enum kfd_preempt_type type, unsigned int timeout,
			uint32_t pipe_id, uint32_t queue_id)
{
	uint32_t xcc_mask = mm->dev->xcc_mask;
	int xcc_id, err = 0, inst = 0;
	uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
	struct v9_mqd *m;
	u32 doorbell_off;

	for_each_inst(xcc_id, xcc_mask) {
		m = get_mqd(mqd + hiq_mqd_size * inst);

		doorbell_off = m->cp_hqd_pq_doorbell_control >>
				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;

		err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, xcc_id);
		if (err) {
			pr_debug("Destroy HIQ MQD failed for xcc: %d\n", inst);
			break;
		}
		++inst;
	}

	return err;
}

static bool check_preemption_failed_v9_4_3(struct mqd_manager *mm, void *mqd)
{
	uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
	uint32_t xcc_mask = mm->dev->xcc_mask;
	int inst = 0, xcc_id;
	struct v9_mqd *m;
	bool ret = false;

	for_each_inst(xcc_id, xcc_mask) {
		m = get_mqd(mqd + hiq_mqd_size * inst);
		ret |= kfd_check_hiq_mqd_doorbell_id(mm->dev,
					m->queue_doorbell_id0, inst);
		m->queue_doorbell_id0 = 0;
		++inst;
	}

	return ret;
}

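/*
 * Build a per-XCC view into the shared multi-XCC MQD allocation. Only the
 * offset-0 view keeps the mem pointer, so the backing buffer object has a
 * single owner; the other views just alias into the same allocation.
 */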
static void get_xcc_mqd(struct kfd_mem_obj *mqd_mem_obj,
			struct kfd_mem_obj *xcc_mqd_mem_obj,
			uint64_t offset)
{
	xcc_mqd_mem_obj->mem = (offset == 0) ?
			mqd_mem_obj->mem : NULL;
	xcc_mqd_mem_obj->gpu_addr = mqd_mem_obj->gpu_addr + offset;
	xcc_mqd_mem_obj->cpu_ptr = (uint32_t *)((uintptr_t)mqd_mem_obj->cpu_ptr
					+ offset);
}

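/*
 * GC 9.4.3/9.4.4/9.5.0 parts contain multiple XCCs (compute dies), each of
 * which needs its own MQD and control stack. The v9_4_3 init/update/load/
 * destroy paths below replicate each operation across every XCC at
 * cp_mqd_stride_size intervals; XCC 0 acts as the master whose MQD pointer
 * and GART address are reported back to the caller.
 */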
static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *q)
{
	struct v9_mqd *m;
	int xcc = 0;
	struct kfd_mem_obj xcc_mqd_mem_obj;
	uint64_t xcc_gart_addr = 0;
	uint64_t xcc_ctx_save_restore_area_address;
	uint64_t offset = mm->mqd_stride(mm, q);
	uint32_t local_xcc_start = mm->dev->dqm->current_logical_xcc_start++;

	memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
		get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset * xcc);

		init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
		if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
			m->cp_hqd_pq_doorbell_control |= 1 <<
				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
		m->cp_mqd_stride_size = offset;

		/*
		 * Update the CWSR address for each XCC if CWSR is enabled
		 * and the CWSR area is allocated by the Thunk.
		 */
		if (mm->dev->kfd->cwsr_enabled &&
		    q->ctx_save_restore_area_address) {
			xcc_ctx_save_restore_area_address =
				q->ctx_save_restore_area_address +
				(xcc * q->ctx_save_restore_area_size);

			m->cp_hqd_ctx_save_base_addr_lo =
				lower_32_bits(xcc_ctx_save_restore_area_address);
			m->cp_hqd_ctx_save_base_addr_hi =
				upper_32_bits(xcc_ctx_save_restore_area_address);
		}

		if (q->format == KFD_QUEUE_FORMAT_AQL) {
			m->compute_tg_chunk_size = 1;
			m->compute_current_logic_xcc_id =
					(local_xcc_start + xcc) %
					NUM_XCC(mm->dev->xcc_mask);

			switch (xcc) {
			case 0:
				/* Master XCC */
				m->cp_hqd_pq_control &=
					~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
				break;
			default:
				break;
			}
		} else {
			/* PM4 Queue */
			m->compute_current_logic_xcc_id = 0;
			m->compute_tg_chunk_size = 0;
			m->pm4_target_xcc_in_xcp = q->pm4_target_xcc;
		}

		if (xcc == 0) {
			/* Set the MQD pointer and gart address to XCC0 MQD */
			*mqd = m;
			*gart_addr = xcc_gart_addr;
		}
	}

	if (mqd_on_vram(mm->dev->adev))
		amdgpu_device_flush_hdp(mm->dev->adev, NULL);
}

static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q, struct mqd_update_info *minfo)
{
	struct v9_mqd *m;
	int xcc = 0;
	uint64_t size = mm->mqd_stride(mm, q);

	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
		m = get_mqd(mqd + size * xcc);
		update_mqd(mm, m, q, minfo);

		if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
			m->cp_hqd_pq_doorbell_control |= 1 <<
				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
		update_cu_mask(mm, m, minfo, xcc);

		if (q->format == KFD_QUEUE_FORMAT_AQL) {
			switch (xcc) {
			case 0:
				/* Master XCC */
				m->cp_hqd_pq_control &=
					~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
				break;
			default:
				break;
			}
			m->compute_tg_chunk_size = 1;
		} else {
			/* PM4 Queue */
			m->compute_current_logic_xcc_id = 0;
			m->compute_tg_chunk_size = 0;
			m->pm4_target_xcc_in_xcp = q->pm4_target_xcc;
		}
	}

	if (mqd_on_vram(mm->dev->adev))
		amdgpu_device_flush_hdp(mm->dev->adev, NULL);
}

static void restore_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
			struct queue_properties *qp,
			const void *mqd_src,
			const void *ctl_stack_src, u32 ctl_stack_size)
{
	struct kfd_mem_obj xcc_mqd_mem_obj;
	u32 mqd_ctl_stack_size;
	struct v9_mqd *m;
	u32 num_xcc;
	int xcc;

	uint64_t offset = mm->mqd_stride(mm, qp);

	mm->dev->dqm->current_logical_xcc_start++;

	num_xcc = NUM_XCC(mm->dev->xcc_mask);
	mqd_ctl_stack_size = ctl_stack_size / num_xcc;

	memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));

	/* Set the MQD pointer and gart address to XCC0 MQD */
	*mqd = mqd_mem_obj->cpu_ptr;
	if (gart_addr)
		*gart_addr = mqd_mem_obj->gpu_addr;

	for (xcc = 0; xcc < num_xcc; xcc++) {
		get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset * xcc);
		restore_mqd(mm, (void **)&m,
			    &xcc_mqd_mem_obj,
			    NULL,
			    qp,
			    (uint8_t *)mqd_src + xcc * sizeof(*m),
			    (uint8_t *)ctl_stack_src + xcc * mqd_ctl_stack_size,
			    mqd_ctl_stack_size);
	}

	if (mqd_on_vram(mm->dev->adev))
		amdgpu_device_flush_hdp(mm->dev->adev, NULL);
}

static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
			enum kfd_preempt_type type, unsigned int timeout,
			uint32_t pipe_id, uint32_t queue_id)
{
	uint32_t xcc_mask = mm->dev->xcc_mask;
	int xcc_id, err = 0, inst = 0;
	void *xcc_mqd;
	struct v9_mqd *m;
	uint64_t mqd_offset;

	m = get_mqd(mqd);
	mqd_offset = m->cp_mqd_stride_size;

	for_each_inst(xcc_id, xcc_mask) {
		xcc_mqd = mqd + mqd_offset * inst;
		err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd,
						    type, timeout, pipe_id,
						    queue_id, xcc_id);
		if (err) {
			pr_debug("Destroy MQD failed for xcc: %d\n", inst);
			break;
		}
		++inst;
	}

	return err;
}

static int load_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			struct queue_properties *p, struct mm_struct *mms)
{
	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
	uint32_t xcc_mask = mm->dev->xcc_mask;
	int xcc_id, err = 0, inst = 0;
	void *xcc_mqd;
	uint64_t mqd_stride_size = mm->mqd_stride(mm, p);

	for_each_inst(xcc_id, xcc_mask) {
		xcc_mqd = mqd + mqd_stride_size * inst;
		err = mm->dev->kfd2kgd->hqd_load(
			mm->dev->adev, xcc_mqd, pipe_id, queue_id,
			(uint32_t __user *)p->write_ptr, wptr_shift, 0, mms,
			xcc_id);
		if (err) {
			pr_debug("Load MQD failed for xcc: %d\n", inst);
			break;
		}
		++inst;
	}

	return err;
}

static int get_wave_state_v9_4_3(struct mqd_manager *mm, void *mqd,
				 struct queue_properties *q,
				 void __user *ctl_stack,
				 u32 *ctl_stack_used_size,
				 u32 *save_area_used_size)
{
	int xcc, err = 0;
	void *xcc_mqd;
	void __user *xcc_ctl_stack;
	uint64_t mqd_stride_size = mm->mqd_stride(mm, q);
	u32 tmp_ctl_stack_used_size = 0, tmp_save_area_used_size = 0;

	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
		xcc_mqd = mqd + mqd_stride_size * xcc;
		xcc_ctl_stack = (void __user *)((uintptr_t)ctl_stack +
					q->ctx_save_restore_area_size * xcc);

		err = get_wave_state(mm, xcc_mqd, q, xcc_ctl_stack,
				     &tmp_ctl_stack_used_size,
				     &tmp_save_area_used_size);
		if (err)
			break;

		/*
		 * Report the ctl_stack_used_size and save_area_used_size of
		 * XCC 0 when passing the info to user space.
		 * For multiple XCCs, user space has to look at the header
		 * info of each control stack area to determine the control
		 * stack size and save area used.
		 */
		if (xcc == 0) {
			*ctl_stack_used_size = tmp_ctl_stack_used_size;
			*save_area_used_size = tmp_save_area_used_size;
		}
	}

	return err;
}

#if defined(CONFIG_DEBUG_FS)

static int debugfs_show_mqd(struct seq_file *m, void *data)
{
	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct v9_mqd), false);
	return 0;
}

static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
{
	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
		     data, sizeof(struct v9_sdma_mqd), false);
	return 0;
}

#endif

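/*
 * Entry point: build the mqd_manager ops table for one MQD type on a GFX9
 * node. Sketch of how the returned table is used (hypothetical caller, for
 * illustration only; the real call sites live in the device queue manager):
 *
 *	struct mqd_manager *mm = mqd_manager_init_v9(KFD_MQD_TYPE_CP, node);
 *	struct kfd_mem_obj *obj = mm->allocate_mqd(mm, &q->properties);
 *
 *	mm->init_mqd(mm, &q->mqd, obj, &q->gart_mqd_addr, &q->properties);
 *	mm->load_mqd(mm, q->mqd, pipe, queue, &q->properties, current->mm);
 */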
struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
		struct kfd_node *dev)
{
	struct mqd_manager *mqd;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	mqd = kzalloc_obj(*mqd);
	if (!mqd)
		return NULL;

	mqd->dev = dev;

	switch (type) {
	case KFD_MQD_TYPE_CP:
		mqd->allocate_mqd = allocate_mqd;
		mqd->free_mqd = kfd_free_mqd_cp;
		mqd->is_occupied = kfd_is_occupied_cp;
		mqd->get_checkpoint_info = get_checkpoint_info;
		mqd->mqd_size = sizeof(struct v9_mqd);
		mqd->mqd_stride = mqd_stride_v9;
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
		    KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
		    KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
			mqd->init_mqd = init_mqd_v9_4_3;
			mqd->load_mqd = load_mqd_v9_4_3;
			mqd->update_mqd = update_mqd_v9_4_3;
			mqd->destroy_mqd = destroy_mqd_v9_4_3;
			mqd->get_wave_state = get_wave_state_v9_4_3;
			mqd->checkpoint_mqd = checkpoint_mqd_v9_4_3;
			mqd->restore_mqd = restore_mqd_v9_4_3;
		} else {
			mqd->init_mqd = init_mqd;
			mqd->load_mqd = load_mqd;
			mqd->update_mqd = update_mqd;
			mqd->destroy_mqd = kfd_destroy_mqd_cp;
			mqd->get_wave_state = get_wave_state;
			mqd->checkpoint_mqd = checkpoint_mqd;
			mqd->restore_mqd = restore_mqd;
		}
		break;
	case KFD_MQD_TYPE_HIQ:
		mqd->allocate_mqd = allocate_hiq_mqd;
		mqd->free_mqd = free_mqd_hiq_sdma;
		mqd->update_mqd = update_mqd;
		mqd->is_occupied = kfd_is_occupied_cp;
		mqd->mqd_size = sizeof(struct v9_mqd);
		mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		mqd->check_preemption_failed = check_preemption_failed;
		if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
		    KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
		    KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
			mqd->init_mqd = init_mqd_hiq_v9_4_3;
			mqd->load_mqd = hiq_load_mqd_kiq_v9_4_3;
			mqd->destroy_mqd = destroy_hiq_mqd_v9_4_3;
			mqd->check_preemption_failed = check_preemption_failed_v9_4_3;
		} else {
			mqd->init_mqd = init_mqd_hiq;
			mqd->load_mqd = kfd_hiq_load_mqd_kiq;
			mqd->destroy_mqd = destroy_hiq_mqd;
			mqd->check_preemption_failed = check_preemption_failed;
		}
		break;
	case KFD_MQD_TYPE_DIQ:
		mqd->allocate_mqd = allocate_mqd;
		mqd->init_mqd = init_mqd_hiq;
		mqd->free_mqd = kfd_free_mqd_cp;
		mqd->load_mqd = load_mqd;
		mqd->update_mqd = update_mqd;
		mqd->destroy_mqd = kfd_destroy_mqd_cp;
		mqd->is_occupied = kfd_is_occupied_cp;
		mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		break;
	case KFD_MQD_TYPE_SDMA:
		mqd->allocate_mqd = allocate_sdma_mqd;
		mqd->init_mqd = init_mqd_sdma;
		mqd->free_mqd = free_mqd_hiq_sdma;
		mqd->load_mqd = kfd_load_mqd_sdma;
		mqd->update_mqd = update_mqd_sdma;
		mqd->destroy_mqd = kfd_destroy_mqd_sdma;
		mqd->is_occupied = kfd_is_occupied_sdma;
		mqd->checkpoint_mqd = checkpoint_mqd_sdma;
		mqd->restore_mqd = restore_mqd_sdma;
		mqd->mqd_size = sizeof(struct v9_sdma_mqd);
		mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
		break;
	default:
		kfree(mqd);
		return NULL;
	}

	return mqd;
}