xref: /linux/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c (revision bce04f216df40cb407243efce1beec9e8ea7815e)
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
2 /*
3  * Copyright 2014-2022 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include <linux/ratelimit.h>
26 #include <linux/printk.h>
27 #include <linux/slab.h>
28 #include <linux/list.h>
29 #include <linux/types.h>
30 #include <linux/bitops.h>
31 #include <linux/sched.h>
32 #include "kfd_priv.h"
33 #include "kfd_device_queue_manager.h"
34 #include "kfd_mqd_manager.h"
35 #include "cik_regs.h"
36 #include "kfd_kernel_queue.h"
37 #include "amdgpu_amdkfd.h"
38 #include "mes_api_def.h"
39 
40 /* Size of the per-pipe EOP queue */
41 #define CIK_HPD_EOP_BYTES_LOG2 11
42 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
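/* For reference: 1U << 11 = 2048, i.e. 2 KiB of EOP buffer per pipe. */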
43 
44 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
45 				  u32 pasid, unsigned int vmid);
46 
47 static int execute_queues_cpsch(struct device_queue_manager *dqm,
48 				enum kfd_unmap_queues_filter filter,
49 				uint32_t filter_param);
50 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
51 				enum kfd_unmap_queues_filter filter,
52 				uint32_t filter_param, bool reset);
53 
54 static int map_queues_cpsch(struct device_queue_manager *dqm);
55 
56 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
57 				struct queue *q);
58 
59 static inline void deallocate_hqd(struct device_queue_manager *dqm,
60 				struct queue *q);
61 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
62 static int allocate_sdma_queue(struct device_queue_manager *dqm,
63 				struct queue *q, const uint32_t *restore_sdma_id);
64 static void kfd_process_hw_exception(struct work_struct *work);
65 
66 static inline
67 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
68 {
69 	if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
70 		return KFD_MQD_TYPE_SDMA;
71 	return KFD_MQD_TYPE_CP;
72 }
73 
74 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
75 {
76 	int i;
77 	int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec
78 		+ pipe) * dqm->dev->shared_resources.num_queue_per_pipe;
79 
80 	/* queue is available for KFD usage if bit is 1 */
81 	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
82 		if (test_bit(pipe_offset + i,
83 			      dqm->dev->shared_resources.cp_queue_bitmap))
84 			return true;
85 	return false;
86 }
87 
88 unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
89 {
90 	return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap,
91 				KGD_MAX_QUEUES);
92 }
93 
94 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
95 {
96 	return dqm->dev->shared_resources.num_queue_per_pipe;
97 }
98 
99 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
100 {
101 	return dqm->dev->shared_resources.num_pipe_per_mec;
102 }
103 
104 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
105 {
106 	return kfd_get_num_sdma_engines(dqm->dev) +
107 		kfd_get_num_xgmi_sdma_engines(dqm->dev);
108 }
109 
110 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
111 {
112 	return kfd_get_num_sdma_engines(dqm->dev) *
113 		dqm->dev->device_info.num_sdma_queues_per_engine;
114 }
115 
116 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
117 {
118 	return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
119 		dqm->dev->device_info.num_sdma_queues_per_engine;
120 }
121 
122 static inline uint64_t get_reserved_sdma_queues_bitmap(struct device_queue_manager *dqm)
123 {
124 	return dqm->dev->device_info.reserved_sdma_queues_bitmap;
125 }
126 
127 void program_sh_mem_settings(struct device_queue_manager *dqm,
128 					struct qcm_process_device *qpd)
129 {
130 	return dqm->dev->kfd2kgd->program_sh_mem_settings(
131 						dqm->dev->adev, qpd->vmid,
132 						qpd->sh_mem_config,
133 						qpd->sh_mem_ape1_base,
134 						qpd->sh_mem_ape1_limit,
135 						qpd->sh_mem_bases);
136 }
137 
138 static void kfd_hws_hang(struct device_queue_manager *dqm)
139 {
140 	/*
141 	 * Issue a GPU reset if HWS is unresponsive
142 	 */
143 	dqm->is_hws_hang = true;
144 
145 	/* It's possible we're detecting a HWS hang in the
146 	 * middle of a GPU reset. No need to schedule another
147 	 * reset in this case.
148 	 */
149 	if (!dqm->is_resetting)
150 		schedule_work(&dqm->hw_exception_work);
151 }
152 
153 static int convert_to_mes_queue_type(int queue_type)
154 {
155 	int mes_queue_type;
156 
157 	switch (queue_type) {
158 	case KFD_QUEUE_TYPE_COMPUTE:
159 		mes_queue_type = MES_QUEUE_TYPE_COMPUTE;
160 		break;
161 	case KFD_QUEUE_TYPE_SDMA:
162 		mes_queue_type = MES_QUEUE_TYPE_SDMA;
163 		break;
164 	default:
165 		WARN(1, "Invalid queue type %d", queue_type);
166 		mes_queue_type = -EINVAL;
167 		break;
168 	}
169 
170 	return mes_queue_type;
171 }
172 
173 static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
174 			 struct qcm_process_device *qpd)
175 {
176 	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
177 	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
178 	struct mes_add_queue_input queue_input;
179 	int r;
180 
181 	if (dqm->is_hws_hang)
182 		return -EIO;
183 
184 	memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
185 	queue_input.process_id = qpd->pqm->process->pasid;
186 	queue_input.page_table_base_addr = qpd->page_table_base;
187 	queue_input.process_va_start = 0;
188 	queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
189 	/* The MES expresses quantum values in units of 100ns */
190 	queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM;  /* equivalent to 10ms */
191 	queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
192 	queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* equivalent to 1ms */
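	/*
	 * Illustrative arithmetic only: with a 100ns unit, a 10ms process
	 * quantum corresponds to 10ms / 100ns = 100000 units and a 1ms gang
	 * quantum to 10000 units; the values actually programmed come from
	 * KFD_MES_PROCESS_QUANTUM and KFD_MES_GANG_QUANTUM.
	 */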
193 	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
194 	queue_input.inprocess_gang_priority = q->properties.priority;
195 	queue_input.gang_global_priority_level =
196 					AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
197 	queue_input.doorbell_offset = q->properties.doorbell_off;
198 	queue_input.mqd_addr = q->gart_mqd_addr;
199 	queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
200 	queue_input.paging = false;
201 	queue_input.tba_addr = qpd->tba_addr;
202 	queue_input.tma_addr = qpd->tma_addr;
203 
204 	queue_input.queue_type = convert_to_mes_queue_type(q->properties.type);
205 	if (queue_input.queue_type < 0) {
206 		pr_err("Queue type not supported with MES, queue:%d\n",
207 				q->properties.type);
208 		return -EINVAL;
209 	}
210 
211 	if (q->gws) {
212 		queue_input.gws_base = 0;
213 		queue_input.gws_size = qpd->num_gws;
214 	}
215 
216 	amdgpu_mes_lock(&adev->mes);
217 	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
218 	amdgpu_mes_unlock(&adev->mes);
219 	if (r) {
220 		pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
221 			q->properties.doorbell_off);
222 		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
223 		kfd_hws_hang(dqm);
224 	}
225 
226 	return r;
227 }
228 
229 static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
230 			struct qcm_process_device *qpd)
231 {
232 	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
233 	int r;
234 	struct mes_remove_queue_input queue_input;
235 
236 	if (dqm->is_hws_hang)
237 		return -EIO;
238 
239 	memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
240 	queue_input.doorbell_offset = q->properties.doorbell_off;
241 	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
242 
243 	amdgpu_mes_lock(&adev->mes);
244 	r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
245 	amdgpu_mes_unlock(&adev->mes);
246 
247 	if (r) {
248 		pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n",
249 			q->properties.doorbell_off);
250 		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
251 		kfd_hws_hang(dqm);
252 	}
253 
254 	return r;
255 }
256 
257 static int remove_all_queues_mes(struct device_queue_manager *dqm)
258 {
259 	struct device_process_node *cur;
260 	struct qcm_process_device *qpd;
261 	struct queue *q;
262 	int retval = 0;
263 
264 	list_for_each_entry(cur, &dqm->queues, list) {
265 		qpd = cur->qpd;
266 		list_for_each_entry(q, &qpd->queues_list, list) {
267 			if (q->properties.is_active) {
268 				retval = remove_queue_mes(dqm, q, qpd);
269 				if (retval) {
270 					pr_err("%s: Failed to remove queue %d for dev %d\n",
271 						__func__,
272 						q->properties.queue_id,
273 						dqm->dev->id);
274 					return retval;
275 				}
276 			}
277 		}
278 	}
279 
280 	return retval;
281 }
282 
283 static void increment_queue_count(struct device_queue_manager *dqm,
284 				  struct qcm_process_device *qpd,
285 				  struct queue *q)
286 {
287 	dqm->active_queue_count++;
288 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
289 	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
290 		dqm->active_cp_queue_count++;
291 
292 	if (q->properties.is_gws) {
293 		dqm->gws_queue_count++;
294 		qpd->mapped_gws_queue = true;
295 	}
296 }
297 
298 static void decrement_queue_count(struct device_queue_manager *dqm,
299 				  struct qcm_process_device *qpd,
300 				  struct queue *q)
301 {
302 	dqm->active_queue_count--;
303 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
304 	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
305 		dqm->active_cp_queue_count--;
306 
307 	if (q->properties.is_gws) {
308 		dqm->gws_queue_count--;
309 		qpd->mapped_gws_queue = false;
310 	}
311 }
312 
313 /*
314  * Allocate a doorbell ID to this queue.
315  * If doorbell_id is passed in, make sure the requested ID is valid, then allocate it.
316  */
317 static int allocate_doorbell(struct qcm_process_device *qpd,
318 			     struct queue *q,
319 			     uint32_t const *restore_id)
320 {
321 	struct kfd_dev *dev = qpd->dqm->dev;
322 
323 	if (!KFD_IS_SOC15(dev)) {
324 		/* On pre-SOC15 chips we need to use the queue ID to
325 		 * preserve the user mode ABI.
326 		 */
327 
328 		if (restore_id && *restore_id != q->properties.queue_id)
329 			return -EINVAL;
330 
331 		q->doorbell_id = q->properties.queue_id;
332 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
333 			q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
334 		/* For SDMA queues on SOC15 with 8-byte doorbells, use static
335 		 * doorbell assignments based on the engine and queue id.
336 		 * The doorbell index distance between RLC (2*i) and (2*i+1)
337 		 * for an SDMA engine is 512.
338 		 */
339 
340 		uint32_t *idx_offset = dev->shared_resources.sdma_doorbell_idx;
341 		uint32_t valid_id = idx_offset[q->properties.sdma_engine_id]
342 						+ (q->properties.sdma_queue_id & 1)
343 						* KFD_QUEUE_DOORBELL_MIRROR_OFFSET
344 						+ (q->properties.sdma_queue_id >> 1);
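		/*
		 * Worked example with hypothetical numbers: for sdma_engine_id 1
		 * and sdma_queue_id 3, and taking KFD_QUEUE_DOORBELL_MIRROR_OFFSET
		 * to be the 512-slot distance described above, valid_id becomes
		 * idx_offset[1] + 512 + 1, i.e. 512 slots above its even-numbered
		 * RLC pair (queue 2) at idx_offset[1] + 1.
		 */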
345 
346 		if (restore_id && *restore_id != valid_id)
347 			return -EINVAL;
348 		q->doorbell_id = valid_id;
349 	} else {
350 		/* For CP queues on SOC15 */
351 		if (restore_id) {
352 			/* make sure that ID is free */
353 			if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
354 				return -EINVAL;
355 
356 			q->doorbell_id = *restore_id;
357 		} else {
358 			/* or reserve a free doorbell ID */
359 			unsigned int found;
360 
361 			found = find_first_zero_bit(qpd->doorbell_bitmap,
362 						KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
363 			if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
364 				pr_debug("No doorbells available");
365 				return -EBUSY;
366 			}
367 			set_bit(found, qpd->doorbell_bitmap);
368 			q->doorbell_id = found;
369 		}
370 	}
371 
372 	q->properties.doorbell_off =
373 		kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
374 					  q->doorbell_id);
375 	return 0;
376 }
377 
378 static void deallocate_doorbell(struct qcm_process_device *qpd,
379 				struct queue *q)
380 {
381 	unsigned int old;
382 	struct kfd_dev *dev = qpd->dqm->dev;
383 
384 	if (!KFD_IS_SOC15(dev) ||
385 	    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
386 	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
387 		return;
388 
389 	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
390 	WARN_ON(!old);
391 }
392 
393 static void program_trap_handler_settings(struct device_queue_manager *dqm,
394 				struct qcm_process_device *qpd)
395 {
396 	if (dqm->dev->kfd2kgd->program_trap_handler_settings)
397 		dqm->dev->kfd2kgd->program_trap_handler_settings(
398 						dqm->dev->adev, qpd->vmid,
399 						qpd->tba_addr, qpd->tma_addr);
400 }
401 
402 static int allocate_vmid(struct device_queue_manager *dqm,
403 			struct qcm_process_device *qpd,
404 			struct queue *q)
405 {
406 	int allocated_vmid = -1, i;
407 
408 	for (i = dqm->dev->vm_info.first_vmid_kfd;
409 			i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
410 		if (!dqm->vmid_pasid[i]) {
411 			allocated_vmid = i;
412 			break;
413 		}
414 	}
415 
416 	if (allocated_vmid < 0) {
417 		pr_err("no more vmid to allocate\n");
418 		return -ENOSPC;
419 	}
420 
421 	pr_debug("vmid allocated: %d\n", allocated_vmid);
422 
423 	dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
424 
425 	set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
426 
427 	qpd->vmid = allocated_vmid;
428 	q->properties.vmid = allocated_vmid;
429 
430 	program_sh_mem_settings(dqm, qpd);
431 
432 	if (KFD_IS_SOC15(dqm->dev) && dqm->dev->cwsr_enabled)
433 		program_trap_handler_settings(dqm, qpd);
434 
435 	/* qpd->page_table_base is set earlier when register_process()
436 	 * is called, i.e. when the first queue is created.
437 	 */
438 	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
439 			qpd->vmid,
440 			qpd->page_table_base);
441 	/* invalidate the VM context after pasid and vmid mapping is set up */
442 	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
443 
444 	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
445 		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
446 				qpd->sh_hidden_private_base, qpd->vmid);
447 
448 	return 0;
449 }
450 
451 static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
452 				struct qcm_process_device *qpd)
453 {
454 	const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
455 	int ret;
456 
457 	if (!qpd->ib_kaddr)
458 		return -ENOMEM;
459 
460 	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
461 	if (ret)
462 		return ret;
463 
464 	return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
465 				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
466 				pmf->release_mem_size / sizeof(uint32_t));
467 }
468 
469 static void deallocate_vmid(struct device_queue_manager *dqm,
470 				struct qcm_process_device *qpd,
471 				struct queue *q)
472 {
473 	/* On GFX v7, CP doesn't flush TC at dequeue */
474 	if (q->device->adev->asic_type == CHIP_HAWAII)
475 		if (flush_texture_cache_nocpsch(q->device, qpd))
476 			pr_err("Failed to flush TC\n");
477 
478 	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
479 
480 	/* Release the vmid mapping */
481 	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
482 	dqm->vmid_pasid[qpd->vmid] = 0;
483 
484 	qpd->vmid = 0;
485 	q->properties.vmid = 0;
486 }
487 
488 static int create_queue_nocpsch(struct device_queue_manager *dqm,
489 				struct queue *q,
490 				struct qcm_process_device *qpd,
491 				const struct kfd_criu_queue_priv_data *qd,
492 				const void *restore_mqd, const void *restore_ctl_stack)
493 {
494 	struct mqd_manager *mqd_mgr;
495 	int retval;
496 
497 	dqm_lock(dqm);
498 
499 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
500 		pr_warn("Can't create new usermode queue because %d queues were already created\n",
501 				dqm->total_queue_count);
502 		retval = -EPERM;
503 		goto out_unlock;
504 	}
505 
506 	if (list_empty(&qpd->queues_list)) {
507 		retval = allocate_vmid(dqm, qpd, q);
508 		if (retval)
509 			goto out_unlock;
510 	}
511 	q->properties.vmid = qpd->vmid;
512 	/*
513 	 * Eviction state logic: mark all queues as evicted, even ones
514 	 * not currently active. Restoring inactive queues later only
515 	 * updates the is_evicted flag but is a no-op otherwise.
516 	 */
517 	q->properties.is_evicted = !!qpd->evicted;
518 
519 	q->properties.tba_addr = qpd->tba_addr;
520 	q->properties.tma_addr = qpd->tma_addr;
521 
522 	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
523 			q->properties.type)];
524 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
525 		retval = allocate_hqd(dqm, q);
526 		if (retval)
527 			goto deallocate_vmid;
528 		pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
529 			q->pipe, q->queue);
530 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
531 		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
532 		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
533 		if (retval)
534 			goto deallocate_vmid;
535 		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
536 	}
537 
538 	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
539 	if (retval)
540 		goto out_deallocate_hqd;
541 
542 	/* Temporarily release dqm lock to avoid a circular lock dependency */
543 	dqm_unlock(dqm);
544 	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
545 	dqm_lock(dqm);
546 
547 	if (!q->mqd_mem_obj) {
548 		retval = -ENOMEM;
549 		goto out_deallocate_doorbell;
550 	}
551 
552 	if (qd)
553 		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
554 				     &q->properties, restore_mqd, restore_ctl_stack,
555 				     qd->ctl_stack_size);
556 	else
557 		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
558 					&q->gart_mqd_addr, &q->properties);
559 
560 	if (q->properties.is_active) {
561 		if (!dqm->sched_running) {
562 			WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
563 			goto add_queue_to_list;
564 		}
565 
566 		if (WARN(q->process->mm != current->mm,
567 					"should only run in user thread"))
568 			retval = -EFAULT;
569 		else
570 			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
571 					q->queue, &q->properties, current->mm);
572 		if (retval)
573 			goto out_free_mqd;
574 	}
575 
576 add_queue_to_list:
577 	list_add(&q->list, &qpd->queues_list);
578 	qpd->queue_count++;
579 	if (q->properties.is_active)
580 		increment_queue_count(dqm, qpd, q);
581 
582 	/*
583 	 * Unconditionally increment this counter, regardless of the queue's
584 	 * type or whether the queue is active.
585 	 */
586 	dqm->total_queue_count++;
587 	pr_debug("Total of %d queues are accountable so far\n",
588 			dqm->total_queue_count);
589 	goto out_unlock;
590 
591 out_free_mqd:
592 	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
593 out_deallocate_doorbell:
594 	deallocate_doorbell(qpd, q);
595 out_deallocate_hqd:
596 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
597 		deallocate_hqd(dqm, q);
598 	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
599 		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
600 		deallocate_sdma_queue(dqm, q);
601 deallocate_vmid:
602 	if (list_empty(&qpd->queues_list))
603 		deallocate_vmid(dqm, qpd, q);
604 out_unlock:
605 	dqm_unlock(dqm);
606 	return retval;
607 }
608 
609 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
610 {
611 	bool set;
612 	int pipe, bit, i;
613 
614 	set = false;
615 
616 	for (pipe = dqm->next_pipe_to_allocate, i = 0;
617 			i < get_pipes_per_mec(dqm);
618 			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
619 
620 		if (!is_pipe_enabled(dqm, 0, pipe))
621 			continue;
622 
623 		if (dqm->allocated_queues[pipe] != 0) {
624 			bit = ffs(dqm->allocated_queues[pipe]) - 1;
625 			dqm->allocated_queues[pipe] &= ~(1 << bit);
626 			q->pipe = pipe;
627 			q->queue = bit;
628 			set = true;
629 			break;
630 		}
631 	}
632 
633 	if (!set)
634 		return -EBUSY;
635 
636 	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
637 	/* horizontal hqd allocation */
638 	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
639 
640 	return 0;
641 }
642 
643 static inline void deallocate_hqd(struct device_queue_manager *dqm,
644 				struct queue *q)
645 {
646 	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
647 }
648 
649 #define SQ_IND_CMD_CMD_KILL		0x00000003
650 #define SQ_IND_CMD_MODE_BROADCAST	0x00000001
651 
652 static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
653 {
654 	int status = 0;
655 	unsigned int vmid;
656 	uint16_t queried_pasid;
657 	union SQ_CMD_BITS reg_sq_cmd;
658 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
659 	struct kfd_process_device *pdd;
660 	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
661 	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
662 
663 	reg_sq_cmd.u32All = 0;
664 	reg_gfx_index.u32All = 0;
665 
666 	pr_debug("Killing all process wavefronts\n");
667 
668 	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
669 		pr_err("no vmid pasid mapping supported\n");
670 		return -EOPNOTSUPP;
671 	}
672 
673 	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
674 	 * ATC_VMID15_PASID_MAPPING
675 	 * to check which VMID the current process is mapped to.
676 	 */
677 
678 	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
679 		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
680 				(dev->adev, vmid, &queried_pasid);
681 
682 		if (status && queried_pasid == p->pasid) {
683 			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
684 					vmid, p->pasid);
685 			break;
686 		}
687 	}
688 
689 	if (vmid > last_vmid_to_scan) {
690 		pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
691 		return -EFAULT;
692 	}
693 
694 	/* Take the VMID for that process the safe way, via its PDD */
695 	pdd = kfd_get_process_device_data(dev, p);
696 	if (!pdd)
697 		return -EFAULT;
698 
699 	reg_gfx_index.bits.sh_broadcast_writes = 1;
700 	reg_gfx_index.bits.se_broadcast_writes = 1;
701 	reg_gfx_index.bits.instance_broadcast_writes = 1;
702 	reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
703 	reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
704 	reg_sq_cmd.bits.vm_id = vmid;
705 
706 	dev->kfd2kgd->wave_control_execute(dev->adev,
707 					reg_gfx_index.u32All,
708 					reg_sq_cmd.u32All);
709 
710 	return 0;
711 }
712 
713 /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
714  * to avoid unsynchronized access
715  */
716 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
717 				struct qcm_process_device *qpd,
718 				struct queue *q)
719 {
720 	int retval;
721 	struct mqd_manager *mqd_mgr;
722 
723 	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
724 			q->properties.type)];
725 
726 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
727 		deallocate_hqd(dqm, q);
728 	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
729 		deallocate_sdma_queue(dqm, q);
730 	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
731 		deallocate_sdma_queue(dqm, q);
732 	else {
733 		pr_debug("q->properties.type %d is invalid\n",
734 				q->properties.type);
735 		return -EINVAL;
736 	}
737 	dqm->total_queue_count--;
738 
739 	deallocate_doorbell(qpd, q);
740 
741 	if (!dqm->sched_running) {
742 		WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
743 		return 0;
744 	}
745 
746 	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
747 				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
748 				KFD_UNMAP_LATENCY_MS,
749 				q->pipe, q->queue);
750 	if (retval == -ETIME)
751 		qpd->reset_wavefronts = true;
752 
753 	list_del(&q->list);
754 	if (list_empty(&qpd->queues_list)) {
755 		if (qpd->reset_wavefronts) {
756 			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
757 					dqm->dev);
758 			/* dbgdev_wave_reset_wavefronts has to be called before
759 			 * deallocate_vmid(), i.e. when vmid is still in use.
760 			 */
761 			dbgdev_wave_reset_wavefronts(dqm->dev,
762 					qpd->pqm->process);
763 			qpd->reset_wavefronts = false;
764 		}
765 
766 		deallocate_vmid(dqm, qpd, q);
767 	}
768 	qpd->queue_count--;
769 	if (q->properties.is_active)
770 		decrement_queue_count(dqm, qpd, q);
771 
772 	return retval;
773 }
774 
775 static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
776 				struct qcm_process_device *qpd,
777 				struct queue *q)
778 {
779 	int retval;
780 	uint64_t sdma_val = 0;
781 	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
782 	struct mqd_manager *mqd_mgr =
783 		dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
784 
785 	/* Get the SDMA queue stats */
786 	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
787 	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
788 		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
789 							&sdma_val);
790 		if (retval)
791 			pr_err("Failed to read SDMA queue counter for queue: %d\n",
792 				q->properties.queue_id);
793 	}
794 
795 	dqm_lock(dqm);
796 	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
797 	if (!retval)
798 		pdd->sdma_past_activity_counter += sdma_val;
799 	dqm_unlock(dqm);
800 
801 	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
802 
803 	return retval;
804 }
805 
806 static int update_queue(struct device_queue_manager *dqm, struct queue *q,
807 			struct mqd_update_info *minfo)
808 {
809 	int retval = 0;
810 	struct mqd_manager *mqd_mgr;
811 	struct kfd_process_device *pdd;
812 	bool prev_active = false;
813 	bool add_queue = false;
814 
815 	dqm_lock(dqm);
816 	pdd = kfd_get_process_device_data(q->device, q->process);
817 	if (!pdd) {
818 		retval = -ENODEV;
819 		goto out_unlock;
820 	}
821 	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
822 			q->properties.type)];
823 
824 	/* Save previous activity state for counters */
825 	prev_active = q->properties.is_active;
826 
827 	/* Make sure the queue is unmapped before updating the MQD */
828 	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
829 		if (!dqm->dev->shared_resources.enable_mes)
830 			retval = unmap_queues_cpsch(dqm,
831 						    KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
832 		else if (prev_active)
833 			retval = remove_queue_mes(dqm, q, &pdd->qpd);
834 
835 		if (retval) {
836 			pr_err("unmap queue failed\n");
837 			goto out_unlock;
838 		}
839 	} else if (prev_active &&
840 		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
841 		    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
842 		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
843 
844 		if (!dqm->sched_running) {
845 			WARN_ONCE(1, "Update non-HWS queue while stopped\n");
846 			goto out_unlock;
847 		}
848 
849 		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
850 				(dqm->dev->cwsr_enabled ?
851 				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
852 				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
853 				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
854 		if (retval) {
855 			pr_err("destroy mqd failed\n");
856 			goto out_unlock;
857 		}
858 	}
859 
860 	mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo);
861 
862 	/*
863 	 * Check the new active state against the previous one and adjust
864 	 * the counters accordingly. map_queues_cpsch uses
865 	 * dqm->active_queue_count to determine whether a new runlist must be
866 	 * uploaded.
867 	 */
868 	if (q->properties.is_active && !prev_active) {
869 		increment_queue_count(dqm, &pdd->qpd, q);
870 	} else if (!q->properties.is_active && prev_active) {
871 		decrement_queue_count(dqm, &pdd->qpd, q);
872 	} else if (q->gws && !q->properties.is_gws) {
873 		if (q->properties.is_active) {
874 			dqm->gws_queue_count++;
875 			pdd->qpd.mapped_gws_queue = true;
876 		}
877 		q->properties.is_gws = true;
878 	} else if (!q->gws && q->properties.is_gws) {
879 		if (q->properties.is_active) {
880 			dqm->gws_queue_count--;
881 			pdd->qpd.mapped_gws_queue = false;
882 		}
883 		q->properties.is_gws = false;
884 	}
885 
886 	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
887 		if (!dqm->dev->shared_resources.enable_mes)
888 			retval = map_queues_cpsch(dqm);
889 		else if (add_queue)
890 			retval = add_queue_mes(dqm, q, &pdd->qpd);
891 	} else if (q->properties.is_active &&
892 		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
893 		  q->properties.type == KFD_QUEUE_TYPE_SDMA ||
894 		  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
895 		if (WARN(q->process->mm != current->mm,
896 			 "should only run in user thread"))
897 			retval = -EFAULT;
898 		else
899 			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
900 						   q->pipe, q->queue,
901 						   &q->properties, current->mm);
902 	}
903 
904 out_unlock:
905 	dqm_unlock(dqm);
906 	return retval;
907 }
908 
909 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
910 					struct qcm_process_device *qpd)
911 {
912 	struct queue *q;
913 	struct mqd_manager *mqd_mgr;
914 	struct kfd_process_device *pdd;
915 	int retval, ret = 0;
916 
917 	dqm_lock(dqm);
918 	if (qpd->evicted++ > 0) /* already evicted, do nothing */
919 		goto out;
920 
921 	pdd = qpd_to_pdd(qpd);
922 	pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
923 			    pdd->process->pasid);
924 
925 	pdd->last_evict_timestamp = get_jiffies_64();
926 	/* Mark all queues as evicted. Deactivate all active queues on
927 	 * the qpd.
928 	 */
929 	list_for_each_entry(q, &qpd->queues_list, list) {
930 		q->properties.is_evicted = true;
931 		if (!q->properties.is_active)
932 			continue;
933 
934 		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
935 				q->properties.type)];
936 		q->properties.is_active = false;
937 		decrement_queue_count(dqm, qpd, q);
938 
939 		if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
940 			continue;
941 
942 		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
943 				(dqm->dev->cwsr_enabled ?
944 				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
945 				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
946 				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
947 		if (retval && !ret)
948 			/* Return the first error, but keep going to
949 			 * maintain a consistent eviction state
950 			 */
951 			ret = retval;
952 	}
953 
954 out:
955 	dqm_unlock(dqm);
956 	return ret;
957 }
958 
959 static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
960 				      struct qcm_process_device *qpd)
961 {
962 	struct queue *q;
963 	struct kfd_process_device *pdd;
964 	int retval = 0;
965 
966 	dqm_lock(dqm);
967 	if (qpd->evicted++ > 0) /* already evicted, do nothing */
968 		goto out;
969 
970 	pdd = qpd_to_pdd(qpd);
971 	pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
972 			    pdd->process->pasid);
973 
974 	/* Mark all queues as evicted. Deactivate all active queues on
975 	 * the qpd.
976 	 */
977 	list_for_each_entry(q, &qpd->queues_list, list) {
978 		q->properties.is_evicted = true;
979 		if (!q->properties.is_active)
980 			continue;
981 
982 		q->properties.is_active = false;
983 		decrement_queue_count(dqm, qpd, q);
984 
985 		if (dqm->dev->shared_resources.enable_mes) {
986 			retval = remove_queue_mes(dqm, q, qpd);
987 			if (retval) {
988 				pr_err("Failed to evict queue %d\n",
989 					q->properties.queue_id);
990 				goto out;
991 			}
992 		}
993 	}
994 	pdd->last_evict_timestamp = get_jiffies_64();
995 	if (!dqm->dev->shared_resources.enable_mes)
996 		retval = execute_queues_cpsch(dqm,
997 					      qpd->is_debug ?
998 					      KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
999 					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1000 
1001 out:
1002 	dqm_unlock(dqm);
1003 	return retval;
1004 }
1005 
1006 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
1007 					  struct qcm_process_device *qpd)
1008 {
1009 	struct mm_struct *mm = NULL;
1010 	struct queue *q;
1011 	struct mqd_manager *mqd_mgr;
1012 	struct kfd_process_device *pdd;
1013 	uint64_t pd_base;
1014 	uint64_t eviction_duration;
1015 	int retval, ret = 0;
1016 
1017 	pdd = qpd_to_pdd(qpd);
1018 	/* Retrieve PD base */
1019 	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
1020 
1021 	dqm_lock(dqm);
1022 	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
1023 		goto out;
1024 	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
1025 		qpd->evicted--;
1026 		goto out;
1027 	}
1028 
1029 	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
1030 			    pdd->process->pasid);
1031 
1032 	/* Update PD Base in QPD */
1033 	qpd->page_table_base = pd_base;
1034 	pr_debug("Updated PD address to 0x%llx\n", pd_base);
1035 
1036 	if (!list_empty(&qpd->queues_list)) {
1037 		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
1038 				dqm->dev->adev,
1039 				qpd->vmid,
1040 				qpd->page_table_base);
1041 		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
1042 	}
1043 
1044 	/* Take a safe reference to the mm_struct, which may otherwise
1045 	 * disappear even while the kfd_process is still referenced.
1046 	 */
1047 	mm = get_task_mm(pdd->process->lead_thread);
1048 	if (!mm) {
1049 		ret = -EFAULT;
1050 		goto out;
1051 	}
1052 
1053 	/* Remove the eviction flags. Activate queues that are not
1054 	 * inactive for other reasons.
1055 	 */
1056 	list_for_each_entry(q, &qpd->queues_list, list) {
1057 		q->properties.is_evicted = false;
1058 		if (!QUEUE_IS_ACTIVE(q->properties))
1059 			continue;
1060 
1061 		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1062 				q->properties.type)];
1063 		q->properties.is_active = true;
1064 		increment_queue_count(dqm, qpd, q);
1065 
1066 		if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
1067 			continue;
1068 
1069 		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
1070 				       q->queue, &q->properties, mm);
1071 		if (retval && !ret)
1072 			/* Return the first error, but keep going to
1073 			 * maintain a consistent eviction state
1074 			 */
1075 			ret = retval;
1076 	}
1077 	qpd->evicted = 0;
1078 	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
1079 	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
1080 out:
1081 	if (mm)
1082 		mmput(mm);
1083 	dqm_unlock(dqm);
1084 	return ret;
1085 }
1086 
1087 static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
1088 					struct qcm_process_device *qpd)
1089 {
1090 	struct queue *q;
1091 	struct kfd_process_device *pdd;
1092 	uint64_t pd_base;
1093 	uint64_t eviction_duration;
1094 	int retval = 0;
1095 
1096 	pdd = qpd_to_pdd(qpd);
1097 	/* Retrieve PD base */
1098 	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
1099 
1100 	dqm_lock(dqm);
1101 	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
1102 		goto out;
1103 	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
1104 		qpd->evicted--;
1105 		goto out;
1106 	}
1107 
1108 	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
1109 			    pdd->process->pasid);
1110 
1111 	/* Update PD Base in QPD */
1112 	qpd->page_table_base = pd_base;
1113 	pr_debug("Updated PD address to 0x%llx\n", pd_base);
1114 
1115 	/* activate all active queues on the qpd */
1116 	list_for_each_entry(q, &qpd->queues_list, list) {
1117 		q->properties.is_evicted = false;
1118 		if (!QUEUE_IS_ACTIVE(q->properties))
1119 			continue;
1120 
1121 		q->properties.is_active = true;
1122 		increment_queue_count(dqm, &pdd->qpd, q);
1123 
1124 		if (dqm->dev->shared_resources.enable_mes) {
1125 			retval = add_queue_mes(dqm, q, qpd);
1126 			if (retval) {
1127 				pr_err("Failed to restore queue %d\n",
1128 					q->properties.queue_id);
1129 				goto out;
1130 			}
1131 		}
1132 	}
1133 	if (!dqm->dev->shared_resources.enable_mes)
1134 		retval = execute_queues_cpsch(dqm,
1135 					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1136 	qpd->evicted = 0;
1137 	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
1138 	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
1139 out:
1140 	dqm_unlock(dqm);
1141 	return retval;
1142 }
1143 
1144 static int register_process(struct device_queue_manager *dqm,
1145 					struct qcm_process_device *qpd)
1146 {
1147 	struct device_process_node *n;
1148 	struct kfd_process_device *pdd;
1149 	uint64_t pd_base;
1150 	int retval;
1151 
1152 	n = kzalloc(sizeof(*n), GFP_KERNEL);
1153 	if (!n)
1154 		return -ENOMEM;
1155 
1156 	n->qpd = qpd;
1157 
1158 	pdd = qpd_to_pdd(qpd);
1159 	/* Retrieve PD base */
1160 	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
1161 
1162 	dqm_lock(dqm);
1163 	list_add(&n->list, &dqm->queues);
1164 
1165 	/* Update PD Base in QPD */
1166 	qpd->page_table_base = pd_base;
1167 	pr_debug("Updated PD address to 0x%llx\n", pd_base);
1168 
1169 	retval = dqm->asic_ops.update_qpd(dqm, qpd);
1170 
1171 	dqm->processes_count++;
1172 
1173 	dqm_unlock(dqm);
1174 
1175 	/* Outside the DQM lock because under the DQM lock we can't do
1176 	 * reclaim or take other locks that others hold while reclaiming.
1177 	 */
1178 	kfd_inc_compute_active(dqm->dev);
1179 
1180 	return retval;
1181 }
1182 
1183 static int unregister_process(struct device_queue_manager *dqm,
1184 					struct qcm_process_device *qpd)
1185 {
1186 	int retval;
1187 	struct device_process_node *cur, *next;
1188 
1189 	pr_debug("qpd->queues_list is %s\n",
1190 			list_empty(&qpd->queues_list) ? "empty" : "not empty");
1191 
1192 	retval = 0;
1193 	dqm_lock(dqm);
1194 
1195 	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
1196 		if (qpd == cur->qpd) {
1197 			list_del(&cur->list);
1198 			kfree(cur);
1199 			dqm->processes_count--;
1200 			goto out;
1201 		}
1202 	}
1203 	/* qpd not found in dqm list */
1204 	retval = 1;
1205 out:
1206 	dqm_unlock(dqm);
1207 
1208 	/* Outside the DQM lock because under the DQM lock we can't do
1209 	 * reclaim or take other locks that others hold while reclaiming.
1210 	 */
1211 	if (!retval)
1212 		kfd_dec_compute_active(dqm->dev);
1213 
1214 	return retval;
1215 }
1216 
1217 static int
1218 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
1219 			unsigned int vmid)
1220 {
1221 	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
1222 						dqm->dev->adev, pasid, vmid);
1223 }
1224 
1225 static void init_interrupts(struct device_queue_manager *dqm)
1226 {
1227 	unsigned int i;
1228 
1229 	for (i = 0; i < get_pipes_per_mec(dqm); i++)
1230 		if (is_pipe_enabled(dqm, 0, i))
1231 			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, i);
1232 }
1233 
1234 static int initialize_nocpsch(struct device_queue_manager *dqm)
1235 {
1236 	int pipe, queue;
1237 
1238 	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1239 
1240 	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
1241 					sizeof(unsigned int), GFP_KERNEL);
1242 	if (!dqm->allocated_queues)
1243 		return -ENOMEM;
1244 
1245 	mutex_init(&dqm->lock_hidden);
1246 	INIT_LIST_HEAD(&dqm->queues);
1247 	dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
1248 	dqm->active_cp_queue_count = 0;
1249 	dqm->gws_queue_count = 0;
1250 
1251 	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
1252 		int pipe_offset = pipe * get_queues_per_pipe(dqm);
1253 
1254 		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
1255 			if (test_bit(pipe_offset + queue,
1256 				     dqm->dev->shared_resources.cp_queue_bitmap))
1257 				dqm->allocated_queues[pipe] |= 1 << queue;
1258 	}
1259 
1260 	memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
1261 
1262 	dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
1263 	dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm));
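	/*
	 * Illustrative example with hypothetical counts: with 2 SDMA engines
	 * and 2 queues per engine, get_num_sdma_queues() returns 4, so the
	 * shift above yields 0xF; any bits in the reserved bitmap are then
	 * cleared before the final mask is reported below.
	 */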
1264 	pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
1265 
1266 	dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
1267 
1268 	return 0;
1269 }
1270 
1271 static void uninitialize(struct device_queue_manager *dqm)
1272 {
1273 	int i;
1274 
1275 	WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);
1276 
1277 	kfree(dqm->allocated_queues);
1278 	for (i = 0; i < KFD_MQD_TYPE_MAX; i++)
1279 		kfree(dqm->mqd_mgrs[i]);
1280 	mutex_destroy(&dqm->lock_hidden);
1281 }
1282 
1283 static int start_nocpsch(struct device_queue_manager *dqm)
1284 {
1285 	int r = 0;
1286 
1287 	pr_info("SW scheduler is used");
1288 	init_interrupts(dqm);
1289 
1290 	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
1291 		r = pm_init(&dqm->packet_mgr, dqm);
1292 	if (!r)
1293 		dqm->sched_running = true;
1294 
1295 	return r;
1296 }
1297 
1298 static int stop_nocpsch(struct device_queue_manager *dqm)
1299 {
1300 	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
1301 		pm_uninit(&dqm->packet_mgr, false);
1302 	dqm->sched_running = false;
1303 
1304 	return 0;
1305 }
1306 
1307 static void pre_reset(struct device_queue_manager *dqm)
1308 {
1309 	dqm_lock(dqm);
1310 	dqm->is_resetting = true;
1311 	dqm_unlock(dqm);
1312 }
1313 
1314 static int allocate_sdma_queue(struct device_queue_manager *dqm,
1315 				struct queue *q, const uint32_t *restore_sdma_id)
1316 {
1317 	int bit;
1318 
1319 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1320 		if (dqm->sdma_bitmap == 0) {
1321 			pr_err("No more SDMA queue to allocate\n");
1322 			return -ENOMEM;
1323 		}
1324 
1325 		if (restore_sdma_id) {
1326 			/* Re-use existing sdma_id */
1327 			if (!(dqm->sdma_bitmap & (1ULL << *restore_sdma_id))) {
1328 				pr_err("SDMA queue already in use\n");
1329 				return -EBUSY;
1330 			}
1331 			dqm->sdma_bitmap &= ~(1ULL << *restore_sdma_id);
1332 			q->sdma_id = *restore_sdma_id;
1333 		} else {
1334 			/* Find first available sdma_id */
1335 			bit = __ffs64(dqm->sdma_bitmap);
1336 			dqm->sdma_bitmap &= ~(1ULL << bit);
1337 			q->sdma_id = bit;
1338 		}
1339 
1340 		q->properties.sdma_engine_id = q->sdma_id %
1341 				kfd_get_num_sdma_engines(dqm->dev);
1342 		q->properties.sdma_queue_id = q->sdma_id /
1343 				kfd_get_num_sdma_engines(dqm->dev);
1344 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1345 		if (dqm->xgmi_sdma_bitmap == 0) {
1346 			pr_err("No more XGMI SDMA queue to allocate\n");
1347 			return -ENOMEM;
1348 		}
1349 		if (restore_sdma_id) {
1350 			/* Re-use existing sdma_id */
1351 			if (!(dqm->xgmi_sdma_bitmap & (1ULL << *restore_sdma_id))) {
1352 				pr_err("SDMA queue already in use\n");
1353 				return -EBUSY;
1354 			}
1355 			dqm->xgmi_sdma_bitmap &= ~(1ULL << *restore_sdma_id);
1356 			q->sdma_id = *restore_sdma_id;
1357 		} else {
1358 			bit = __ffs64(dqm->xgmi_sdma_bitmap);
1359 			dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
1360 			q->sdma_id = bit;
1361 		}
1362 		/* sdma_engine_id is a global engine index covering
1363 		 * both PCIe-optimized SDMAs and XGMI-
1364 		 * optimized SDMAs. The calculation below
1365 		 * assumes the first N engines are always
1366 		 * the PCIe-optimized ones.
1367 		 */
1368 		q->properties.sdma_engine_id =
1369 			kfd_get_num_sdma_engines(dqm->dev) +
1370 			q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
1371 		q->properties.sdma_queue_id = q->sdma_id /
1372 			kfd_get_num_xgmi_sdma_engines(dqm->dev);
1373 	}
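	/*
	 * Illustrative mapping with hypothetical engine counts: with 2
	 * PCIe-optimized and 4 XGMI-optimized engines, an XGMI queue with
	 * sdma_id 5 gets sdma_engine_id = 2 + (5 % 4) = 3 and
	 * sdma_queue_id = 5 / 4 = 1, while a plain SDMA queue with sdma_id 5
	 * gets sdma_engine_id = 5 % 2 = 1 and sdma_queue_id = 5 / 2 = 2.
	 */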
1374 
1375 	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
1376 	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
1377 
1378 	return 0;
1379 }
1380 
1381 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
1382 				struct queue *q)
1383 {
1384 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1385 		if (q->sdma_id >= get_num_sdma_queues(dqm))
1386 			return;
1387 		dqm->sdma_bitmap |= (1ULL << q->sdma_id);
1388 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1389 		if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
1390 			return;
1391 		dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
1392 	}
1393 }
1394 
1395 /*
1396  * Device Queue Manager implementation for cp scheduler
1397  */
1398 
1399 static int set_sched_resources(struct device_queue_manager *dqm)
1400 {
1401 	int i, mec;
1402 	struct scheduling_resources res;
1403 
1404 	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;
1405 
1406 	res.queue_mask = 0;
1407 	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
1408 		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
1409 			/ dqm->dev->shared_resources.num_pipe_per_mec;
1410 
1411 		if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap))
1412 			continue;
1413 
1414 		/* only acquire queues from the first MEC */
1415 		if (mec > 0)
1416 			continue;
1417 
1418 		/* This situation may be hit in the future if a new HW
1419 		 * generation exposes more than 64 queues. If so, the
1420 		 * definition of res.queue_mask needs updating
1421 		 */
1422 		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
1423 			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
1424 			break;
1425 		}
1426 
1427 		res.queue_mask |= 1ull
1428 			<< amdgpu_queue_mask_bit_to_set_resource_bit(
1429 				dqm->dev->adev, i);
1430 	}
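	/*
	 * Illustrative result for a hypothetical topology: if the first MEC
	 * exposes 4 pipes with 8 queues each and all of them are set in
	 * cp_queue_bitmap, the loop above sets 32 bits in res.queue_mask,
	 * assuming a 1:1 mapping from
	 * amdgpu_queue_mask_bit_to_set_resource_bit().
	 */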
1431 	res.gws_mask = ~0ull;
1432 	res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
1433 
1434 	pr_debug("Scheduling resources:\n"
1435 			"vmid mask: 0x%8X\n"
1436 			"queue mask: 0x%8llX\n",
1437 			res.vmid_mask, res.queue_mask);
1438 
1439 	return pm_send_set_resources(&dqm->packet_mgr, &res);
1440 }
1441 
1442 static int initialize_cpsch(struct device_queue_manager *dqm)
1443 {
1444 	uint64_t num_sdma_queues;
1445 	uint64_t num_xgmi_sdma_queues;
1446 
1447 	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1448 
1449 	mutex_init(&dqm->lock_hidden);
1450 	INIT_LIST_HEAD(&dqm->queues);
1451 	dqm->active_queue_count = dqm->processes_count = 0;
1452 	dqm->active_cp_queue_count = 0;
1453 	dqm->gws_queue_count = 0;
1454 	dqm->active_runlist = false;
1455 
1456 	num_sdma_queues = get_num_sdma_queues(dqm);
1457 	if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap))
1458 		dqm->sdma_bitmap = ULLONG_MAX;
1459 	else
1460 		dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1);
1461 
1462 	dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm));
1463 	pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
1464 
1465 	num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm);
1466 	if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap))
1467 		dqm->xgmi_sdma_bitmap = ULLONG_MAX;
1468 	else
1469 		dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) - 1);
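	/*
	 * Illustrative example with a hypothetical count: with 8 XGMI SDMA
	 * queues in total, BIT_ULL(8) - 1 yields 0xFF, one bit per allocatable
	 * queue; the ULLONG_MAX branch only matters when the count reaches the
	 * width of the bitmap type.
	 */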
1470 
1471 	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
1472 
1473 	return 0;
1474 }
1475 
1476 static int start_cpsch(struct device_queue_manager *dqm)
1477 {
1478 	int retval;
1479 
1480 	retval = 0;
1481 
1482 	dqm_lock(dqm);
1483 
1484 	if (!dqm->dev->shared_resources.enable_mes) {
1485 		retval = pm_init(&dqm->packet_mgr, dqm);
1486 		if (retval)
1487 			goto fail_packet_manager_init;
1488 
1489 		retval = set_sched_resources(dqm);
1490 		if (retval)
1491 			goto fail_set_sched_resources;
1492 	}
1493 	pr_debug("Allocating fence memory\n");
1494 
1495 	/* allocate fence memory on the gart */
1496 	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
1497 					&dqm->fence_mem);
1498 
1499 	if (retval)
1500 		goto fail_allocate_vidmem;
1501 
1502 	dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
1503 	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
1504 
1505 	init_interrupts(dqm);
1506 
1507 	/* clear hang status when the driver tries to start the HW scheduler */
1508 	dqm->is_hws_hang = false;
1509 	dqm->is_resetting = false;
1510 	dqm->sched_running = true;
1511 	if (!dqm->dev->shared_resources.enable_mes)
1512 		execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1513 	dqm_unlock(dqm);
1514 
1515 	return 0;
1516 fail_allocate_vidmem:
1517 fail_set_sched_resources:
1518 	if (!dqm->dev->shared_resources.enable_mes)
1519 		pm_uninit(&dqm->packet_mgr, false);
1520 fail_packet_manager_init:
1521 	dqm_unlock(dqm);
1522 	return retval;
1523 }
1524 
1525 static int stop_cpsch(struct device_queue_manager *dqm)
1526 {
1527 	bool hanging;
1528 
1529 	dqm_lock(dqm);
1530 	if (!dqm->sched_running) {
1531 		dqm_unlock(dqm);
1532 		return 0;
1533 	}
1534 
1535 	if (!dqm->is_hws_hang) {
1536 		if (!dqm->dev->shared_resources.enable_mes)
1537 			unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
1538 		else
1539 			remove_all_queues_mes(dqm);
1540 	}
1541 
1542 	hanging = dqm->is_hws_hang || dqm->is_resetting;
1543 	dqm->sched_running = false;
1544 
1545 	if (!dqm->dev->shared_resources.enable_mes)
1546 		pm_release_ib(&dqm->packet_mgr);
1547 
1548 	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
1549 	if (!dqm->dev->shared_resources.enable_mes)
1550 		pm_uninit(&dqm->packet_mgr, hanging);
1551 	dqm_unlock(dqm);
1552 
1553 	return 0;
1554 }
1555 
1556 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
1557 					struct kernel_queue *kq,
1558 					struct qcm_process_device *qpd)
1559 {
1560 	dqm_lock(dqm);
1561 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1562 		pr_warn("Can't create new kernel queue because %d queues were already created\n",
1563 				dqm->total_queue_count);
1564 		dqm_unlock(dqm);
1565 		return -EPERM;
1566 	}
1567 
1568 	/*
1569 	 * Unconditionally increment this counter, regardless of the queue's
1570 	 * type or whether the queue is active.
1571 	 */
1572 	dqm->total_queue_count++;
1573 	pr_debug("Total of %d queues are accountable so far\n",
1574 			dqm->total_queue_count);
1575 
1576 	list_add(&kq->list, &qpd->priv_queue_list);
1577 	increment_queue_count(dqm, qpd, kq->queue);
1578 	qpd->is_debug = true;
1579 	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1580 	dqm_unlock(dqm);
1581 
1582 	return 0;
1583 }
1584 
1585 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
1586 					struct kernel_queue *kq,
1587 					struct qcm_process_device *qpd)
1588 {
1589 	dqm_lock(dqm);
1590 	list_del(&kq->list);
1591 	decrement_queue_count(dqm, qpd, kq->queue);
1592 	qpd->is_debug = false;
1593 	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1594 	/*
1595 	 * Unconditionally decrement this counter, regardless of the queue's
1596 	 * type.
1597 	 */
1598 	dqm->total_queue_count--;
1599 	pr_debug("Total of %d queues are accountable so far\n",
1600 			dqm->total_queue_count);
1601 	dqm_unlock(dqm);
1602 }
1603 
1604 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1605 			struct qcm_process_device *qpd,
1606 			const struct kfd_criu_queue_priv_data *qd,
1607 			const void *restore_mqd, const void *restore_ctl_stack)
1608 {
1609 	int retval;
1610 	struct mqd_manager *mqd_mgr;
1611 
1612 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1613 		pr_warn("Can't create new usermode queue because %d queues were already created\n",
1614 				dqm->total_queue_count);
1615 		retval = -EPERM;
1616 		goto out;
1617 	}
1618 
1619 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1620 		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1621 		dqm_lock(dqm);
1622 		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
1623 		dqm_unlock(dqm);
1624 		if (retval)
1625 			goto out;
1626 	}
1627 
1628 	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
1629 	if (retval)
1630 		goto out_deallocate_sdma_queue;
1631 
1632 	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1633 			q->properties.type)];
1634 
1635 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1636 		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1637 		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
1638 	q->properties.tba_addr = qpd->tba_addr;
1639 	q->properties.tma_addr = qpd->tma_addr;
1640 	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
1641 	if (!q->mqd_mem_obj) {
1642 		retval = -ENOMEM;
1643 		goto out_deallocate_doorbell;
1644 	}
1645 
1646 	dqm_lock(dqm);
1647 	/*
1648 	 * Eviction state logic: mark all queues as evicted, even ones
1649 	 * not currently active. Restoring inactive queues later only
1650 	 * updates the is_evicted flag but is a no-op otherwise.
1651 	 */
1652 	q->properties.is_evicted = !!qpd->evicted;
1653 
1654 	if (qd)
1655 		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
1656 				     &q->properties, restore_mqd, restore_ctl_stack,
1657 				     qd->ctl_stack_size);
1658 	else
1659 		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
1660 					&q->gart_mqd_addr, &q->properties);
1661 
1662 	list_add(&q->list, &qpd->queues_list);
1663 	qpd->queue_count++;
1664 
1665 	if (q->properties.is_active) {
1666 		increment_queue_count(dqm, qpd, q);
1667 
1668 		if (!dqm->dev->shared_resources.enable_mes) {
1669 			retval = execute_queues_cpsch(dqm,
1670 					     KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1671 		} else {
1672 			retval = add_queue_mes(dqm, q, qpd);
1673 			if (retval)
1674 				goto cleanup_queue;
1675 		}
1676 	}
1677 
1678 	/*
1679 	 * Unconditionally increment this counter, regardless of the queue's
1680 	 * type or whether the queue is active.
1681 	 */
1682 	dqm->total_queue_count++;
1683 
1684 	pr_debug("Total of %d queues are accountable so far\n",
1685 			dqm->total_queue_count);
1686 
1687 	dqm_unlock(dqm);
1688 	return retval;
1689 
1690 cleanup_queue:
1691 	qpd->queue_count--;
1692 	list_del(&q->list);
1693 	if (q->properties.is_active)
1694 		decrement_queue_count(dqm, qpd, q);
1695 	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1696 	dqm_unlock(dqm);
1697 out_deallocate_doorbell:
1698 	deallocate_doorbell(qpd, q);
1699 out_deallocate_sdma_queue:
1700 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1701 		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1702 		dqm_lock(dqm);
1703 		deallocate_sdma_queue(dqm, q);
1704 		dqm_unlock(dqm);
1705 	}
1706 out:
1707 	return retval;
1708 }
1709 
1710 int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
1711 				uint64_t fence_value,
1712 				unsigned int timeout_ms)
1713 {
1714 	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
1715 
1716 	while (*fence_addr != fence_value) {
1717 		if (time_after(jiffies, end_jiffies)) {
1718 			pr_err("qcm fence wait loop timeout expired\n");
1719 			/* In HWS case, this is used to halt the driver thread
1720 			 * in order not to mess up CP states before doing
1721 			 * scandumps for FW debugging.
1722 			 */
1723 			while (halt_if_hws_hang)
1724 				schedule();
1725 
1726 			return -ETIME;
1727 		}
1728 		schedule();
1729 	}
1730 
1731 	return 0;
1732 }
1733 
1734 /* dqm->lock mutex has to be locked before calling this function */
1735 static int map_queues_cpsch(struct device_queue_manager *dqm)
1736 {
1737 	int retval;
1738 
1739 	if (!dqm->sched_running)
1740 		return 0;
1741 	if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
1742 		return 0;
1743 	if (dqm->active_runlist)
1744 		return 0;
1745 
1746 	retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues);
1747 	pr_debug("%s sent runlist\n", __func__);
1748 	if (retval) {
1749 		pr_err("failed to execute runlist\n");
1750 		return retval;
1751 	}
1752 	dqm->active_runlist = true;
1753 
1754 	return retval;
1755 }
1756 
1757 /* dqm->lock mutex has to be locked before calling this function */
1758 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
1759 				enum kfd_unmap_queues_filter filter,
1760 				uint32_t filter_param, bool reset)
1761 {
1762 	int retval = 0;
1763 	struct mqd_manager *mqd_mgr;
1764 
1765 	if (!dqm->sched_running)
1766 		return 0;
1767 	if (dqm->is_hws_hang || dqm->is_resetting)
1768 		return -EIO;
1769 	if (!dqm->active_runlist)
1770 		return retval;
1771 
1772 	retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
1773 	if (retval)
1774 		return retval;
1775 
1776 	*dqm->fence_addr = KFD_FENCE_INIT;
1777 	pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
1778 				KFD_FENCE_COMPLETED);
1779 	/* bounded wait: returns -ETIME if the preemption does not complete in time */
1780 	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
1781 				queue_preemption_timeout_ms);
1782 	if (retval) {
1783 		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
1784 		kfd_hws_hang(dqm);
1785 		return retval;
1786 	}
1787 
1788 	/* In the current MEC firmware implementation, if a compute queue
1789 	 * doesn't respond to the preemption request in time, the HIQ will
1790 	 * abandon the unmap request without returning any timeout error
1791 	 * to the driver. Instead, the MEC firmware logs the doorbell of the
1792 	 * unresponsive compute queue in the HIQ.MQD.queue_doorbell_id fields.
1793 	 * To make sure the queue unmap was successful, the driver needs to
1794 	 * check those fields.
1795 	 */
1796 	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
1797 	if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
1798 		pr_err("HIQ MQD's queue_doorbell_id0 is not 0, queue preemption timed out\n");
1799 		while (halt_if_hws_hang)
1800 			schedule();
1801 		return -ETIME;
1802 	}
1803 
1804 	pm_release_ib(&dqm->packet_mgr);
1805 	dqm->active_runlist = false;
1806 
1807 	return retval;
1808 }
1809 
1810 /* only for compute queue */
1811 static int reset_queues_cpsch(struct device_queue_manager *dqm,
1812 			uint16_t pasid)
1813 {
1814 	int retval;
1815 
1816 	dqm_lock(dqm);
1817 
1818 	retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
1819 			pasid, true);
1820 
1821 	dqm_unlock(dqm);
1822 	return retval;
1823 }
1824 
1825 /* dqm->lock mutex has to be locked before calling this function */
1826 static int execute_queues_cpsch(struct device_queue_manager *dqm,
1827 				enum kfd_unmap_queues_filter filter,
1828 				uint32_t filter_param)
1829 {
1830 	int retval;
1831 
1832 	if (dqm->is_hws_hang)
1833 		return -EIO;
1834 	retval = unmap_queues_cpsch(dqm, filter, filter_param, false);
1835 	if (retval)
1836 		return retval;
1837 
1838 	return map_queues_cpsch(dqm);
1839 }
1840 
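/*
 * Destroy a user-mode queue under HWS/MES scheduling: capture SDMA usage
 * counters (for SDMA queues), release the doorbell and SDMA resources,
 * take the queue off the process and run lists, and free the MQD after
 * dropping the dqm lock.
 */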
1841 static int destroy_queue_cpsch(struct device_queue_manager *dqm,
1842 				struct qcm_process_device *qpd,
1843 				struct queue *q)
1844 {
1845 	int retval;
1846 	struct mqd_manager *mqd_mgr;
1847 	uint64_t sdma_val = 0;
1848 	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
1849 
1850 	/* Get the SDMA queue stats */
1851 	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
1852 	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
1853 		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
1854 							&sdma_val);
1855 		if (retval)
1856 			pr_err("Failed to read SDMA queue counter for queue: %d\n",
1857 				q->properties.queue_id);
1858 	}
1859 
1860 	retval = 0;
1861 
1862 	/* remove queue from list to prevent rescheduling after preemption */
1863 	dqm_lock(dqm);
1864 
1865 	if (qpd->is_debug) {
1866 		/*
1867 		 * Error: we currently do not allow destroying a queue
1868 		 * that belongs to a process being debugged.
1869 		 */
1870 		retval = -EBUSY;
1871 		goto failed_try_destroy_debugged_queue;
1872 
1873 	}
1874 
1875 	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1876 			q->properties.type)];
1877 
1878 	deallocate_doorbell(qpd, q);
1879 
1880 	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
1881 	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
1882 		deallocate_sdma_queue(dqm, q);
1883 		pdd->sdma_past_activity_counter += sdma_val;
1884 	}
1885 
1886 	list_del(&q->list);
1887 	qpd->queue_count--;
1888 	if (q->properties.is_active) {
1889 		if (!dqm->dev->shared_resources.enable_mes) {
1890 			decrement_queue_count(dqm, qpd, q);
1891 			retval = execute_queues_cpsch(dqm,
1892 						      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1893 			if (retval == -ETIME)
1894 				qpd->reset_wavefronts = true;
1895 		} else {
1896 			retval = remove_queue_mes(dqm, q, qpd);
1897 		}
1898 	}
1899 
1900 	/*
1901 	 * Unconditionally decrement this counter, regardless of the queue's
1902 	 * type
1903 	 */
1904 	dqm->total_queue_count--;
1905 	pr_debug("Total of %d queues are accountable so far\n",
1906 			dqm->total_queue_count);
1907 
1908 	dqm_unlock(dqm);
1909 
1910 	/* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */
1911 	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1912 
1913 	return retval;
1914 
1915 failed_try_destroy_debugged_queue:
1916 
1917 	dqm_unlock(dqm);
1918 	return retval;
1919 }
1920 
1921 /*
1922  * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
1923  * stay in user mode.
1924  */
1925 #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
1926 /* APE1 limit is inclusive and 64K aligned. */
1927 #define APE1_LIMIT_ALIGNMENT 0xFFFF
1928 
1929 static bool set_cache_memory_policy(struct device_queue_manager *dqm,
1930 				   struct qcm_process_device *qpd,
1931 				   enum cache_policy default_policy,
1932 				   enum cache_policy alternate_policy,
1933 				   void __user *alternate_aperture_base,
1934 				   uint64_t alternate_aperture_size)
1935 {
1936 	bool retval = true;
1937 
1938 	if (!dqm->asic_ops.set_cache_memory_policy)
1939 		return retval;
1940 
1941 	dqm_lock(dqm);
1942 
1943 	if (alternate_aperture_size == 0) {
1944 		/* base > limit disables APE1 */
1945 		qpd->sh_mem_ape1_base = 1;
1946 		qpd->sh_mem_ape1_limit = 0;
1947 	} else {
1948 		/*
1949 		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
1950 		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
1951 		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
1952 		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
1953 		 * Verify that the base and size parameters can be
1954 		 * represented in this format and convert them.
1955 		 * Additionally restrict APE1 to user-mode addresses.
1956 		 */
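		/*
		 * Example (illustrative values): base = 0x200000000 with
		 * size = 0x10000 gives limit = 0x20000FFFF; base has no
		 * APE1_FIXED_BITS_MASK bits set and limit's fixed bits equal
		 * APE1_LIMIT_ALIGNMENT, so sh_mem_ape1_base and
		 * sh_mem_ape1_limit are each programmed with 0x20000.
		 */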
1957 
1958 		uint64_t base = (uintptr_t)alternate_aperture_base;
1959 		uint64_t limit = base + alternate_aperture_size - 1;
1960 
1961 		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
1962 		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
1963 			retval = false;
1964 			goto out;
1965 		}
1966 
1967 		qpd->sh_mem_ape1_base = base >> 16;
1968 		qpd->sh_mem_ape1_limit = limit >> 16;
1969 	}
1970 
1971 	retval = dqm->asic_ops.set_cache_memory_policy(
1972 			dqm,
1973 			qpd,
1974 			default_policy,
1975 			alternate_policy,
1976 			alternate_aperture_base,
1977 			alternate_aperture_size);
1978 
1979 	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
1980 		program_sh_mem_settings(dqm, qpd);
1981 
1982 	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
1983 		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
1984 		qpd->sh_mem_ape1_limit);
1985 
1986 out:
1987 	dqm_unlock(dqm);
1988 	return retval;
1989 }
1990 
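/*
 * Tear down all remaining user-mode queues of a process and unregister it
 * from the DQM (no-HWS path). MQDs are freed with the dqm lock temporarily
 * dropped to avoid circular locking.
 */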
1991 static int process_termination_nocpsch(struct device_queue_manager *dqm,
1992 		struct qcm_process_device *qpd)
1993 {
1994 	struct queue *q;
1995 	struct device_process_node *cur, *next_dpn;
1996 	int retval = 0;
1997 	bool found = false;
1998 
1999 	dqm_lock(dqm);
2000 
2001 	/* Clear all user mode queues */
2002 	while (!list_empty(&qpd->queues_list)) {
2003 		struct mqd_manager *mqd_mgr;
2004 		int ret;
2005 
2006 		q = list_first_entry(&qpd->queues_list, struct queue, list);
2007 		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
2008 				q->properties.type)];
2009 		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
2010 		if (ret)
2011 			retval = ret;
2012 		dqm_unlock(dqm);
2013 		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
2014 		dqm_lock(dqm);
2015 	}
2016 
2017 	/* Unregister process */
2018 	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
2019 		if (qpd == cur->qpd) {
2020 			list_del(&cur->list);
2021 			kfree(cur);
2022 			dqm->processes_count--;
2023 			found = true;
2024 			break;
2025 		}
2026 	}
2027 
2028 	dqm_unlock(dqm);
2029 
2030 	/* Outside the DQM lock because under the DQM lock we can't do
2031 	 * reclaim or take other locks that others hold while reclaiming.
2032 	 */
2033 	if (found)
2034 		kfd_dec_compute_active(dqm->dev);
2035 
2036 	return retval;
2037 }
2038 
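/*
 * Copy the saved control stack of an inactive, CWSR-enabled compute queue
 * into a user buffer and report how much of the control stack and save
 * area is in use.
 */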
2039 static int get_wave_state(struct device_queue_manager *dqm,
2040 			  struct queue *q,
2041 			  void __user *ctl_stack,
2042 			  u32 *ctl_stack_used_size,
2043 			  u32 *save_area_used_size)
2044 {
2045 	struct mqd_manager *mqd_mgr;
2046 
2047 	dqm_lock(dqm);
2048 
2049 	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
2050 
2051 	if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
2052 	    q->properties.is_active || !q->device->cwsr_enabled ||
2053 	    !mqd_mgr->get_wave_state) {
2054 		dqm_unlock(dqm);
2055 		return -EINVAL;
2056 	}
2057 
2058 	dqm_unlock(dqm);
2059 
2060 	/*
2061 	 * get_wave_state() is called outside the dqm lock to prevent circular
2062 	 * locking; the queue is protected against destruction by the process
2063 	 * lock.
2064 	 */
2065 	return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
2066 			ctl_stack_used_size, save_area_used_size);
2067 }
2068 
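/*
 * Report the MQD size and, for compute queues, the control stack size
 * needed to checkpoint the given queue.
 */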
2069 static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
2070 			const struct queue *q,
2071 			u32 *mqd_size,
2072 			u32 *ctl_stack_size)
2073 {
2074 	struct mqd_manager *mqd_mgr;
2075 	enum KFD_MQD_TYPE mqd_type =
2076 			get_mqd_type_from_queue_type(q->properties.type);
2077 
2078 	dqm_lock(dqm);
2079 	mqd_mgr = dqm->mqd_mgrs[mqd_type];
2080 	*mqd_size = mqd_mgr->mqd_size;
2081 	*ctl_stack_size = 0;
2082 
2083 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
2084 		mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);
2085 
2086 	dqm_unlock(dqm);
2087 }
2088 
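/*
 * Copy a queue's MQD and control stack into caller-provided buffers.
 * The queue must be inactive and the device must have CWSR enabled.
 */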
2089 static int checkpoint_mqd(struct device_queue_manager *dqm,
2090 			  const struct queue *q,
2091 			  void *mqd,
2092 			  void *ctl_stack)
2093 {
2094 	struct mqd_manager *mqd_mgr;
2095 	int r = 0;
2096 	enum KFD_MQD_TYPE mqd_type =
2097 			get_mqd_type_from_queue_type(q->properties.type);
2098 
2099 	dqm_lock(dqm);
2100 
2101 	if (q->properties.is_active || !q->device->cwsr_enabled) {
2102 		r = -EINVAL;
2103 		goto dqm_unlock;
2104 	}
2105 
2106 	mqd_mgr = dqm->mqd_mgrs[mqd_type];
2107 	if (!mqd_mgr->checkpoint_mqd) {
2108 		r = -EOPNOTSUPP;
2109 		goto dqm_unlock;
2110 	}
2111 
2112 	mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);
2113 
2114 dqm_unlock:
2115 	dqm_unlock(dqm);
2116 	return r;
2117 }
2118 
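/*
 * Tear down all kernel and user-mode queues of a process under HWS/MES
 * scheduling, unregister the process, and reset wavefronts if preemption
 * failed. MQDs are freed with the dqm lock dropped.
 */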
2119 static int process_termination_cpsch(struct device_queue_manager *dqm,
2120 		struct qcm_process_device *qpd)
2121 {
2122 	int retval;
2123 	struct queue *q;
2124 	struct kernel_queue *kq, *kq_next;
2125 	struct mqd_manager *mqd_mgr;
2126 	struct device_process_node *cur, *next_dpn;
2127 	enum kfd_unmap_queues_filter filter =
2128 		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
2129 	bool found = false;
2130 
2131 	retval = 0;
2132 
2133 	dqm_lock(dqm);
2134 
2135 	/* Clean all kernel queues */
2136 	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
2137 		list_del(&kq->list);
2138 		decrement_queue_count(dqm, qpd, kq->queue);
2139 		qpd->is_debug = false;
2140 		dqm->total_queue_count--;
2141 		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
2142 	}
2143 
2144 	/* Clear all user mode queues */
2145 	list_for_each_entry(q, &qpd->queues_list, list) {
2146 		/* Both SDMA and SDMA_XGMI queues hold an SDMA queue allocation */
2147 		if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
2148 		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
2149 			deallocate_sdma_queue(dqm, q);
2150 
2151 		if (q->properties.is_active) {
2152 			decrement_queue_count(dqm, qpd, q);
2153 
2154 			if (dqm->dev->shared_resources.enable_mes) {
2155 				retval = remove_queue_mes(dqm, q, qpd);
2156 				if (retval)
2157 					pr_err("Failed to remove queue %d\n",
2158 						q->properties.queue_id);
2159 			}
2160 		}
2161 
2162 		dqm->total_queue_count--;
2163 	}
2164 
2165 	/* Unregister process */
2166 	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
2167 		if (qpd == cur->qpd) {
2168 			list_del(&cur->list);
2169 			kfree(cur);
2170 			dqm->processes_count--;
2171 			found = true;
2172 			break;
2173 		}
2174 	}
2175 
2176 	if (!dqm->dev->shared_resources.enable_mes)
2177 		retval = execute_queues_cpsch(dqm, filter, 0);
2178 
2179 	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
2180 		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
2181 		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
2182 		qpd->reset_wavefronts = false;
2183 	}
2184 
2185 	/* Lastly, free mqd resources.
2186 	 * Do free_mqd() after dqm_unlock to avoid circular locking.
2187 	 */
2188 	while (!list_empty(&qpd->queues_list)) {
2189 		q = list_first_entry(&qpd->queues_list, struct queue, list);
2190 		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
2191 				q->properties.type)];
2192 		list_del(&q->list);
2193 		qpd->queue_count--;
2194 		dqm_unlock(dqm);
2195 		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
2196 		dqm_lock(dqm);
2197 	}
2198 	dqm_unlock(dqm);
2199 
2200 	/* Outside the DQM lock because under the DQM lock we can't do
2201 	 * reclaim or take other locks that others hold while reclaiming.
2202 	 */
2203 	if (found)
2204 		kfd_dec_compute_active(dqm->dev);
2205 
2206 	return retval;
2207 }
2208 
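/* Create one MQD manager per MQD type; on failure, free those already created. */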
2209 static int init_mqd_managers(struct device_queue_manager *dqm)
2210 {
2211 	int i, j;
2212 	struct mqd_manager *mqd_mgr;
2213 
2214 	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
2215 		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
2216 		if (!mqd_mgr) {
2217 			pr_err("mqd manager [%d] initialization failed\n", i);
2218 			goto out_free;
2219 		}
2220 		dqm->mqd_mgrs[i] = mqd_mgr;
2221 	}
2222 
2223 	return 0;
2224 
2225 out_free:
2226 	for (j = 0; j < i; j++) {
2227 		kfree(dqm->mqd_mgrs[j]);
2228 		dqm->mqd_mgrs[j] = NULL;
2229 	}
2230 
2231 	return -ENOMEM;
2232 }
2233 
2234 /* Allocate one HIQ MQD (for HWS) and all SDMA MQDs in one contiguous chunk */
2235 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
2236 {
2237 	int retval;
2238 	struct kfd_dev *dev = dqm->dev;
2239 	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
2240 	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
2241 		get_num_all_sdma_engines(dqm) *
2242 		dev->device_info.num_sdma_queues_per_engine +
2243 		dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
2244 
2245 	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
2246 		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
2247 		(void *)&(mem_obj->cpu_ptr), false);
2248 
2249 	return retval;
2250 }
2251 
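/*
 * Allocate and set up a device queue manager: pick the scheduling policy
 * for the ASIC, wire up the matching ops and ASIC-specific hooks, create
 * the MQD managers and allocate the HIQ/SDMA MQD memory. Returns NULL on
 * failure.
 */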
2252 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
2253 {
2254 	struct device_queue_manager *dqm;
2255 
2256 	pr_debug("Loading device queue manager\n");
2257 
2258 	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
2259 	if (!dqm)
2260 		return NULL;
2261 
2262 	switch (dev->adev->asic_type) {
2263 	/* HWS is not available on Hawaii. */
2264 	case CHIP_HAWAII:
2265 	/* HWS depends on CWSR for timely dequeue. CWSR is not
2266 	 * available on Tonga.
2267 	 *
2268 	 * FIXME: This argument also applies to Kaveri.
2269 	 */
2270 	case CHIP_TONGA:
2271 		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
2272 		break;
2273 	default:
2274 		dqm->sched_policy = sched_policy;
2275 		break;
2276 	}
2277 
2278 	dqm->dev = dev;
2279 	switch (dqm->sched_policy) {
2280 	case KFD_SCHED_POLICY_HWS:
2281 	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
2282 		/* initialize dqm for cp scheduling */
2283 		dqm->ops.create_queue = create_queue_cpsch;
2284 		dqm->ops.initialize = initialize_cpsch;
2285 		dqm->ops.start = start_cpsch;
2286 		dqm->ops.stop = stop_cpsch;
2287 		dqm->ops.pre_reset = pre_reset;
2288 		dqm->ops.destroy_queue = destroy_queue_cpsch;
2289 		dqm->ops.update_queue = update_queue;
2290 		dqm->ops.register_process = register_process;
2291 		dqm->ops.unregister_process = unregister_process;
2292 		dqm->ops.uninitialize = uninitialize;
2293 		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
2294 		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
2295 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
2296 		dqm->ops.process_termination = process_termination_cpsch;
2297 		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
2298 		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
2299 		dqm->ops.get_wave_state = get_wave_state;
2300 		dqm->ops.reset_queues = reset_queues_cpsch;
2301 		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
2302 		dqm->ops.checkpoint_mqd = checkpoint_mqd;
2303 		break;
2304 	case KFD_SCHED_POLICY_NO_HWS:
2305 		/* initialize dqm for no cp scheduling */
2306 		dqm->ops.start = start_nocpsch;
2307 		dqm->ops.stop = stop_nocpsch;
2308 		dqm->ops.pre_reset = pre_reset;
2309 		dqm->ops.create_queue = create_queue_nocpsch;
2310 		dqm->ops.destroy_queue = destroy_queue_nocpsch;
2311 		dqm->ops.update_queue = update_queue;
2312 		dqm->ops.register_process = register_process;
2313 		dqm->ops.unregister_process = unregister_process;
2314 		dqm->ops.initialize = initialize_nocpsch;
2315 		dqm->ops.uninitialize = uninitialize;
2316 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
2317 		dqm->ops.process_termination = process_termination_nocpsch;
2318 		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
2319 		dqm->ops.restore_process_queues =
2320 			restore_process_queues_nocpsch;
2321 		dqm->ops.get_wave_state = get_wave_state;
2322 		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
2323 		dqm->ops.checkpoint_mqd = checkpoint_mqd;
2324 		break;
2325 	default:
2326 		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
2327 		goto out_free;
2328 	}
2329 
2330 	switch (dev->adev->asic_type) {
2331 	case CHIP_CARRIZO:
2332 		device_queue_manager_init_vi(&dqm->asic_ops);
2333 		break;
2334 
2335 	case CHIP_KAVERI:
2336 		device_queue_manager_init_cik(&dqm->asic_ops);
2337 		break;
2338 
2339 	case CHIP_HAWAII:
2340 		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
2341 		break;
2342 
2343 	case CHIP_TONGA:
2344 	case CHIP_FIJI:
2345 	case CHIP_POLARIS10:
2346 	case CHIP_POLARIS11:
2347 	case CHIP_POLARIS12:
2348 	case CHIP_VEGAM:
2349 		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
2350 		break;
2351 
2352 	default:
2353 		if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
2354 			device_queue_manager_init_v11(&dqm->asic_ops);
2355 		else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
2356 			device_queue_manager_init_v10_navi10(&dqm->asic_ops);
2357 		else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
2358 			device_queue_manager_init_v9(&dqm->asic_ops);
2359 		else {
2360 			WARN(1, "Unexpected ASIC family %u",
2361 			     dev->adev->asic_type);
2362 			goto out_free;
2363 		}
2364 	}
2365 
2366 	if (init_mqd_managers(dqm))
2367 		goto out_free;
2368 
2369 	if (allocate_hiq_sdma_mqd(dqm)) {
2370 		pr_err("Failed to allocate hiq sdma mqd chunk buffer\n");
2371 		goto out_free;
2372 	}
2373 
2374 	if (!dqm->ops.initialize(dqm))
2375 		return dqm;
2376 
2377 out_free:
2378 	kfree(dqm);
2379 	return NULL;
2380 }
2381 
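/* Free the HIQ/SDMA MQD chunk allocated by allocate_hiq_sdma_mqd(). */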
2382 static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,
2383 				    struct kfd_mem_obj *mqd)
2384 {
2385 	WARN(!mqd, "No hiq sdma mqd chunk to free");
2386 
2387 	amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem);
2388 }
2389 
2390 void device_queue_manager_uninit(struct device_queue_manager *dqm)
2391 {
2392 	dqm->ops.uninitialize(dqm);
2393 	deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
2394 	kfree(dqm);
2395 }
2396 
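/* Evict all queues of the process identified by @pasid from this device. */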
2397 int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
2398 {
2399 	struct kfd_process_device *pdd;
2400 	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
2401 	int ret = 0;
2402 
2403 	if (!p)
2404 		return -EINVAL;
2405 	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
2406 	pdd = kfd_get_process_device_data(dqm->dev, p);
2407 	if (pdd)
2408 		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
2409 	kfd_unref_process(p);
2410 
2411 	return ret;
2412 }
2413 
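/* hw_exception_work handler: triggers a full GPU reset (scheduled on an HWS hang). */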
2414 static void kfd_process_hw_exception(struct work_struct *work)
2415 {
2416 	struct device_queue_manager *dqm = container_of(work,
2417 			struct device_queue_manager, hw_exception_work);
2418 	amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
2419 }
2420 
2421 #if defined(CONFIG_DEBUG_FS)
2422 
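/*
 * Pretty-print a register dump: start a new output line at each gap in
 * the register offsets and print at most eight consecutive registers per
 * line.
 */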
2423 static void seq_reg_dump(struct seq_file *m,
2424 			 uint32_t (*dump)[2], uint32_t n_regs)
2425 {
2426 	uint32_t i, count;
2427 
2428 	for (i = 0, count = 0; i < n_regs; i++) {
2429 		if (count == 0 ||
2430 		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
2431 			seq_printf(m, "%s    %08x: %08x",
2432 				   i ? "\n" : "",
2433 				   dump[i][0], dump[i][1]);
2434 			count = 7;
2435 		} else {
2436 			seq_printf(m, " %08x", dump[i][1]);
2437 			count--;
2438 		}
2439 	}
2440 
2441 	seq_puts(m, "\n");
2442 }
2443 
2444 int dqm_debugfs_hqds(struct seq_file *m, void *data)
2445 {
2446 	struct device_queue_manager *dqm = data;
2447 	uint32_t (*dump)[2], n_regs;
2448 	int pipe, queue;
2449 	int r = 0;
2450 
2451 	if (!dqm->sched_running) {
2452 		seq_puts(m, " Device is stopped\n");
2453 		return 0;
2454 	}
2455 
2456 	r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
2457 					KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
2458 					&dump, &n_regs);
2459 	if (!r) {
2460 		seq_printf(m, "  HIQ on MEC %d Pipe %d Queue %d\n",
2461 			   KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
2462 			   KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
2463 			   KFD_CIK_HIQ_QUEUE);
2464 		seq_reg_dump(m, dump, n_regs);
2465 
2466 		kfree(dump);
2467 	}
2468 
2469 	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
2470 		int pipe_offset = pipe * get_queues_per_pipe(dqm);
2471 
2472 		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
2473 			if (!test_bit(pipe_offset + queue,
2474 				      dqm->dev->shared_resources.cp_queue_bitmap))
2475 				continue;
2476 
2477 			r = dqm->dev->kfd2kgd->hqd_dump(
2478 				dqm->dev->adev, pipe, queue, &dump, &n_regs);
2479 			if (r)
2480 				break;
2481 
2482 			seq_printf(m, "  CP Pipe %d, Queue %d\n",
2483 				  pipe, queue);
2484 			seq_reg_dump(m, dump, n_regs);
2485 
2486 			kfree(dump);
2487 		}
2488 	}
2489 
2490 	for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
2491 		for (queue = 0;
2492 		     queue < dqm->dev->device_info.num_sdma_queues_per_engine;
2493 		     queue++) {
2494 			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
2495 				dqm->dev->adev, pipe, queue, &dump, &n_regs);
2496 			if (r)
2497 				break;
2498 
2499 			seq_printf(m, "  SDMA Engine %d, RLC %d\n",
2500 				  pipe, queue);
2501 			seq_reg_dump(m, dump, n_regs);
2502 
2503 			kfree(dump);
2504 		}
2505 	}
2506 
2507 	return r;
2508 }
2509 
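/*
 * Debugfs hook that deliberately provokes an HWS hang for driver/FW
 * debugging (see pm_debugfs_hang_hws()) and then forces a runlist update.
 */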
2510 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm)
2511 {
2512 	int r = 0;
2513 
2514 	dqm_lock(dqm);
2515 	r = pm_debugfs_hang_hws(&dqm->packet_mgr);
2516 	if (r) {
2517 		dqm_unlock(dqm);
2518 		return r;
2519 	}
2520 	dqm->active_runlist = true;
2521 	r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
2522 	dqm_unlock(dqm);
2523 
2524 	return r;
2525 }
2526 
2527 #endif
2528