xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c (revision 7cc9196675234d4de0e1e19b9da1a8b86ecfeedd)
1 /*
2  * Copyright 2019 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/firmware.h>
25 #include <drm/drm_exec.h>
26 
27 #include "amdgpu_mes.h"
28 #include "amdgpu.h"
29 #include "soc15_common.h"
30 #include "amdgpu_mes_ctx.h"
31 
32 #define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
33 #define AMDGPU_ONE_DOORBELL_SIZE 8
34 
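/*
 * Size of the doorbell aperture reserved for one process: one 64-bit
 * doorbell per queue, up to the per-process queue limit, rounded up to a
 * whole page.
 */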
35 int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
36 {
37 	return roundup(AMDGPU_ONE_DOORBELL_SIZE *
38 		       AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
39 		       PAGE_SIZE);
40 }
41 
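/*
 * Allocate a kernel doorbell from the MES doorbell bitmap and return its
 * absolute dword offset within the doorbell BAR.  SDMA queues start the
 * search at the SDMA engine 0 doorbell index; all other queue types search
 * from slot 0.  Paired with amdgpu_mes_kernel_doorbell_free() below.
 */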
42 static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev,
43 					 int ip_type, uint64_t *doorbell_index)
44 {
45 	unsigned int offset, found;
46 	struct amdgpu_mes *mes = &adev->mes;
47 
48 	if (ip_type == AMDGPU_RING_TYPE_SDMA)
49 		offset = adev->doorbell_index.sdma_engine[0];
50 	else
51 		offset = 0;
52 
53 	found = find_next_zero_bit(mes->doorbell_bitmap, mes->num_mes_dbs, offset);
54 	if (found >= mes->num_mes_dbs) {
55 		DRM_WARN("No doorbell available\n");
56 		return -ENOSPC;
57 	}
58 
59 	set_bit(found, mes->doorbell_bitmap);
60 
61 	/* Get the absolute doorbell index on BAR */
62 	*doorbell_index = mes->db_start_dw_offset + found * 2;
63 	return 0;
64 }
65 
66 static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev,
67 					   uint32_t doorbell_index)
68 {
69 	unsigned int old, rel_index;
70 	struct amdgpu_mes *mes = &adev->mes;
71 
72 	/* Find the relative index of the doorbell in this object */
73 	rel_index = (doorbell_index - mes->db_start_dw_offset) / 2;
74 	old = test_and_clear_bit(rel_index, mes->doorbell_bitmap);
75 	WARN_ON(!old);
76 }
77 
78 static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
79 {
80 	int i;
81 	struct amdgpu_mes *mes = &adev->mes;
82 
83 	/* Bitmap for dynamic allocation of kernel doorbells */
84 	mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL);
85 	if (!mes->doorbell_bitmap) {
86 		DRM_ERROR("Failed to allocate MES doorbell bitmap\n");
87 		return -ENOMEM;
88 	}
89 
90 	mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE;
91 	for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) {
92 		adev->mes.aggregated_doorbells[i] = mes->db_start_dw_offset + i * 2;
93 		set_bit(i, mes->doorbell_bitmap);
94 	}
95 
96 	return 0;
97 }
98 
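/*
 * Allocate one page of GTT for the MES firmware event log and zero it.  The
 * buffer is exposed read-only through the amdgpu_mes_event_log debugfs file
 * registered at the bottom of this file.
 */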
99 static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
100 {
101 	int r;
102 
103 	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
104 				    AMDGPU_GEM_DOMAIN_GTT,
105 				    &adev->mes.event_log_gpu_obj,
106 				    &adev->mes.event_log_gpu_addr,
107 				    &adev->mes.event_log_cpu_addr);
108 	if (r) {
109 		dev_warn(adev->dev, "failed to create MES event log buffer (%d)\n", r);
110 		return r;
111 	}
112 
113 	memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE);
114 
115 	return 0;
117 }
118 
119 static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
120 {
121 	bitmap_free(adev->mes.doorbell_bitmap);
122 }
123 
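/*
 * Common MES software init: set up the id allocators and locks, program the
 * default VMID and HQD masks, allocate writeback slots for the scheduler
 * context, the query-status fence and register reads, and finally set up the
 * kernel doorbell bitmap and the event log buffer.  Undone by
 * amdgpu_mes_fini().
 */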
124 int amdgpu_mes_init(struct amdgpu_device *adev)
125 {
126 	int i, r;
127 
128 	adev->mes.adev = adev;
129 
130 	idr_init(&adev->mes.pasid_idr);
131 	idr_init(&adev->mes.gang_id_idr);
132 	idr_init(&adev->mes.queue_id_idr);
133 	ida_init(&adev->mes.doorbell_ida);
134 	spin_lock_init(&adev->mes.queue_id_lock);
135 	spin_lock_init(&adev->mes.ring_lock);
136 	mutex_init(&adev->mes.mutex_hidden);
137 
138 	adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
139 	adev->mes.vmid_mask_mmhub = 0xffffff00;
140 	adev->mes.vmid_mask_gfxhub = 0xffffff00;
141 
142 	for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
143 		/* use only the first MEC's pipes */
144 		if (i >= 4)
145 			continue;
146 		adev->mes.compute_hqd_mask[i] = 0xc;
147 	}
148 
149 	for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
150 		adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe;
151 
152 	for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
153 		if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) <
154 		    IP_VERSION(6, 0, 0))
155 			adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
156 		/* zero sdma_hqd_mask for non-existent engine */
157 		else if (adev->sdma.num_instances == 1)
158 			adev->mes.sdma_hqd_mask[i] = i ? 0 : 0xfc;
159 		else
160 			adev->mes.sdma_hqd_mask[i] = 0xfc;
161 	}
162 
163 	r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
164 	if (r) {
165 		dev_err(adev->dev,
166 			"(%d) sch_ctx_offs wb alloc failed\n", r);
167 		goto error_ids;
168 	}
169 	adev->mes.sch_ctx_gpu_addr =
170 		adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4);
171 	adev->mes.sch_ctx_ptr =
172 		(uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs];
173 
174 	r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
175 	if (r) {
176 		amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
177 		dev_err(adev->dev,
178 			"(%d) query_status_fence_offs wb alloc failed\n", r);
179 		goto error_ids;
180 	}
181 	adev->mes.query_status_fence_gpu_addr =
182 		adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
183 	adev->mes.query_status_fence_ptr =
184 		(uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];
185 
186 	r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs);
187 	if (r) {
188 		amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
189 		amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
190 		dev_err(adev->dev,
191 			"(%d) read_val_offs alloc failed\n", r);
192 		goto error_ids;
193 	}
194 	adev->mes.read_val_gpu_addr =
195 		adev->wb.gpu_addr + (adev->mes.read_val_offs * 4);
196 	adev->mes.read_val_ptr =
197 		(uint32_t *)&adev->wb.wb[adev->mes.read_val_offs];
198 
199 	r = amdgpu_mes_doorbell_init(adev);
200 	if (r)
201 		goto error;
202 
203 	r = amdgpu_mes_event_log_init(adev);
204 	if (r)
205 		goto error_doorbell;
206 
207 	return 0;
208 
209 error_doorbell:
210 	amdgpu_mes_doorbell_free(adev);
211 error:
212 	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
213 	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
214 	amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
215 error_ids:
216 	idr_destroy(&adev->mes.pasid_idr);
217 	idr_destroy(&adev->mes.gang_id_idr);
218 	idr_destroy(&adev->mes.queue_id_idr);
219 	ida_destroy(&adev->mes.doorbell_ida);
220 	mutex_destroy(&adev->mes.mutex_hidden);
221 	return r;
222 }
223 
224 void amdgpu_mes_fini(struct amdgpu_device *adev)
225 {
226 	amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
227 			      &adev->mes.event_log_gpu_addr,
228 			      &adev->mes.event_log_cpu_addr);
229 
230 	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
231 	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
232 	amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
233 	amdgpu_mes_doorbell_free(adev);
234 
235 	idr_destroy(&adev->mes.pasid_idr);
236 	idr_destroy(&adev->mes.gang_id_idr);
237 	idr_destroy(&adev->mes.queue_id_idr);
238 	ida_destroy(&adev->mes.doorbell_ida);
239 	mutex_destroy(&adev->mes.mutex_hidden);
240 }
241 
242 static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
243 {
244 	amdgpu_bo_free_kernel(&q->mqd_obj,
245 			      &q->mqd_gpu_addr,
246 			      &q->mqd_cpu_ptr);
247 }
248 
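/*
 * Create an MES process for @pasid: allocate its process context BO in GTT,
 * then register it in the pasid idr under the MES lock.  The process inherits
 * the default process quantum and the page directory of @vm.
 */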
249 int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
250 			      struct amdgpu_vm *vm)
251 {
252 	struct amdgpu_mes_process *process;
253 	int r;
254 
255 	/* allocate the mes process buffer */
256 	process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL);
257 	if (!process) {
258 		DRM_ERROR("no more memory to create mes process\n");
259 		return -ENOMEM;
260 	}
261 
262 	/* allocate the process context bo and map it */
263 	r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE,
264 				    AMDGPU_GEM_DOMAIN_GTT,
265 				    &process->proc_ctx_bo,
266 				    &process->proc_ctx_gpu_addr,
267 				    &process->proc_ctx_cpu_ptr);
268 	if (r) {
269 		DRM_ERROR("failed to allocate process context bo\n");
270 		goto clean_up_memory;
271 	}
272 	memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
273 
274 	/*
275 	 * Avoid taking any other locks under MES lock to avoid circular
276 	 * lock dependencies.
277 	 */
278 	amdgpu_mes_lock(&adev->mes);
279 
280 	/* add the mes process to idr list */
281 	r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
282 		      GFP_KERNEL);
283 	if (r < 0) {
284 		DRM_ERROR("failed to add pasid %d to the pasid idr\n", pasid);
285 		goto clean_up_ctx;
286 	}
287 
288 	INIT_LIST_HEAD(&process->gang_list);
289 	process->vm = vm;
290 	process->pasid = pasid;
291 	process->process_quantum = adev->mes.default_process_quantum;
292 	process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo);
293 
294 	amdgpu_mes_unlock(&adev->mes);
295 	return 0;
296 
297 clean_up_ctx:
298 	amdgpu_mes_unlock(&adev->mes);
299 	amdgpu_bo_free_kernel(&process->proc_ctx_bo,
300 			      &process->proc_ctx_gpu_addr,
301 			      &process->proc_ctx_cpu_ptr);
302 clean_up_memory:
303 	kfree(process);
304 	return r;
305 }
306 
307 void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)
308 {
309 	struct amdgpu_mes_process *process;
310 	struct amdgpu_mes_gang *gang, *tmp1;
311 	struct amdgpu_mes_queue *queue, *tmp2;
312 	struct mes_remove_queue_input queue_input;
313 	unsigned long flags;
314 	int r;
315 
316 	/*
317 	 * Avoid taking any other locks under MES lock to avoid circular
318 	 * lock dependencies.
319 	 */
320 	amdgpu_mes_lock(&adev->mes);
321 
322 	process = idr_find(&adev->mes.pasid_idr, pasid);
323 	if (!process) {
324 		DRM_WARN("pasid %d doesn't exist\n", pasid);
325 		amdgpu_mes_unlock(&adev->mes);
326 		return;
327 	}
328 
329 	/* Remove all queues from hardware */
330 	list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
331 		list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
332 			spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
333 			idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
334 			spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
335 
336 			queue_input.doorbell_offset = queue->doorbell_off;
337 			queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
338 
339 			r = adev->mes.funcs->remove_hw_queue(&adev->mes,
340 							     &queue_input);
341 			if (r)
342 				DRM_WARN("failed to remove hardware queue\n");
343 		}
344 
345 		idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
346 	}
347 
348 	idr_remove(&adev->mes.pasid_idr, pasid);
349 	amdgpu_mes_unlock(&adev->mes);
350 
351 	/* free all memory allocated by the process */
352 	list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
353 		/* free all queues in the gang */
354 		list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
355 			amdgpu_mes_queue_free_mqd(queue);
356 			list_del(&queue->list);
357 			kfree(queue);
358 		}
359 		amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
360 				      &gang->gang_ctx_gpu_addr,
361 				      &gang->gang_ctx_cpu_ptr);
362 		list_del(&gang->list);
363 		kfree(gang);
364 
365 	}
366 	amdgpu_bo_free_kernel(&process->proc_ctx_bo,
367 			      &process->proc_ctx_gpu_addr,
368 			      &process->proc_ctx_cpu_ptr);
369 	kfree(process);
370 }
371 
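/*
 * Create a gang (a group of queues the MES schedules together) for an
 * existing process.  The gang context BO is allocated up front so that only
 * idr and list manipulation happen under the MES lock; on success the new
 * gang id is returned through @gang_id.
 */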
372 int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
373 			struct amdgpu_mes_gang_properties *gprops,
374 			int *gang_id)
375 {
376 	struct amdgpu_mes_process *process;
377 	struct amdgpu_mes_gang *gang;
378 	int r;
379 
380 	/* allocate the mes gang buffer */
381 	gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL);
382 	if (!gang)
383 		return -ENOMEM;
385 
386 	/* allocate the gang context bo and map it to cpu space */
387 	r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE,
388 				    AMDGPU_GEM_DOMAIN_GTT,
389 				    &gang->gang_ctx_bo,
390 				    &gang->gang_ctx_gpu_addr,
391 				    &gang->gang_ctx_cpu_ptr);
392 	if (r) {
393 		DRM_ERROR("failed to allocate gang context bo\n");
394 		goto clean_up_mem;
395 	}
396 	memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
397 
398 	/*
399 	 * Avoid taking any other locks under MES lock to avoid circular
400 	 * lock dependencies.
401 	 */
402 	amdgpu_mes_lock(&adev->mes);
403 
404 	process = idr_find(&adev->mes.pasid_idr, pasid);
405 	if (!process) {
406 		DRM_ERROR("pasid %d doesn't exist\n", pasid);
407 		r = -EINVAL;
408 		goto clean_up_ctx;
409 	}
410 
411 	/* add the mes gang to idr list */
412 	r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
413 		      GFP_KERNEL);
414 	if (r < 0) {
415 		DRM_ERROR("failed to allocate idr for gang\n");
416 		goto clean_up_ctx;
417 	}
418 
419 	gang->gang_id = r;
420 	*gang_id = r;
421 
422 	INIT_LIST_HEAD(&gang->queue_list);
423 	gang->process = process;
424 	gang->priority = gprops->priority;
425 	gang->gang_quantum = gprops->gang_quantum ?
426 		gprops->gang_quantum : adev->mes.default_gang_quantum;
427 	gang->global_priority_level = gprops->global_priority_level;
428 	gang->inprocess_gang_priority = gprops->inprocess_gang_priority;
429 	list_add_tail(&gang->list, &process->gang_list);
430 
431 	amdgpu_mes_unlock(&adev->mes);
432 	return 0;
433 
434 clean_up_ctx:
435 	amdgpu_mes_unlock(&adev->mes);
436 	amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
437 			      &gang->gang_ctx_gpu_addr,
438 			      &gang->gang_ctx_cpu_ptr);
439 clean_up_mem:
440 	kfree(gang);
441 	return r;
442 }
443 
444 int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id)
445 {
446 	struct amdgpu_mes_gang *gang;
447 
448 	/*
449 	 * Avoid taking any other locks under MES lock to avoid circular
450 	 * lock dependencies.
451 	 */
452 	amdgpu_mes_lock(&adev->mes);
453 
454 	gang = idr_find(&adev->mes.gang_id_idr, gang_id);
455 	if (!gang) {
456 		DRM_ERROR("gang id %d doesn't exist\n", gang_id);
457 		amdgpu_mes_unlock(&adev->mes);
458 		return -EINVAL;
459 	}
460 
461 	if (!list_empty(&gang->queue_list)) {
462 		DRM_ERROR("queue list is not empty\n");
463 		amdgpu_mes_unlock(&adev->mes);
464 		return -EBUSY;
465 	}
466 
467 	idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
468 	list_del(&gang->list);
469 	amdgpu_mes_unlock(&adev->mes);
470 
471 	amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
472 			      &gang->gang_ctx_gpu_addr,
473 			      &gang->gang_ctx_cpu_ptr);
474 
475 	kfree(gang);
476 
477 	return 0;
478 }
479 
480 int amdgpu_mes_suspend(struct amdgpu_device *adev)
481 {
482 	struct idr *idp;
483 	struct amdgpu_mes_process *process;
484 	struct amdgpu_mes_gang *gang;
485 	struct mes_suspend_gang_input input;
486 	int r, pasid;
487 
488 	/*
489 	 * Avoid taking any other locks under MES lock to avoid circular
490 	 * lock dependencies.
491 	 */
492 	amdgpu_mes_lock(&adev->mes);
493 
494 	idp = &adev->mes.pasid_idr;
495 
496 	idr_for_each_entry(idp, process, pasid) {
497 		list_for_each_entry(gang, &process->gang_list, list) {
498 			r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
499 			if (r)
500 				DRM_ERROR("failed to suspend pasid %d gangid %d",
501 					 pasid, gang->gang_id);
502 		}
503 	}
504 
505 	amdgpu_mes_unlock(&adev->mes);
506 	return 0;
507 }
508 
509 int amdgpu_mes_resume(struct amdgpu_device *adev)
510 {
511 	struct idr *idp;
512 	struct amdgpu_mes_process *process;
513 	struct amdgpu_mes_gang *gang;
514 	struct mes_resume_gang_input input;
515 	int r, pasid;
516 
517 	/*
518 	 * Avoid taking any other locks under MES lock to avoid circular
519 	 * lock dependencies.
520 	 */
521 	amdgpu_mes_lock(&adev->mes);
522 
523 	idp = &adev->mes.pasid_idr;
524 
525 	idr_for_each_entry(idp, process, pasid) {
526 		list_for_each_entry(gang, &process->gang_list, list) {
527 			r = adev->mes.funcs->resume_gang(&adev->mes, &input);
528 			if (r)
529 				DRM_ERROR("failed to resume pasid %d gangid %d",
530 					 pasid, gang->gang_id);
531 		}
532 	}
533 
534 	amdgpu_mes_unlock(&adev->mes);
535 	return 0;
536 }
537 
538 static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev,
539 				     struct amdgpu_mes_queue *q,
540 				     struct amdgpu_mes_queue_properties *p)
541 {
542 	struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
543 	u32 mqd_size = mqd_mgr->mqd_size;
544 	int r;
545 
546 	r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
547 				    AMDGPU_GEM_DOMAIN_GTT,
548 				    &q->mqd_obj,
549 				    &q->mqd_gpu_addr, &q->mqd_cpu_ptr);
550 	if (r) {
551 		dev_warn(adev->dev, "failed to create queue mqd bo (%d)\n", r);
552 		return r;
553 	}
554 	memset(q->mqd_cpu_ptr, 0, mqd_size);
555 
556 	r = amdgpu_bo_reserve(q->mqd_obj, false);
557 	if (unlikely(r != 0))
558 		goto clean_up;
559 
560 	return 0;
561 
562 clean_up:
563 	amdgpu_bo_free_kernel(&q->mqd_obj,
564 			      &q->mqd_gpu_addr,
565 			      &q->mqd_cpu_ptr);
566 	return r;
567 }
568 
569 static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
570 				     struct amdgpu_mes_queue *q,
571 				     struct amdgpu_mes_queue_properties *p)
572 {
573 	struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
574 	struct amdgpu_mqd_prop mqd_prop = {0};
575 
576 	mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr;
577 	mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr;
578 	mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr;
579 	mqd_prop.wptr_gpu_addr = p->wptr_gpu_addr;
580 	mqd_prop.queue_size = p->queue_size;
581 	mqd_prop.use_doorbell = true;
582 	mqd_prop.doorbell_index = p->doorbell_off;
583 	mqd_prop.eop_gpu_addr = p->eop_gpu_addr;
584 	mqd_prop.hqd_pipe_priority = p->hqd_pipe_priority;
585 	mqd_prop.hqd_queue_priority = p->hqd_queue_priority;
586 	mqd_prop.hqd_active = false;
587 
588 	if (p->queue_type == AMDGPU_RING_TYPE_GFX ||
589 	    p->queue_type == AMDGPU_RING_TYPE_COMPUTE) {
590 		mutex_lock(&adev->srbm_mutex);
591 		amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0);
592 	}
593 
594 	mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop);
595 
596 	if (p->queue_type == AMDGPU_RING_TYPE_GFX ||
597 	    p->queue_type == AMDGPU_RING_TYPE_COMPUTE) {
598 		amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0);
599 		mutex_unlock(&adev->srbm_mutex);
600 	}
601 
602 	amdgpu_bo_unreserve(q->mqd_obj);
603 }
604 
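/*
 * Add a hardware queue to an existing gang: allocate and initialize the MQD,
 * reserve a queue id and a kernel doorbell, then ask the MES firmware to map
 * the queue through the add_hw_queue backend.  Everything is rolled back if
 * the firmware call fails.
 */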
605 int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
606 			    struct amdgpu_mes_queue_properties *qprops,
607 			    int *queue_id)
608 {
609 	struct amdgpu_mes_queue *queue;
610 	struct amdgpu_mes_gang *gang;
611 	struct mes_add_queue_input queue_input;
612 	unsigned long flags;
613 	int r;
614 
615 	memset(&queue_input, 0, sizeof(struct mes_add_queue_input));
616 
617 	/* allocate the mes queue buffer */
618 	queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL);
619 	if (!queue) {
620 		DRM_ERROR("Failed to allocate memory for queue\n");
621 		return -ENOMEM;
622 	}
623 
624 	/* Allocate the queue mqd */
625 	r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops);
626 	if (r)
627 		goto clean_up_memory;
628 
629 	/*
630 	 * Avoid taking any other locks under MES lock to avoid circular
631 	 * lock dependencies.
632 	 */
633 	amdgpu_mes_lock(&adev->mes);
634 
635 	gang = idr_find(&adev->mes.gang_id_idr, gang_id);
636 	if (!gang) {
637 		DRM_ERROR("gang id %d doesn't exist\n", gang_id);
638 		r = -EINVAL;
639 		goto clean_up_mqd;
640 	}
641 
642 	/* add the mes queue to idr list */
643 	spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
644 	r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0,
645 		      GFP_ATOMIC);
646 	if (r < 0) {
647 		spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
648 		goto clean_up_mqd;
649 	}
650 	spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
651 	*queue_id = queue->queue_id = r;
652 
653 	/* allocate a doorbell index for the queue */
654 	r = amdgpu_mes_kernel_doorbell_get(adev,
655 					  qprops->queue_type,
656 					  &qprops->doorbell_off);
657 	if (r)
658 		goto clean_up_queue_id;
659 
660 	/* initialize the queue mqd */
661 	amdgpu_mes_queue_init_mqd(adev, queue, qprops);
662 
663 	/* add hw queue to mes */
664 	queue_input.process_id = gang->process->pasid;
665 
666 	queue_input.page_table_base_addr =
667 		adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr -
668 		adev->gmc.vram_start;
669 
670 	queue_input.process_va_start = 0;
671 	queue_input.process_va_end =
672 		(adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT;
673 	queue_input.process_quantum = gang->process->process_quantum;
674 	queue_input.process_context_addr = gang->process->proc_ctx_gpu_addr;
675 	queue_input.gang_quantum = gang->gang_quantum;
676 	queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
677 	queue_input.inprocess_gang_priority = gang->inprocess_gang_priority;
678 	queue_input.gang_global_priority_level = gang->global_priority_level;
679 	queue_input.doorbell_offset = qprops->doorbell_off;
680 	queue_input.mqd_addr = queue->mqd_gpu_addr;
681 	queue_input.wptr_addr = qprops->wptr_gpu_addr;
682 	queue_input.wptr_mc_addr = qprops->wptr_mc_addr;
683 	queue_input.queue_type = qprops->queue_type;
684 	queue_input.paging = qprops->paging;
685 	queue_input.is_kfd_process = 0;
686 
687 	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
688 	if (r) {
689 		DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n",
690 			  qprops->doorbell_off);
691 		goto clean_up_doorbell;
692 	}
693 
694 	DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, "
695 		  "queue type=%d, doorbell=0x%llx\n",
696 		  gang->process->pasid, gang_id, qprops->queue_type,
697 		  qprops->doorbell_off);
698 
699 	queue->ring = qprops->ring;
700 	queue->doorbell_off = qprops->doorbell_off;
701 	queue->wptr_gpu_addr = qprops->wptr_gpu_addr;
702 	queue->queue_type = qprops->queue_type;
703 	queue->paging = qprops->paging;
704 	queue->gang = gang;
705 	queue->ring->mqd_ptr = queue->mqd_cpu_ptr;
706 	list_add_tail(&queue->list, &gang->queue_list);
707 
708 	amdgpu_mes_unlock(&adev->mes);
709 	return 0;
710 
711 clean_up_doorbell:
712 	amdgpu_mes_kernel_doorbell_free(adev, qprops->doorbell_off);
713 clean_up_queue_id:
714 	spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
715 	idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
716 	spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
717 clean_up_mqd:
718 	amdgpu_mes_unlock(&adev->mes);
719 	amdgpu_mes_queue_free_mqd(queue);
720 clean_up_memory:
721 	kfree(queue);
722 	return r;
723 }
724 
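/*
 * Tear down a hardware queue: drop it from the queue id idr, ask the MES
 * firmware to unmap it, then release its doorbell, MQD and memory.  Cleanup
 * continues even if the firmware call fails so the driver-side state stays
 * consistent.
 */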
725 int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
726 {
727 	unsigned long flags;
728 	struct amdgpu_mes_queue *queue;
729 	struct amdgpu_mes_gang *gang;
730 	struct mes_remove_queue_input queue_input;
731 	int r;
732 
733 	/*
734 	 * Avoid taking any other locks under MES lock to avoid circular
735 	 * lock dependencies.
736 	 */
737 	amdgpu_mes_lock(&adev->mes);
738 
739 	/* remove the mes queue from idr list */
740 	spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
741 
742 	queue = idr_find(&adev->mes.queue_id_idr, queue_id);
743 	if (!queue) {
744 		spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
745 		amdgpu_mes_unlock(&adev->mes);
746 		DRM_ERROR("queue id %d doesn't exist\n", queue_id);
747 		return -EINVAL;
748 	}
749 
750 	idr_remove(&adev->mes.queue_id_idr, queue_id);
751 	spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
752 
753 	DRM_DEBUG("try to remove queue, doorbell off = 0x%llx\n",
754 		  queue->doorbell_off);
755 
756 	gang = queue->gang;
757 	queue_input.doorbell_offset = queue->doorbell_off;
758 	queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
759 
760 	r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
761 	if (r)
762 		DRM_ERROR("failed to remove hardware queue, queue id = %d\n",
763 			  queue_id);
764 
765 	list_del(&queue->list);
766 	amdgpu_mes_kernel_doorbell_free(adev, queue->doorbell_off);
767 	amdgpu_mes_unlock(&adev->mes);
768 
769 	amdgpu_mes_queue_free_mqd(queue);
770 	kfree(queue);
771 	return 0;
772 }
773 
774 int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
775 				  struct amdgpu_ring *ring,
776 				  enum amdgpu_unmap_queues_action action,
777 				  u64 gpu_addr, u64 seq)
778 {
779 	struct mes_unmap_legacy_queue_input queue_input;
780 	int r;
781 
782 	queue_input.action = action;
783 	queue_input.queue_type = ring->funcs->type;
784 	queue_input.doorbell_offset = ring->doorbell_index;
785 	queue_input.pipe_id = ring->pipe;
786 	queue_input.queue_id = ring->queue;
787 	queue_input.trail_fence_addr = gpu_addr;
788 	queue_input.trail_fence_data = seq;
789 
790 	r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input);
791 	if (r)
792 		DRM_ERROR("failed to unmap legacy queue\n");
793 
794 	return r;
795 }
796 
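/*
 * Read a register through the MES firmware, typically used where the register
 * cannot be accessed directly (e.g. under SR-IOV).  The firmware writes the
 * value into the preallocated read_val writeback slot; 0 is returned on any
 * failure, so callers cannot distinguish an error from a register that reads
 * back as zero.
 */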
797 uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
798 {
799 	struct mes_misc_op_input op_input;
800 	int r, val = 0;
801 
802 	op_input.op = MES_MISC_OP_READ_REG;
803 	op_input.read_reg.reg_offset = reg;
804 	op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr;
805 
806 	if (!adev->mes.funcs->misc_op) {
807 		DRM_ERROR("mes rreg is not supported!\n");
808 		goto error;
809 	}
810 
811 	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
812 	if (r)
813 		DRM_ERROR("failed to read reg (0x%x)\n", reg);
814 	else
815 		val = *(adev->mes.read_val_ptr);
816 
817 error:
818 	return val;
819 }
820 
821 int amdgpu_mes_wreg(struct amdgpu_device *adev,
822 		    uint32_t reg, uint32_t val)
823 {
824 	struct mes_misc_op_input op_input;
825 	int r;
826 
827 	op_input.op = MES_MISC_OP_WRITE_REG;
828 	op_input.write_reg.reg_offset = reg;
829 	op_input.write_reg.reg_value = val;
830 
831 	if (!adev->mes.funcs->misc_op) {
832 		DRM_ERROR("mes wreg is not supported!\n");
833 		r = -EINVAL;
834 		goto error;
835 	}
836 
837 	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
838 	if (r)
839 		DRM_ERROR("failed to write reg (0x%x)\n", reg);
840 
841 error:
842 	return r;
843 }
844 
845 int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
846 				  uint32_t reg0, uint32_t reg1,
847 				  uint32_t ref, uint32_t mask)
848 {
849 	struct mes_misc_op_input op_input;
850 	int r;
851 
852 	op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT;
853 	op_input.wrm_reg.reg0 = reg0;
854 	op_input.wrm_reg.reg1 = reg1;
855 	op_input.wrm_reg.ref = ref;
856 	op_input.wrm_reg.mask = mask;
857 
858 	if (!adev->mes.funcs->misc_op) {
859 		DRM_ERROR("mes reg_write_reg_wait is not supported!\n");
860 		r = -EINVAL;
861 		goto error;
862 	}
863 
864 	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
865 	if (r)
866 		DRM_ERROR("failed to reg_write_reg_wait\n");
867 
868 error:
869 	return r;
870 }
871 
872 int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
873 			uint32_t val, uint32_t mask)
874 {
875 	struct mes_misc_op_input op_input;
876 	int r;
877 
878 	op_input.op = MES_MISC_OP_WRM_REG_WAIT;
879 	op_input.wrm_reg.reg0 = reg;
880 	op_input.wrm_reg.ref = val;
881 	op_input.wrm_reg.mask = mask;
882 
883 	if (!adev->mes.funcs->misc_op) {
884 		DRM_ERROR("mes reg wait is not supported!\n");
885 		r = -EINVAL;
886 		goto error;
887 	}
888 
889 	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
890 	if (r)
891 		DRM_ERROR("failed to reg_wait\n");
892 
893 error:
894 	return r;
895 }
896 
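/*
 * Program per-process shader debugger state through the MES misc op
 * interface.  The trap_en flag is only passed to firmware with MES API
 * version >= 14.  Callers that only need to flush the process context must
 * use amdgpu_mes_flush_shader_debugger() below instead.
 */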
897 int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
898 				uint64_t process_context_addr,
899 				uint32_t spi_gdbg_per_vmid_cntl,
900 				const uint32_t *tcp_watch_cntl,
901 				uint32_t flags,
902 				bool trap_en)
903 {
904 	struct mes_misc_op_input op_input = {0};
905 	int r;
906 
907 	if (!adev->mes.funcs->misc_op) {
908 		DRM_ERROR("mes set shader debugger is not supported!\n");
909 		return -EINVAL;
910 	}
911 
912 	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
913 	op_input.set_shader_debugger.process_context_addr = process_context_addr;
914 	op_input.set_shader_debugger.flags.u32all = flags;
915 
916 	/* use amdgpu_mes_flush_shader_debugger() instead */
917 	if (op_input.set_shader_debugger.flags.process_ctx_flush)
918 		return -EINVAL;
919 
920 	op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl;
921 	memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
922 			sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
923 
924 	if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
925 			AMDGPU_MES_API_VERSION_SHIFT) >= 14)
926 		op_input.set_shader_debugger.trap_en = trap_en;
927 
928 	amdgpu_mes_lock(&adev->mes);
929 
930 	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
931 	if (r)
932 		DRM_ERROR("failed to set_shader_debugger\n");
933 
934 	amdgpu_mes_unlock(&adev->mes);
935 
936 	return r;
937 }
938 
939 int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
940 				     uint64_t process_context_addr)
941 {
942 	struct mes_misc_op_input op_input = {0};
943 	int r;
944 
945 	if (!adev->mes.funcs->misc_op) {
946 		DRM_ERROR("mes flush shader debugger is not supported!\n");
947 		return -EINVAL;
948 	}
949 
950 	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
951 	op_input.set_shader_debugger.process_context_addr = process_context_addr;
952 	op_input.set_shader_debugger.flags.process_ctx_flush = true;
953 
954 	amdgpu_mes_lock(&adev->mes);
955 
956 	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
957 	if (r)
958 		DRM_ERROR("failed to flush the shader debugger\n");
959 
960 	amdgpu_mes_unlock(&adev->mes);
961 
962 	return r;
963 }
964 
965 static void
966 amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
967 			       struct amdgpu_ring *ring,
968 			       struct amdgpu_mes_queue_properties *props)
969 {
970 	props->queue_type = ring->funcs->type;
971 	props->hqd_base_gpu_addr = ring->gpu_addr;
972 	props->rptr_gpu_addr = ring->rptr_gpu_addr;
973 	props->wptr_gpu_addr = ring->wptr_gpu_addr;
974 	props->wptr_mc_addr =
975 		ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs;
976 	props->queue_size = ring->ring_size;
977 	props->eop_gpu_addr = ring->eop_gpu_addr;
978 	props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
979 	props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
980 	props->paging = false;
981 	props->ring = ring;
982 }
983 
984 #define DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(_eng)			\
985 do {									\
986        if (id_offs < AMDGPU_MES_CTX_MAX_OFFS)				\
987 		return offsetof(struct amdgpu_mes_ctx_meta_data,	\
988 				_eng[ring->idx].slots[id_offs]);        \
989        else if (id_offs == AMDGPU_MES_CTX_RING_OFFS)			\
990 		return offsetof(struct amdgpu_mes_ctx_meta_data,        \
991 				_eng[ring->idx].ring);                  \
992        else if (id_offs == AMDGPU_MES_CTX_IB_OFFS)			\
993 		return offsetof(struct amdgpu_mes_ctx_meta_data,        \
994 				_eng[ring->idx].ib);                    \
995        else if (id_offs == AMDGPU_MES_CTX_PADDING_OFFS)			\
996 		return offsetof(struct amdgpu_mes_ctx_meta_data,        \
997 				_eng[ring->idx].padding);               \
998 } while (0)
999 
1000 int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs)
1001 {
1002 	switch (ring->funcs->type) {
1003 	case AMDGPU_RING_TYPE_GFX:
1004 		DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(gfx);
1005 		break;
1006 	case AMDGPU_RING_TYPE_COMPUTE:
1007 		DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(compute);
1008 		break;
1009 	case AMDGPU_RING_TYPE_SDMA:
1010 		DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(sdma);
1011 		break;
1012 	default:
1013 		break;
1014 	}
1015 
1016 	WARN_ON(1);
1017 	return -EINVAL;
1018 }
1019 
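/*
 * Create a kernel ring on top of an MES hardware queue.  The ring borrows the
 * funcs of the first ring of the requested type, keeps its ring buffer and
 * rptr/wptr slots in the per-context meta data BO, and is mapped through
 * amdgpu_mes_add_hw_queue().  Currently only used by the MES self test in
 * this file; removed again with amdgpu_mes_remove_ring().
 */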
1020 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
1021 			int queue_type, int idx,
1022 			struct amdgpu_mes_ctx_data *ctx_data,
1023 			struct amdgpu_ring **out)
1024 {
1025 	struct amdgpu_ring *ring;
1026 	struct amdgpu_mes_gang *gang;
1027 	struct amdgpu_mes_queue_properties qprops = {0};
1028 	int r, queue_id, pasid;
1029 
1030 	/*
1031 	 * Avoid taking any other locks under MES lock to avoid circular
1032 	 * lock dependencies.
1033 	 */
1034 	amdgpu_mes_lock(&adev->mes);
1035 	gang = idr_find(&adev->mes.gang_id_idr, gang_id);
1036 	if (!gang) {
1037 		DRM_ERROR("gang id %d doesn't exist\n", gang_id);
1038 		amdgpu_mes_unlock(&adev->mes);
1039 		return -EINVAL;
1040 	}
1041 	pasid = gang->process->pasid;
1042 
1043 	ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
1044 	if (!ring) {
1045 		amdgpu_mes_unlock(&adev->mes);
1046 		return -ENOMEM;
1047 	}
1048 
1049 	ring->ring_obj = NULL;
1050 	ring->use_doorbell = true;
1051 	ring->is_mes_queue = true;
1052 	ring->mes_ctx = ctx_data;
1053 	ring->idx = idx;
1054 	ring->no_scheduler = true;
1055 
1056 	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
1057 		int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
1058 				      compute[ring->idx].mec_hpd);
1059 		ring->eop_gpu_addr =
1060 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
1061 	}
1062 
1063 	switch (queue_type) {
1064 	case AMDGPU_RING_TYPE_GFX:
1065 		ring->funcs = adev->gfx.gfx_ring[0].funcs;
1066 		ring->me = adev->gfx.gfx_ring[0].me;
1067 		ring->pipe = adev->gfx.gfx_ring[0].pipe;
1068 		break;
1069 	case AMDGPU_RING_TYPE_COMPUTE:
1070 		ring->funcs = adev->gfx.compute_ring[0].funcs;
1071 		ring->me = adev->gfx.compute_ring[0].me;
1072 		ring->pipe = adev->gfx.compute_ring[0].pipe;
1073 		break;
1074 	case AMDGPU_RING_TYPE_SDMA:
1075 		ring->funcs = adev->sdma.instance[0].ring.funcs;
1076 		break;
1077 	default:
1078 		BUG();
1079 	}
1080 
1081 	r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
1082 			     AMDGPU_RING_PRIO_DEFAULT, NULL);
1083 	if (r)
1084 		goto clean_up_memory;
1085 
1086 	amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
1087 
1088 	dma_fence_wait(gang->process->vm->last_update, false);
1089 	dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
1090 	amdgpu_mes_unlock(&adev->mes);
1091 
1092 	r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
1093 	if (r)
1094 		goto clean_up_ring;
1095 
1096 	ring->hw_queue_id = queue_id;
1097 	ring->doorbell_index = qprops.doorbell_off;
1098 
1099 	if (queue_type == AMDGPU_RING_TYPE_GFX)
1100 		sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id);
1101 	else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
1102 		sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
1103 			queue_id);
1104 	else if (queue_type == AMDGPU_RING_TYPE_SDMA)
1105 		sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
1106 			queue_id);
1107 	else
1108 		BUG();
1109 
1110 	*out = ring;
1111 	return 0;
1112 
1113 clean_up_ring:
1114 	amdgpu_ring_fini(ring);
1115 clean_up_memory:
1116 	kfree(ring);
1117 	amdgpu_mes_unlock(&adev->mes);
1118 	return r;
1119 }
1120 
1121 void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
1122 			    struct amdgpu_ring *ring)
1123 {
1124 	if (!ring)
1125 		return;
1126 
1127 	amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id);
1128 	amdgpu_ring_fini(ring);
1129 	kfree(ring);
1130 }
1131 
1132 uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
1133 						   enum amdgpu_mes_priority_level prio)
1134 {
1135 	return adev->mes.aggregated_doorbells[prio];
1136 }
1137 
1138 int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
1139 				   struct amdgpu_mes_ctx_data *ctx_data)
1140 {
1141 	int r;
1142 
1143 	r = amdgpu_bo_create_kernel(adev,
1144 			    sizeof(struct amdgpu_mes_ctx_meta_data),
1145 			    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1146 			    &ctx_data->meta_data_obj,
1147 			    &ctx_data->meta_data_mc_addr,
1148 			    &ctx_data->meta_data_ptr);
1149 	if (r) {
1150 		dev_warn(adev->dev, "(%d) create CTX bo failed\n", r);
1151 		return r;
1152 	}
1153 
1154 	if (!ctx_data->meta_data_obj)
1155 		return -ENOMEM;
1156 
1157 	memset(ctx_data->meta_data_ptr, 0,
1158 	       sizeof(struct amdgpu_mes_ctx_meta_data));
1159 
1160 	return 0;
1161 }
1162 
1163 void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data)
1164 {
1165 	if (ctx_data->meta_data_obj)
1166 		amdgpu_bo_free_kernel(&ctx_data->meta_data_obj,
1167 				      &ctx_data->meta_data_mc_addr,
1168 				      &ctx_data->meta_data_ptr);
1169 }
1170 
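/*
 * Map the context meta data BO into @vm at ctx_data->meta_data_gpu_addr.
 * The BO and the page directory are locked with drm_exec, the mapping and
 * page table updates are fenced into a local sync object, and the function
 * waits for those updates to complete before returning.
 */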
1171 int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
1172 				 struct amdgpu_vm *vm,
1173 				 struct amdgpu_mes_ctx_data *ctx_data)
1174 {
1175 	struct amdgpu_bo_va *bo_va;
1176 	struct amdgpu_sync sync;
1177 	struct drm_exec exec;
1178 	int r;
1179 
1180 	amdgpu_sync_create(&sync);
1181 
1182 	drm_exec_init(&exec, 0, 0);
1183 	drm_exec_until_all_locked(&exec) {
1184 		r = drm_exec_lock_obj(&exec,
1185 				      &ctx_data->meta_data_obj->tbo.base);
1186 		drm_exec_retry_on_contention(&exec);
1187 		if (unlikely(r))
1188 			goto error_fini_exec;
1189 
1190 		r = amdgpu_vm_lock_pd(vm, &exec, 0);
1191 		drm_exec_retry_on_contention(&exec);
1192 		if (unlikely(r))
1193 			goto error_fini_exec;
1194 	}
1195 
1196 	bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj);
1197 	if (!bo_va) {
1198 		DRM_ERROR("failed to create bo_va for meta data BO\n");
1199 		r = -ENOMEM;
1200 		goto error_fini_exec;
1201 	}
1202 
1203 	r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0,
1204 			     sizeof(struct amdgpu_mes_ctx_meta_data),
1205 			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
1206 			     AMDGPU_PTE_EXECUTABLE);
1207 
1208 	if (r) {
1209 		DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r);
1210 		goto error_del_bo_va;
1211 	}
1212 
1213 	r = amdgpu_vm_bo_update(adev, bo_va, false);
1214 	if (r) {
1215 		DRM_ERROR("failed to do vm_bo_update on meta data\n");
1216 		goto error_del_bo_va;
1217 	}
1218 	amdgpu_sync_fence(&sync, bo_va->last_pt_update);
1219 
1220 	r = amdgpu_vm_update_pdes(adev, vm, false);
1221 	if (r) {
1222 		DRM_ERROR("failed to update pdes on meta data\n");
1223 		goto error_del_bo_va;
1224 	}
1225 	amdgpu_sync_fence(&sync, vm->last_update);
1226 
1227 	amdgpu_sync_wait(&sync, false);
1228 	drm_exec_fini(&exec);
1229 
1230 	amdgpu_sync_free(&sync);
1231 	ctx_data->meta_data_va = bo_va;
1232 	return 0;
1233 
1234 error_del_bo_va:
1235 	amdgpu_vm_bo_del(adev, bo_va);
1236 
1237 error_fini_exec:
1238 	drm_exec_fini(&exec);
1239 	amdgpu_sync_free(&sync);
1240 	return r;
1241 }
1242 
1243 int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
1244 				   struct amdgpu_mes_ctx_data *ctx_data)
1245 {
1246 	struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va;
1247 	struct amdgpu_bo *bo = ctx_data->meta_data_obj;
1248 	struct amdgpu_vm *vm = bo_va->base.vm;
1249 	struct dma_fence *fence;
1250 	struct drm_exec exec;
1251 	long r;
1252 
1253 	drm_exec_init(&exec, 0, 0);
1254 	drm_exec_until_all_locked(&exec) {
1255 		r = drm_exec_lock_obj(&exec,
1256 				      &ctx_data->meta_data_obj->tbo.base);
1257 		drm_exec_retry_on_contention(&exec);
1258 		if (unlikely(r))
1259 			goto out_unlock;
1260 
1261 		r = amdgpu_vm_lock_pd(vm, &exec, 0);
1262 		drm_exec_retry_on_contention(&exec);
1263 		if (unlikely(r))
1264 			goto out_unlock;
1265 	}
1266 
1267 	amdgpu_vm_bo_del(adev, bo_va);
1268 	if (!amdgpu_vm_ready(vm))
1269 		goto out_unlock;
1270 
1271 	r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
1272 				   &fence);
1273 	if (r)
1274 		goto out_unlock;
1275 	if (fence) {
1276 		amdgpu_bo_fence(bo, fence, true);
1277 		fence = NULL;
1278 	}
1279 
1280 	r = amdgpu_vm_clear_freed(adev, vm, &fence);
1281 	if (r || !fence)
1282 		goto out_unlock;
1283 
1284 	dma_fence_wait(fence, false);
1285 	amdgpu_bo_fence(bo, fence, true);
1286 	dma_fence_put(fence);
1287 
1288 out_unlock:
1289 	if (unlikely(r < 0))
1290 		dev_err(adev->dev, "failed to clear page tables (%ld)\n", r);
1291 	drm_exec_fini(&exec);
1292 
1293 	return r;
1294 }
1295 
1296 static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev,
1297 					  int pasid, int *gang_id,
1298 					  int queue_type, int num_queue,
1299 					  struct amdgpu_ring **added_rings,
1300 					  struct amdgpu_mes_ctx_data *ctx_data)
1301 {
1302 	struct amdgpu_ring *ring;
1303 	struct amdgpu_mes_gang_properties gprops = {0};
1304 	int r, j;
1305 
1306 	/* create a gang for the process */
1307 	gprops.priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
1308 	gprops.gang_quantum = adev->mes.default_gang_quantum;
1309 	gprops.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
1310 	gprops.priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
1311 	gprops.global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
1312 
1313 	r = amdgpu_mes_add_gang(adev, pasid, &gprops, gang_id);
1314 	if (r) {
1315 		DRM_ERROR("failed to add gang\n");
1316 		return r;
1317 	}
1318 
1319 	/* create queues for the gang */
1320 	for (j = 0; j < num_queue; j++) {
1321 		r = amdgpu_mes_add_ring(adev, *gang_id, queue_type, j,
1322 					ctx_data, &ring);
1323 		if (r) {
1324 			DRM_ERROR("failed to add ring\n");
1325 			break;
1326 		}
1327 
1328 		DRM_INFO("ring %s was added\n", ring->name);
1329 		added_rings[j] = ring;
1330 	}
1331 
1332 	return 0;
1333 }
1334 
1335 static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings)
1336 {
1337 	struct amdgpu_ring *ring;
1338 	int i, r;
1339 
1340 	for (i = 0; i < AMDGPU_MES_CTX_MAX_RINGS; i++) {
1341 		ring = added_rings[i];
1342 		if (!ring)
1343 			continue;
1344 
1345 		r = amdgpu_ring_test_helper(ring);
1346 		if (r)
1347 			return r;
1348 
1349 		r = amdgpu_ring_test_ib(ring, 1000 * 10);
1350 		if (r) {
1351 			DRM_DEV_ERROR(ring->adev->dev,
1352 				      "ring %s ib test failed (%d)\n",
1353 				      ring->name, r);
1354 			return r;
1355 		}
1356 		DRM_INFO("ring %s ib test pass\n", ring->name);
1357 	}
1358 
1359 	return 0;
1360 }
1361 
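/*
 * MES self test: create a temporary VM and process, add one gang with one
 * queue per supported queue type (GFX, compute, SDMA), run ring and IB tests
 * on every queue, then tear everything down again.  Note that the function
 * always returns 0; test failures are only reported in the kernel log.
 */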
1362 int amdgpu_mes_self_test(struct amdgpu_device *adev)
1363 {
1364 	struct amdgpu_vm *vm = NULL;
1365 	struct amdgpu_mes_ctx_data ctx_data = {0};
1366 	struct amdgpu_ring *added_rings[AMDGPU_MES_CTX_MAX_RINGS] = { NULL };
1367 	int gang_ids[3] = {0};
1368 	int queue_types[][2] = { { AMDGPU_RING_TYPE_GFX, 1 },
1369 				 { AMDGPU_RING_TYPE_COMPUTE, 1 },
1370 				 { AMDGPU_RING_TYPE_SDMA, 1} };
1371 	int i, r, pasid, k = 0;
1372 
1373 	pasid = amdgpu_pasid_alloc(16);
1374 	if (pasid < 0) {
1375 		dev_warn(adev->dev, "No more PASIDs available!");
1376 		pasid = 0;
1377 	}
1378 
1379 	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1380 	if (!vm) {
1381 		r = -ENOMEM;
1382 		goto error_pasid;
1383 	}
1384 
1385 	r = amdgpu_vm_init(adev, vm, -1);
1386 	if (r) {
1387 		DRM_ERROR("failed to initialize vm\n");
1388 		goto error_pasid;
1389 	}
1390 
1391 	r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data);
1392 	if (r) {
1393 		DRM_ERROR("failed to alloc ctx meta data\n");
1394 		goto error_fini;
1395 	}
1396 
1397 	ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_BOTTOM;
1398 	r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data);
1399 	if (r) {
1400 		DRM_ERROR("failed to map ctx meta data\n");
1401 		goto error_vm;
1402 	}
1403 
1404 	r = amdgpu_mes_create_process(adev, pasid, vm);
1405 	if (r) {
1406 		DRM_ERROR("failed to create MES process\n");
1407 		goto error_vm;
1408 	}
1409 
1410 	for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
1411 		/* On GFX v10.3, the firmware does not support mapping SDMA queues. */
1412 		if (amdgpu_ip_version(adev, GC_HWIP, 0) >=
1413 			    IP_VERSION(10, 3, 0) &&
1414 		    amdgpu_ip_version(adev, GC_HWIP, 0) <
1415 			    IP_VERSION(11, 0, 0) &&
1416 		    queue_types[i][0] == AMDGPU_RING_TYPE_SDMA)
1417 			continue;
1418 
1419 		r = amdgpu_mes_test_create_gang_and_queues(adev, pasid,
1420 							   &gang_ids[i],
1421 							   queue_types[i][0],
1422 							   queue_types[i][1],
1423 							   &added_rings[k],
1424 							   &ctx_data);
1425 		if (r)
1426 			goto error_queues;
1427 
1428 		k += queue_types[i][1];
1429 	}
1430 
1431 	/* start ring test and ib test for MES queues */
1432 	amdgpu_mes_test_queues(added_rings);
1433 
1434 error_queues:
1435 	/* remove all queues */
1436 	for (i = 0; i < ARRAY_SIZE(added_rings); i++) {
1437 		if (!added_rings[i])
1438 			continue;
1439 		amdgpu_mes_remove_ring(adev, added_rings[i]);
1440 	}
1441 
1442 	for (i = 0; i < ARRAY_SIZE(gang_ids); i++) {
1443 		if (!gang_ids[i])
1444 			continue;
1445 		amdgpu_mes_remove_gang(adev, gang_ids[i]);
1446 	}
1447 
1448 	amdgpu_mes_destroy_process(adev, pasid);
1449 
1450 error_vm:
1451 	amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data);
1452 
1453 error_fini:
1454 	amdgpu_vm_fini(adev, vm);
1455 
1456 error_pasid:
1457 	if (pasid)
1458 		amdgpu_pasid_free(pasid);
1459 
1460 	amdgpu_mes_ctx_free_meta_data(&ctx_data);
1461 	kfree(vm);
1462 	return 0;
1463 }
1464 
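/*
 * Request the MES firmware image for @pipe and record its ucode/data entry
 * points.  On GC 11+ the scheduler pipe first tries the *_mes_2.bin image and
 * falls back to the legacy *_mes.bin name, while the KIQ pipe uses
 * *_mes1.bin.  With PSP front-door loading the ucode and data segments are
 * also registered with the firmware framework so the PSP can load them.
 */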
1465 int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
1466 {
1467 	const struct mes_firmware_header_v1_0 *mes_hdr;
1468 	struct amdgpu_firmware_info *info;
1469 	char ucode_prefix[30];
1470 	char fw_name[40];
1471 	bool need_retry = false;
1472 	int r;
1473 
1474 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix,
1475 				       sizeof(ucode_prefix));
1476 	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
1477 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
1478 			 ucode_prefix,
1479 			 pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1");
1480 		need_retry = true;
1481 	} else {
1482 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
1483 			 ucode_prefix,
1484 			 pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
1485 	}
1486 
1487 	r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name);
1488 	if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) {
1489 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin",
1490 			 ucode_prefix);
1491 		DRM_INFO("try to fall back to %s\n", fw_name);
1492 		r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe],
1493 					 fw_name);
1494 	}
1495 
1496 	if (r)
1497 		goto out;
1498 
1499 	mes_hdr = (const struct mes_firmware_header_v1_0 *)
1500 		adev->mes.fw[pipe]->data;
1501 	adev->mes.uc_start_addr[pipe] =
1502 		le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
1503 		((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
1504 	adev->mes.data_start_addr[pipe] =
1505 		le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
1506 		((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
1507 
1508 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1509 		int ucode, ucode_data;
1510 
1511 		if (pipe == AMDGPU_MES_SCHED_PIPE) {
1512 			ucode = AMDGPU_UCODE_ID_CP_MES;
1513 			ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA;
1514 		} else {
1515 			ucode = AMDGPU_UCODE_ID_CP_MES1;
1516 			ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA;
1517 		}
1518 
1519 		info = &adev->firmware.ucode[ucode];
1520 		info->ucode_id = ucode;
1521 		info->fw = adev->mes.fw[pipe];
1522 		adev->firmware.fw_size +=
1523 			ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
1524 			      PAGE_SIZE);
1525 
1526 		info = &adev->firmware.ucode[ucode_data];
1527 		info->ucode_id = ucode_data;
1528 		info->fw = adev->mes.fw[pipe];
1529 		adev->firmware.fw_size +=
1530 			ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
1531 			      PAGE_SIZE);
1532 	}
1533 
1534 	return 0;
1535 out:
1536 	amdgpu_ucode_release(&adev->mes.fw[pipe]);
1537 	return r;
1538 }
1539 
1540 #if defined(CONFIG_DEBUG_FS)
1541 
1542 static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
1543 {
1544 	struct amdgpu_device *adev = m->private;
1545 	uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
1546 
1547 	seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
1548 		     mem, PAGE_SIZE, false);
1549 
1550 	return 0;
1551 }
1552 
1554 DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
1555 
1556 #endif
1557 
1558 void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
1559 {
1560 #if defined(CONFIG_DEBUG_FS)
1561 	struct drm_minor *minor = adev_to_drm(adev)->primary;
1562 	struct dentry *root = minor->debugfs_root;
1563 
1564 	if (adev->enable_mes)
1565 		debugfs_create_file("amdgpu_mes_event_log", 0444, root,
1566 				    adev, &amdgpu_debugfs_mes_event_log_fops);
1567 
1568 #endif
1569 }
1570