1 /*
2  * Copyright 2023 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include "amdgpu.h"
27 #include "gfx_v12_0.h"
28 #include "soc15_common.h"
29 #include "soc21.h"
30 #include "gc/gc_12_0_0_offset.h"
31 #include "gc/gc_12_0_0_sh_mask.h"
32 #include "gc/gc_11_0_0_default.h"
33 #include "v12_structs.h"
34 #include "mes_v12_api_def.h"
35 
36 MODULE_FIRMWARE("amdgpu/gc_12_0_0_mes.bin");
37 MODULE_FIRMWARE("amdgpu/gc_12_0_0_mes1.bin");
38 MODULE_FIRMWARE("amdgpu/gc_12_0_0_uni_mes.bin");
39 MODULE_FIRMWARE("amdgpu/gc_12_0_1_mes.bin");
40 MODULE_FIRMWARE("amdgpu/gc_12_0_1_mes1.bin");
41 MODULE_FIRMWARE("amdgpu/gc_12_0_1_uni_mes.bin");
42 
43 static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block);
44 static int mes_v12_0_hw_fini(struct amdgpu_ip_block *ip_block);
45 static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev);
46 static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev);
47 
48 #define MES_EOP_SIZE   2048
49 
50 static void mes_v12_0_ring_set_wptr(struct amdgpu_ring *ring)
51 {
52 	struct amdgpu_device *adev = ring->adev;
53 
54 	if (ring->use_doorbell) {
55 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
56 			     ring->wptr);
57 		WDOORBELL64(ring->doorbell_index, ring->wptr);
58 	} else {
59 		BUG();
60 	}
61 }
62 
63 static u64 mes_v12_0_ring_get_rptr(struct amdgpu_ring *ring)
64 {
65 	return *ring->rptr_cpu_addr;
66 }
67 
68 static u64 mes_v12_0_ring_get_wptr(struct amdgpu_ring *ring)
69 {
70 	u64 wptr;
71 
72 	if (ring->use_doorbell)
73 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
74 	else
75 		BUG();
76 	return wptr;
77 }
78 
79 static const struct amdgpu_ring_funcs mes_v12_0_ring_funcs = {
80 	.type = AMDGPU_RING_TYPE_MES,
81 	.align_mask = 1,
82 	.nop = 0,
83 	.support_64bit_ptrs = true,
84 	.get_rptr = mes_v12_0_ring_get_rptr,
85 	.get_wptr = mes_v12_0_ring_get_wptr,
86 	.set_wptr = mes_v12_0_ring_set_wptr,
87 	.insert_nop = amdgpu_ring_insert_nop,
88 };
89 
90 static const char *mes_v12_0_opcodes[] = {
91 	"SET_HW_RSRC",
92 	"SET_SCHEDULING_CONFIG",
93 	"ADD_QUEUE",
94 	"REMOVE_QUEUE",
95 	"PERFORM_YIELD",
96 	"SET_GANG_PRIORITY_LEVEL",
97 	"SUSPEND",
98 	"RESUME",
99 	"RESET",
100 	"SET_LOG_BUFFER",
	"CHANGE_GANG_PRIORITY",
102 	"QUERY_SCHEDULER_STATUS",
103 	"unused",
104 	"SET_DEBUG_VMID",
105 	"MISC",
106 	"UPDATE_ROOT_PAGE_TABLE",
107 	"AMD_LOG",
108 	"SET_SE_MODE",
109 	"SET_GANG_SUBMIT",
110 	"SET_HW_RSRC_1",
111 };
112 
113 static const char *mes_v12_0_misc_opcodes[] = {
114 	"WRITE_REG",
115 	"INV_GART",
116 	"QUERY_STATUS",
117 	"READ_REG",
118 	"WAIT_REG_MEM",
119 	"SET_SHADER_DEBUGGER",
120 	"NOTIFY_WORK_ON_UNMAPPED_QUEUE",
121 	"NOTIFY_TO_UNMAP_PROCESSES",
122 };
123 
124 static const char *mes_v12_0_get_op_string(union MESAPI__MISC *x_pkt)
125 {
126 	const char *op_str = NULL;
127 
128 	if (x_pkt->header.opcode < ARRAY_SIZE(mes_v12_0_opcodes))
129 		op_str = mes_v12_0_opcodes[x_pkt->header.opcode];
130 
131 	return op_str;
132 }
133 
134 static const char *mes_v12_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
135 {
136 	const char *op_str = NULL;
137 
138 	if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
139 	    (x_pkt->opcode < ARRAY_SIZE(mes_v12_0_misc_opcodes)))
140 		op_str = mes_v12_0_misc_opcodes[x_pkt->opcode];
141 
142 	return op_str;
143 }
144 
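/*
 * Submit one MES API packet on the given pipe and poll for completion.
 * Every packet carries a MES_API_STATUS fence that the firmware signals
 * once the packet is processed; a QUERY_SCHEDULER_STATUS packet is
 * appended whose completion fence doubles as the ring's sequence fence,
 * so waiting on that sequence (plus the status write-back) confirms the
 * original packet was consumed.
 */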
145 static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
146 					    int pipe, void *pkt, int size,
147 					    int api_status_off)
148 {
149 	union MESAPI__QUERY_MES_STATUS mes_status_pkt;
150 	signed long timeout = 2100000; /* 2100 ms */
151 	struct amdgpu_device *adev = mes->adev;
152 	struct amdgpu_ring *ring = &mes->ring[pipe];
153 	spinlock_t *ring_lock = &mes->ring_lock[pipe];
154 	struct MES_API_STATUS *api_status;
155 	union MESAPI__MISC *x_pkt = pkt;
156 	const char *op_str, *misc_op_str;
157 	unsigned long flags;
158 	u64 status_gpu_addr;
159 	u32 seq, status_offset;
160 	u64 *status_ptr;
161 	signed long r;
162 	int ret;
163 
164 	if (x_pkt->header.opcode >= MES_SCH_API_MAX)
165 		return -EINVAL;
166 
167 	if (amdgpu_emu_mode) {
168 		timeout *= 100;
169 	} else if (amdgpu_sriov_vf(adev)) {
		/* Worst case in SR-IOV where all other 15 VFs time out: each VF needs about 600ms */
171 		timeout = 15 * 600 * 1000;
172 	}
173 
174 	ret = amdgpu_device_wb_get(adev, &status_offset);
175 	if (ret)
176 		return ret;
177 
178 	status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
179 	status_ptr = (u64 *)&adev->wb.wb[status_offset];
180 	*status_ptr = 0;
181 
182 	spin_lock_irqsave(ring_lock, flags);
183 	r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
184 	if (r)
185 		goto error_unlock_free;
186 
187 	seq = ++ring->fence_drv.sync_seq;
188 	r = amdgpu_fence_wait_polling(ring,
189 				      seq - ring->fence_drv.num_fences_mask,
190 				      timeout);
191 	if (r < 1)
192 		goto error_undo;
193 
194 	api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
195 	api_status->api_completion_fence_addr = status_gpu_addr;
196 	api_status->api_completion_fence_value = 1;
197 
198 	amdgpu_ring_write_multiple(ring, pkt, size / 4);
199 
200 	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
201 	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
202 	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
203 	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
204 	mes_status_pkt.api_status.api_completion_fence_addr =
205 		ring->fence_drv.gpu_addr;
206 	mes_status_pkt.api_status.api_completion_fence_value = seq;
207 
208 	amdgpu_ring_write_multiple(ring, &mes_status_pkt,
209 				   sizeof(mes_status_pkt) / 4);
210 
211 	amdgpu_ring_commit(ring);
212 	spin_unlock_irqrestore(ring_lock, flags);
213 
214 	op_str = mes_v12_0_get_op_string(x_pkt);
215 	misc_op_str = mes_v12_0_get_misc_op_string(x_pkt);
216 
217 	if (misc_op_str)
218 		dev_dbg(adev->dev, "MES(%d) msg=%s (%s) was emitted\n",
219 			pipe, op_str, misc_op_str);
220 	else if (op_str)
221 		dev_dbg(adev->dev, "MES(%d) msg=%s was emitted\n",
222 			pipe, op_str);
223 	else
224 		dev_dbg(adev->dev, "MES(%d) msg=%d was emitted\n",
225 			pipe, x_pkt->header.opcode);
226 
227 	r = amdgpu_fence_wait_polling(ring, seq, timeout);
228 	if (r < 1 || !*status_ptr) {
229 
230 		if (misc_op_str)
231 			dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n",
232 				pipe, op_str, misc_op_str);
233 		else if (op_str)
234 			dev_err(adev->dev, "MES(%d) failed to respond to msg=%s\n",
235 				pipe, op_str);
236 		else
237 			dev_err(adev->dev, "MES(%d) failed to respond to msg=%d\n",
238 				pipe, x_pkt->header.opcode);
239 
240 		while (halt_if_hws_hang)
241 			schedule();
242 
243 		r = -ETIMEDOUT;
244 		goto error_wb_free;
245 	}
246 
247 	amdgpu_device_wb_free(adev, status_offset);
248 	return 0;
249 
250 error_undo:
251 	dev_err(adev->dev, "MES ring buffer is full.\n");
252 	amdgpu_ring_undo(ring);
253 
254 error_unlock_free:
255 	spin_unlock_irqrestore(ring_lock, flags);
256 
257 error_wb_free:
258 	amdgpu_device_wb_free(adev, status_offset);
259 	return r;
260 }
261 
262 static int convert_to_mes_queue_type(int queue_type)
263 {
264 	if (queue_type == AMDGPU_RING_TYPE_GFX)
265 		return MES_QUEUE_TYPE_GFX;
266 	else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
267 		return MES_QUEUE_TYPE_COMPUTE;
268 	else if (queue_type == AMDGPU_RING_TYPE_SDMA)
269 		return MES_QUEUE_TYPE_SDMA;
270 	else if (queue_type == AMDGPU_RING_TYPE_MES)
271 		return MES_QUEUE_TYPE_SCHQ;
272 	else
273 		BUG();
274 	return -1;
275 }
276 
277 static int convert_to_mes_priority_level(int priority_level)
278 {
279 	switch (priority_level) {
280 	case AMDGPU_MES_PRIORITY_LEVEL_LOW:
281 		return AMD_PRIORITY_LEVEL_LOW;
282 	case AMDGPU_MES_PRIORITY_LEVEL_NORMAL:
283 	default:
284 		return AMD_PRIORITY_LEVEL_NORMAL;
285 	case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM:
286 		return AMD_PRIORITY_LEVEL_MEDIUM;
287 	case AMDGPU_MES_PRIORITY_LEVEL_HIGH:
288 		return AMD_PRIORITY_LEVEL_HIGH;
289 	case AMDGPU_MES_PRIORITY_LEVEL_REALTIME:
290 		return AMD_PRIORITY_LEVEL_REALTIME;
291 	}
292 }
293 
294 static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes,
295 				  struct mes_add_queue_input *input)
296 {
297 	struct amdgpu_device *adev = mes->adev;
298 	union MESAPI__ADD_QUEUE mes_add_queue_pkt;
299 	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
300 	uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;
301 
302 	memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
303 
304 	mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
305 	mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
306 	mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
307 
308 	mes_add_queue_pkt.process_id = input->process_id;
309 	mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
310 	mes_add_queue_pkt.process_va_start = input->process_va_start;
311 	mes_add_queue_pkt.process_va_end = input->process_va_end;
312 	mes_add_queue_pkt.process_quantum = input->process_quantum;
313 	mes_add_queue_pkt.process_context_addr = input->process_context_addr;
314 	mes_add_queue_pkt.gang_quantum = input->gang_quantum;
315 	mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
316 	mes_add_queue_pkt.inprocess_gang_priority =
317 		convert_to_mes_priority_level(input->inprocess_gang_priority);
318 	mes_add_queue_pkt.gang_global_priority_level =
319 		convert_to_mes_priority_level(input->gang_global_priority_level);
320 	mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
321 	mes_add_queue_pkt.mqd_addr = input->mqd_addr;
322 
323 	mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr;
324 
325 	mes_add_queue_pkt.queue_type =
326 		convert_to_mes_queue_type(input->queue_type);
327 	mes_add_queue_pkt.paging = input->paging;
328 	mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
329 	mes_add_queue_pkt.gws_base = input->gws_base;
330 	mes_add_queue_pkt.gws_size = input->gws_size;
331 	mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
332 	mes_add_queue_pkt.tma_addr = input->tma_addr;
333 	mes_add_queue_pkt.trap_en = input->trap_en;
334 	mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
335 	mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
336 
337 	/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
338 	mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
339 	mes_add_queue_pkt.gds_size = input->queue_size;
344 
345 	return mes_v12_0_submit_pkt_and_poll_completion(mes,
346 			AMDGPU_MES_SCHED_PIPE,
347 			&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
348 			offsetof(union MESAPI__ADD_QUEUE, api_status));
349 }
350 
351 static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes,
352 				     struct mes_remove_queue_input *input)
353 {
354 	union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
355 
356 	memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
357 
358 	mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
359 	mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
360 	mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
361 
362 	mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
363 	mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
364 
365 	return mes_v12_0_submit_pkt_and_poll_completion(mes,
366 			AMDGPU_MES_SCHED_PIPE,
367 			&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
368 			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
369 }
370 
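/*
 * Acquire (req == true) or release (req == false) the CP_GFX_INDEX_MUTEX
 * hardware mutex that arbitrates GRBM_GFX_INDEX ownership between the
 * driver and the CP firmware. On acquire, reading back exactly what was
 * written means the lock was granted; on release, any other value means
 * the mutex is already free or held by firmware.
 */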
371 int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev,
372 				      bool req)
373 {
374 	u32 i, tmp, val;
375 
376 	for (i = 0; i < adev->usec_timeout; i++) {
377 		/* Request with MeId=2, PipeId=0 */
378 		tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
379 		tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
380 		WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
381 
382 		val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
383 		if (req) {
384 			if (val == tmp)
385 				break;
386 		} else {
387 			tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
388 					    REQUEST, 1);
389 
390 			/* unlocked or locked by firmware */
391 			if (val != tmp)
392 				break;
393 		}
394 		udelay(1);
395 	}
396 
397 	if (i >= adev->usec_timeout)
398 		return -EINVAL;
399 
400 	return 0;
401 }
402 
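/*
 * Reset a single gfx/compute/sdma queue directly through MMIO, bypassing
 * the MES firmware. This is the fallback used by
 * mes_v12_0_reset_legacy_queue() when the caller requests use_mmio,
 * e.g. when a reset packet cannot be submitted to MES.
 */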
403 static int mes_v12_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_type,
404 				      uint32_t me_id, uint32_t pipe_id,
405 				      uint32_t queue_id, uint32_t vmid)
406 {
407 	struct amdgpu_device *adev = mes->adev;
408 	uint32_t value, reg;
409 	int i, r = 0;
410 
411 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
412 
413 	if (queue_type == AMDGPU_RING_TYPE_GFX) {
414 		dev_info(adev->dev, "reset gfx queue (%d:%d:%d: vmid:%d)\n",
415 			 me_id, pipe_id, queue_id, vmid);
416 
417 		mutex_lock(&adev->gfx.reset_sem_mutex);
418 		gfx_v12_0_request_gfx_index_mutex(adev, true);
		/* broadcast writes to all SEs */
420 		WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX,
421 			     (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
422 		value = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
423 		if (pipe_id == 0)
424 			value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
425 		else
426 			value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
427 		WREG32_SOC15(GC, 0, regCP_VMID_RESET, value);
428 		gfx_v12_0_request_gfx_index_mutex(adev, false);
429 		mutex_unlock(&adev->gfx.reset_sem_mutex);
430 
431 		mutex_lock(&adev->srbm_mutex);
432 		soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
		/* wait until the dequeue takes effect */
434 		for (i = 0; i < adev->usec_timeout; i++) {
435 			if (!(RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE) & 1))
436 				break;
437 			udelay(1);
438 		}
439 		if (i >= adev->usec_timeout) {
440 			dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
441 			r = -ETIMEDOUT;
442 		}
443 
444 		soc21_grbm_select(adev, 0, 0, 0, 0);
445 		mutex_unlock(&adev->srbm_mutex);
446 	} else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
447 		dev_info(adev->dev, "reset compute queue (%d:%d:%d)\n",
448 			 me_id, pipe_id, queue_id);
449 		mutex_lock(&adev->srbm_mutex);
450 		soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
451 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
452 		WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
453 
		/* wait until the dequeue takes effect */
455 		for (i = 0; i < adev->usec_timeout; i++) {
456 			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
457 				break;
458 			udelay(1);
459 		}
460 		if (i >= adev->usec_timeout) {
461 			dev_err(adev->dev, "failed to wait on hqd deactivate\n");
462 			r = -ETIMEDOUT;
463 		}
464 		soc21_grbm_select(adev, 0, 0, 0, 0);
465 		mutex_unlock(&adev->srbm_mutex);
466 	} else if (queue_type == AMDGPU_RING_TYPE_SDMA) {
467 		dev_info(adev->dev, "reset sdma queue (%d:%d:%d)\n",
468 			 me_id, pipe_id, queue_id);
469 		switch (me_id) {
470 		case 1:
471 			reg = SOC15_REG_OFFSET(GC, 0, regSDMA1_QUEUE_RESET_REQ);
472 			break;
473 		case 0:
474 		default:
475 			reg = SOC15_REG_OFFSET(GC, 0, regSDMA0_QUEUE_RESET_REQ);
476 			break;
477 		}
478 
479 		value = 1 << queue_id;
480 		WREG32(reg, value);
481 		/* wait for queue reset done */
482 		for (i = 0; i < adev->usec_timeout; i++) {
483 			if (!(RREG32(reg) & value))
484 				break;
485 			udelay(1);
486 		}
487 		if (i >= adev->usec_timeout) {
488 			dev_err(adev->dev, "failed to wait on sdma queue reset done\n");
489 			r = -ETIMEDOUT;
490 		}
491 	}
492 
493 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
494 	return r;
495 }
496 
497 static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
498 				    struct mes_reset_queue_input *input)
499 {
500 	union MESAPI__RESET mes_reset_queue_pkt;
501 	int pipe;
502 
503 	memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
504 
505 	mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
506 	mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
507 	mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
508 
509 	mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
510 	mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr;
511 	/*mes_reset_queue_pkt.reset_queue_only = 1;*/
512 
513 	if (mes->adev->enable_uni_mes)
514 		pipe = AMDGPU_MES_KIQ_PIPE;
515 	else
516 		pipe = AMDGPU_MES_SCHED_PIPE;
517 
518 	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
519 			&mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
			offsetof(union MESAPI__RESET, api_status));
521 }
522 
523 static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes,
524 				      struct mes_map_legacy_queue_input *input)
525 {
526 	union MESAPI__ADD_QUEUE mes_add_queue_pkt;
527 	int pipe;
528 
529 	memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
530 
531 	mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
532 	mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
533 	mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
534 
535 	mes_add_queue_pkt.pipe_id = input->pipe_id;
536 	mes_add_queue_pkt.queue_id = input->queue_id;
537 	mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
538 	mes_add_queue_pkt.mqd_addr = input->mqd_addr;
539 	mes_add_queue_pkt.wptr_addr = input->wptr_addr;
540 	mes_add_queue_pkt.queue_type =
541 		convert_to_mes_queue_type(input->queue_type);
542 	mes_add_queue_pkt.map_legacy_kq = 1;
543 
544 	if (mes->adev->enable_uni_mes)
545 		pipe = AMDGPU_MES_KIQ_PIPE;
546 	else
547 		pipe = AMDGPU_MES_SCHED_PIPE;
548 
549 	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
550 			&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
551 			offsetof(union MESAPI__ADD_QUEUE, api_status));
552 }
553 
554 static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes,
555 			struct mes_unmap_legacy_queue_input *input)
556 {
557 	union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
558 	int pipe;
559 
560 	memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
561 
562 	mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
563 	mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
564 	mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
565 
566 	mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
567 	mes_remove_queue_pkt.gang_context_addr = 0;
568 
569 	mes_remove_queue_pkt.pipe_id = input->pipe_id;
570 	mes_remove_queue_pkt.queue_id = input->queue_id;
571 
572 	if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
573 		mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
574 		mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
575 		mes_remove_queue_pkt.tf_data =
576 			lower_32_bits(input->trail_fence_data);
577 	} else {
578 		mes_remove_queue_pkt.unmap_legacy_queue = 1;
579 		mes_remove_queue_pkt.queue_type =
580 			convert_to_mes_queue_type(input->queue_type);
581 	}
582 
583 	if (mes->adev->enable_uni_mes)
584 		pipe = AMDGPU_MES_KIQ_PIPE;
585 	else
586 		pipe = AMDGPU_MES_SCHED_PIPE;
587 
588 	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
589 			&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
590 			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
591 }
592 
593 static int mes_v12_0_suspend_gang(struct amdgpu_mes *mes,
594 				  struct mes_suspend_gang_input *input)
595 {
596 	return 0;
597 }
598 
599 static int mes_v12_0_resume_gang(struct amdgpu_mes *mes,
600 				 struct mes_resume_gang_input *input)
601 {
602 	return 0;
603 }
604 
605 static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes, int pipe)
606 {
607 	union MESAPI__QUERY_MES_STATUS mes_status_pkt;
608 
609 	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
610 
611 	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
612 	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
613 	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
614 
615 	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
616 			&mes_status_pkt, sizeof(mes_status_pkt),
617 			offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
618 }
619 
620 static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
621 			     struct mes_misc_op_input *input)
622 {
623 	union MESAPI__MISC misc_pkt;
624 	int pipe;
625 
626 	if (mes->adev->enable_uni_mes)
627 		pipe = AMDGPU_MES_KIQ_PIPE;
628 	else
629 		pipe = AMDGPU_MES_SCHED_PIPE;
630 
631 	memset(&misc_pkt, 0, sizeof(misc_pkt));
632 
633 	misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
634 	misc_pkt.header.opcode = MES_SCH_API_MISC;
635 	misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
636 
637 	switch (input->op) {
638 	case MES_MISC_OP_READ_REG:
639 		misc_pkt.opcode = MESAPI_MISC__READ_REG;
640 		misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset;
641 		misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
642 		break;
643 	case MES_MISC_OP_WRITE_REG:
644 		misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
645 		misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset;
646 		misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
647 		break;
648 	case MES_MISC_OP_WRM_REG_WAIT:
649 		misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
650 		misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
651 		misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
652 		misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
653 		misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
654 		misc_pkt.wait_reg_mem.reg_offset2 = 0;
655 		break;
656 	case MES_MISC_OP_WRM_REG_WR_WAIT:
657 		misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
658 		misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
659 		misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
660 		misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
661 		misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
662 		misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
663 		break;
664 	case MES_MISC_OP_SET_SHADER_DEBUGGER:
665 		pipe = AMDGPU_MES_SCHED_PIPE;
666 		misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
667 		misc_pkt.set_shader_debugger.process_context_addr =
668 				input->set_shader_debugger.process_context_addr;
669 		misc_pkt.set_shader_debugger.flags.u32all =
670 				input->set_shader_debugger.flags.u32all;
671 		misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl =
672 				input->set_shader_debugger.spi_gdbg_per_vmid_cntl;
673 		memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
674 				input->set_shader_debugger.tcp_watch_cntl,
675 				sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
676 		misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en;
677 		break;
678 	case MES_MISC_OP_CHANGE_CONFIG:
679 		misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
680 		misc_pkt.change_config.opcode =
681 				MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS;
682 		misc_pkt.change_config.option.bits.limit_single_process =
683 				input->change_config.option.limit_single_process;
684 		break;
685 
686 	default:
		DRM_ERROR("unsupported misc op (%d)\n", input->op);
688 		return -EINVAL;
689 	}
690 
691 	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
692 			&misc_pkt, sizeof(misc_pkt),
693 			offsetof(union MESAPI__MISC, api_status));
694 }
695 
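/*
 * SET_HW_RSRC_1 supplies a second batch of resources to the firmware:
 * the KIQ unmap timeout and the MC address of this pipe's resource_1
 * buffer, which MES uses as the cleaner-shader fence location.
 */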
696 static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe)
697 {
698 	union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt;
699 
700 	memset(&mes_set_hw_res_1_pkt, 0, sizeof(mes_set_hw_res_1_pkt));
701 
702 	mes_set_hw_res_1_pkt.header.type = MES_API_TYPE_SCHEDULER;
703 	mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
704 	mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
705 	mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 0xa;
706 	mes_set_hw_res_1_pkt.cleaner_shader_fence_mc_addr =
707 		mes->resource_1_gpu_addr[pipe];
708 
709 	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
710 			&mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt),
711 			offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
712 }
713 
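/*
 * SET_HW_RSRC hands a MES pipe its global resources: the scheduler pipe
 * additionally gets the VMID/HQD masks and aggregated doorbells, while
 * every pipe gets its context buffers, register apertures and the
 * feature flags used by this driver.
 */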
714 static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe)
715 {
716 	int i;
717 	struct amdgpu_device *adev = mes->adev;
718 	union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;
719 
720 	memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
721 
722 	mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
723 	mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
724 	mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
725 
726 	if (pipe == AMDGPU_MES_SCHED_PIPE) {
727 		mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
728 		mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
729 		mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
730 		mes_set_hw_res_pkt.paging_vmid = 0;
731 
732 		for (i = 0; i < MAX_COMPUTE_PIPES; i++)
733 			mes_set_hw_res_pkt.compute_hqd_mask[i] =
734 				mes->compute_hqd_mask[i];
735 
736 		for (i = 0; i < MAX_GFX_PIPES; i++)
737 			mes_set_hw_res_pkt.gfx_hqd_mask[i] =
738 				mes->gfx_hqd_mask[i];
739 
740 		for (i = 0; i < MAX_SDMA_PIPES; i++)
741 			mes_set_hw_res_pkt.sdma_hqd_mask[i] =
742 				mes->sdma_hqd_mask[i];
743 
744 		for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
745 			mes_set_hw_res_pkt.aggregated_doorbells[i] =
746 				mes->aggregated_doorbells[i];
747 	}
748 
749 	mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr =
750 		mes->sch_ctx_gpu_addr[pipe];
751 	mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
752 		mes->query_status_fence_gpu_addr[pipe];
753 
754 	for (i = 0; i < 5; i++) {
755 		mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
756 		mes_set_hw_res_pkt.mmhub_base[i] =
757 				adev->reg_offset[MMHUB_HWIP][0][i];
758 		mes_set_hw_res_pkt.osssys_base[i] =
759 		adev->reg_offset[OSSSYS_HWIP][0][i];
760 	}
761 
762 	mes_set_hw_res_pkt.disable_reset = 1;
763 	mes_set_hw_res_pkt.disable_mes_log = 1;
764 	mes_set_hw_res_pkt.use_different_vmid_compute = 1;
765 	mes_set_hw_res_pkt.enable_reg_active_poll = 1;
766 	mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
767 
	/*
	 * Keep the oversubscription timer for SDMA. Once unmapped doorbell
	 * handling is supported, other queues will not use the timer.
	 * unmapped_doorbell_handling mode - 0: disabled; 1: basic version;
	 * 2: basic+ version
	 */
773 	mes_set_hw_res_pkt.oversubscription_timer = 50;
774 	mes_set_hw_res_pkt.unmapped_doorbell_handling = 1;
775 
776 	if (amdgpu_mes_log_enable) {
777 		mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
778 		mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr +
779 				pipe * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE);
780 	}
781 
782 	if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
783 		mes_set_hw_res_pkt.limit_single_process = 1;
784 
785 	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
786 			&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
787 			offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
788 }
789 
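/*
 * Program one aggregated doorbell per MES priority level into
 * CP_MES_DOORBELL_CONTROL1..5, and set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN
 * so doorbell writes generate update messages.
 */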
790 static void mes_v12_0_init_aggregated_doorbell(struct amdgpu_mes *mes)
791 {
792 	struct amdgpu_device *adev = mes->adev;
793 	uint32_t data;
794 
795 	data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1);
796 	data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
797 		  CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
798 		  CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
799 	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
800 		CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
801 	data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
802 	WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1, data);
803 
804 	data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2);
805 	data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
806 		  CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
807 		  CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
808 	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
809 		CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
810 	data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
811 	WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2, data);
812 
813 	data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3);
814 	data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
815 		  CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
816 		  CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
817 	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
818 		CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
819 	data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
820 	WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3, data);
821 
822 	data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4);
823 	data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
824 		  CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
825 		  CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
826 	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
827 		CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
828 	data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
829 	WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4, data);
830 
831 	data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5);
832 	data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
833 		  CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
834 		  CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
835 	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
836 		CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
837 	data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
838 	WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5, data);
839 
840 	data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
841 	WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data);
842 }
843 
844 
845 static void mes_v12_0_enable_unmapped_doorbell_handling(
846 		struct amdgpu_mes *mes, bool enable)
847 {
848 	struct amdgpu_device *adev = mes->adev;
849 	uint32_t data = RREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL);
850 
	/*
	 * The default PROC_LSB setting is 0xc, which means doorbell
	 * addr[16:12] gives the doorbell page number. For KFD, each
	 * process uses 2 doorbell pages, so change the setting to 0xd.
	 */
857 	data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK;
858 	data |= 0xd <<  CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT;
859 
860 	data |= (enable ? 1 : 0) << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT;
861 
862 	WREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL, data);
863 }
864 
865 static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes,
866 					struct mes_reset_legacy_queue_input *input)
867 {
868 	union MESAPI__RESET mes_reset_queue_pkt;
869 	int pipe;
870 
871 	if (input->use_mmio)
872 		return mes_v12_0_reset_queue_mmio(mes, input->queue_type,
873 						  input->me_id, input->pipe_id,
874 						  input->queue_id, input->vmid);
875 
876 	memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
877 
878 	mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
879 	mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
880 	mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
881 
882 	mes_reset_queue_pkt.queue_type =
883 		convert_to_mes_queue_type(input->queue_type);
884 
885 	if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) {
886 		mes_reset_queue_pkt.reset_legacy_gfx = 1;
887 		mes_reset_queue_pkt.pipe_id_lp = input->pipe_id;
888 		mes_reset_queue_pkt.queue_id_lp = input->queue_id;
889 		mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr;
890 		mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset;
891 		mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr;
892 		mes_reset_queue_pkt.vmid_id_lp = input->vmid;
893 	} else {
894 		mes_reset_queue_pkt.reset_queue_only = 1;
895 		mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
896 	}
897 
898 	if (mes->adev->enable_uni_mes)
899 		pipe = AMDGPU_MES_KIQ_PIPE;
900 	else
901 		pipe = AMDGPU_MES_SCHED_PIPE;
902 
903 	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
904 			&mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
905 			offsetof(union MESAPI__RESET, api_status));
906 }
907 
908 static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
909 	.add_hw_queue = mes_v12_0_add_hw_queue,
910 	.remove_hw_queue = mes_v12_0_remove_hw_queue,
911 	.map_legacy_queue = mes_v12_0_map_legacy_queue,
912 	.unmap_legacy_queue = mes_v12_0_unmap_legacy_queue,
913 	.suspend_gang = mes_v12_0_suspend_gang,
914 	.resume_gang = mes_v12_0_resume_gang,
915 	.misc_op = mes_v12_0_misc_op,
916 	.reset_legacy_queue = mes_v12_0_reset_legacy_queue,
917 	.reset_hw_queue = mes_v12_0_reset_hw_queue,
918 };
919 
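/*
 * Backdoor loading path: copy the MES instruction ucode out of the
 * firmware image into a page-aligned VRAM BO that the CP instruction
 * cache is later pointed at via CP_MES_IC_BASE_LO/HI.
 */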
920 static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev,
921 					   enum amdgpu_mes_pipe pipe)
922 {
923 	int r;
924 	const struct mes_firmware_header_v1_0 *mes_hdr;
925 	const __le32 *fw_data;
926 	unsigned fw_size;
927 
928 	mes_hdr = (const struct mes_firmware_header_v1_0 *)
929 		adev->mes.fw[pipe]->data;
930 
931 	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
932 		   le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
933 	fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
934 
935 	r = amdgpu_bo_create_reserved(adev, fw_size,
936 				      PAGE_SIZE,
937 				      AMDGPU_GEM_DOMAIN_VRAM,
938 				      &adev->mes.ucode_fw_obj[pipe],
939 				      &adev->mes.ucode_fw_gpu_addr[pipe],
940 				      (void **)&adev->mes.ucode_fw_ptr[pipe]);
941 	if (r) {
942 		dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
943 		return r;
944 	}
945 
946 	memcpy(adev->mes.ucode_fw_ptr[pipe], fw_data, fw_size);
947 
948 	amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[pipe]);
949 	amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[pipe]);
950 
951 	return 0;
952 }
953 
954 static int mes_v12_0_allocate_ucode_data_buffer(struct amdgpu_device *adev,
955 						enum amdgpu_mes_pipe pipe)
956 {
957 	int r;
958 	const struct mes_firmware_header_v1_0 *mes_hdr;
959 	const __le32 *fw_data;
960 	unsigned fw_size;
961 
962 	mes_hdr = (const struct mes_firmware_header_v1_0 *)
963 		adev->mes.fw[pipe]->data;
964 
965 	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
966 		   le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
967 	fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
968 
969 	r = amdgpu_bo_create_reserved(adev, fw_size,
970 				      64 * 1024,
971 				      AMDGPU_GEM_DOMAIN_VRAM,
972 				      &adev->mes.data_fw_obj[pipe],
973 				      &adev->mes.data_fw_gpu_addr[pipe],
974 				      (void **)&adev->mes.data_fw_ptr[pipe]);
975 	if (r) {
976 		dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
977 		return r;
978 	}
979 
980 	memcpy(adev->mes.data_fw_ptr[pipe], fw_data, fw_size);
981 
982 	amdgpu_bo_kunmap(adev->mes.data_fw_obj[pipe]);
983 	amdgpu_bo_unreserve(adev->mes.data_fw_obj[pipe]);
984 
985 	return 0;
986 }
987 
988 static void mes_v12_0_free_ucode_buffers(struct amdgpu_device *adev,
989 					 enum amdgpu_mes_pipe pipe)
990 {
991 	amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe],
992 			      &adev->mes.data_fw_gpu_addr[pipe],
993 			      (void **)&adev->mes.data_fw_ptr[pipe]);
994 
995 	amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[pipe],
996 			      &adev->mes.ucode_fw_gpu_addr[pipe],
997 			      (void **)&adev->mes.ucode_fw_ptr[pipe]);
998 }
999 
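/*
 * Take the MES pipes out of reset (enable) or halt them (disable). On
 * enable, each pipe's program counter is pointed at its ucode start
 * address before the pipe is activated; on disable, both pipes are
 * halted, reset and their instruction caches invalidated.
 */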
1000 static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable)
1001 {
1002 	uint64_t ucode_addr;
1003 	uint32_t pipe, data = 0;
1004 
1005 	if (enable) {
1006 		mutex_lock(&adev->srbm_mutex);
1007 		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
1008 			soc21_grbm_select(adev, 3, pipe, 0, 0);
1009 			if (amdgpu_mes_log_enable) {
1010 				u32 log_size = AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE;
				/* if uni-MES is not enabled, only pipe 0 has a log buffer */
1012 				if (adev->mes.event_log_size >= (pipe + 1) * log_size) {
1013 					WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO,
1014 						     lower_32_bits(adev->mes.event_log_gpu_addr +
1015 						     pipe * log_size + AMDGPU_MES_LOG_BUFFER_SIZE));
1016 					WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI,
1017 						     upper_32_bits(adev->mes.event_log_gpu_addr +
1018 						     pipe * log_size + AMDGPU_MES_LOG_BUFFER_SIZE));
					dev_info(adev->dev, "Setup CP MES MSCRATCH address: hi 0x%x, lo 0x%x\n",
1020 						 RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI),
1021 						 RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO));
1022 				}
1023 			}
1024 
1025 			data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
1026 			if (pipe == 0)
1027 				data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
1028 			else
1029 				data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
1030 			WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
1031 
1032 			ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
1033 			WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
1034 				     lower_32_bits(ucode_addr));
1035 			WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
1036 				     upper_32_bits(ucode_addr));
1037 
1038 			/* unhalt MES and activate one pipe each loop */
1039 			data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
1040 			if (pipe)
1041 				data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1);
			dev_info(adev->dev, "program CP_MES_CNTL: 0x%x\n", data);
1043 
1044 			WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
1045 
1046 		}
1047 		soc21_grbm_select(adev, 0, 0, 0, 0);
1048 		mutex_unlock(&adev->srbm_mutex);
1049 
1050 		if (amdgpu_emu_mode)
1051 			msleep(100);
1052 		else if (adev->enable_uni_mes)
1053 			udelay(500);
1054 		else
1055 			udelay(50);
1056 	} else {
1057 		data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
1058 		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
1059 		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
1060 		data = REG_SET_FIELD(data, CP_MES_CNTL,
1061 				     MES_INVALIDATE_ICACHE, 1);
1062 		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
1063 		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
1064 		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
1065 		WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
1066 	}
1067 }
1068 
1069 static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev)
1070 {
1071 	uint64_t ucode_addr;
1072 	int pipe;
1073 
1074 	mes_v12_0_enable(adev, false);
1075 
1076 	mutex_lock(&adev->srbm_mutex);
1077 	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
1078 		/* me=3, queue=0 */
1079 		soc21_grbm_select(adev, 3, pipe, 0, 0);
1080 
1081 		/* set ucode start address */
1082 		ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
1083 		WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
1084 				lower_32_bits(ucode_addr));
1085 		WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
1086 				upper_32_bits(ucode_addr));
1087 
1088 		soc21_grbm_select(adev, 0, 0, 0, 0);
1089 	}
1090 	mutex_unlock(&adev->srbm_mutex);
1091 }
1092 
/* This function is for backdoor (direct) loading of MES firmware */
1094 static int mes_v12_0_load_microcode(struct amdgpu_device *adev,
1095 				    enum amdgpu_mes_pipe pipe, bool prime_icache)
1096 {
1097 	int r;
1098 	uint32_t data;
1099 
1100 	mes_v12_0_enable(adev, false);
1101 
1102 	if (!adev->mes.fw[pipe])
1103 		return -EINVAL;
1104 
1105 	r = mes_v12_0_allocate_ucode_buffer(adev, pipe);
1106 	if (r)
1107 		return r;
1108 
1109 	r = mes_v12_0_allocate_ucode_data_buffer(adev, pipe);
1110 	if (r) {
1111 		mes_v12_0_free_ucode_buffers(adev, pipe);
1112 		return r;
1113 	}
1114 
1115 	mutex_lock(&adev->srbm_mutex);
1116 	/* me=3, pipe=0, queue=0 */
1117 	soc21_grbm_select(adev, 3, pipe, 0, 0);
1118 
1119 	WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_CNTL, 0);
1120 
	/* set ucode firmware address */
1122 	WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_LO,
1123 		     lower_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
1124 	WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_HI,
1125 		     upper_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
1126 
1127 	/* set ucode instruction cache boundary to 2M-1 */
1128 	WREG32_SOC15(GC, 0, regCP_MES_MIBOUND_LO, 0x1FFFFF);
1129 
1130 	/* set ucode data firmware address */
1131 	WREG32_SOC15(GC, 0, regCP_MES_MDBASE_LO,
1132 		     lower_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
1133 	WREG32_SOC15(GC, 0, regCP_MES_MDBASE_HI,
1134 		     upper_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
1135 
1136 	/* Set data cache boundary CP_MES_MDBOUND_LO */
1137 	WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x7FFFF);
1138 
1139 	if (prime_icache) {
1140 		/* invalidate ICACHE */
1141 		data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
1142 		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
1143 		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
1144 		WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
1145 
1146 		/* prime the ICACHE. */
1147 		data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
1148 		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
1149 		WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
1150 	}
1151 
1152 	soc21_grbm_select(adev, 0, 0, 0, 0);
1153 	mutex_unlock(&adev->srbm_mutex);
1154 
1155 	return 0;
1156 }
1157 
1158 static int mes_v12_0_allocate_eop_buf(struct amdgpu_device *adev,
1159 				      enum amdgpu_mes_pipe pipe)
1160 {
1161 	int r;
1162 	u32 *eop;
1163 
1164 	r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
1165 			      AMDGPU_GEM_DOMAIN_GTT,
1166 			      &adev->mes.eop_gpu_obj[pipe],
1167 			      &adev->mes.eop_gpu_addr[pipe],
1168 			      (void **)&eop);
1169 	if (r) {
1170 		dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
1171 		return r;
1172 	}
1173 
1174 	memset(eop, 0,
1175 	       adev->mes.eop_gpu_obj[pipe]->tbo.base.size);
1176 
1177 	amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[pipe]);
1178 	amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[pipe]);
1179 
1180 	return 0;
1181 }
1182 
1183 static int mes_v12_0_mqd_init(struct amdgpu_ring *ring)
1184 {
1185 	struct v12_compute_mqd *mqd = ring->mqd_ptr;
1186 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
1187 	uint32_t tmp;
1188 
1189 	mqd->header = 0xC0310800;
1190 	mqd->compute_pipelinestat_enable = 0x00000001;
1191 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
1192 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
1193 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
1194 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
1195 	mqd->compute_misc_reserved = 0x00000007;
1196 
1197 	eop_base_addr = ring->eop_gpu_addr >> 8;
1198 
1199 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
1200 	tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
1201 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
1202 			(order_base_2(MES_EOP_SIZE / 4) - 1));
1203 
1204 	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
1205 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
1206 	mqd->cp_hqd_eop_control = tmp;
1207 
1208 	/* disable the queue if it's active */
1209 	ring->wptr = 0;
1210 	mqd->cp_hqd_pq_rptr = 0;
1211 	mqd->cp_hqd_pq_wptr_lo = 0;
1212 	mqd->cp_hqd_pq_wptr_hi = 0;
1213 
1214 	/* set the pointer to the MQD */
1215 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
1216 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
1217 
1218 	/* set MQD vmid to 0 */
1219 	tmp = regCP_MQD_CONTROL_DEFAULT;
1220 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
1221 	mqd->cp_mqd_control = tmp;
1222 
	/* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
1224 	hqd_gpu_addr = ring->gpu_addr >> 8;
1225 	mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
1226 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
1227 
1228 	/* set the wb address whether it's enabled or not */
1229 	wb_gpu_addr = ring->rptr_gpu_addr;
1230 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
1231 	mqd->cp_hqd_pq_rptr_report_addr_hi =
1232 		upper_32_bits(wb_gpu_addr) & 0xffff;
1233 
1234 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
1235 	wb_gpu_addr = ring->wptr_gpu_addr;
1236 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
1237 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
1238 
1239 	/* set up the HQD, this is similar to CP_RB0_CNTL */
1240 	tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
1241 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
1242 			    (order_base_2(ring->ring_size / 4) - 1));
1243 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
1244 			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
1245 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
1246 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
1247 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
1248 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
1249 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
1250 	mqd->cp_hqd_pq_control = tmp;
1251 
1252 	/* enable doorbell */
1253 	tmp = 0;
1254 	if (ring->use_doorbell) {
1255 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1256 				    DOORBELL_OFFSET, ring->doorbell_index);
1257 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1258 				    DOORBELL_EN, 1);
1259 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1260 				    DOORBELL_SOURCE, 0);
1261 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1262 				    DOORBELL_HIT, 0);
1263 	} else {
1264 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1265 				    DOORBELL_EN, 0);
1266 	}
1267 	mqd->cp_hqd_pq_doorbell_control = tmp;
1268 
1269 	mqd->cp_hqd_vmid = 0;
1270 	/* activate the queue */
1271 	mqd->cp_hqd_active = 1;
1272 
1273 	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
1274 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE,
1275 			    PRELOAD_SIZE, 0x55);
1276 	mqd->cp_hqd_persistent_state = tmp;
1277 
1278 	mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_DEFAULT;
1279 	mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
1280 	mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;
1281 
	/*
	 * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped
	 * doorbell handling. This is a reserved CP internal register that
	 * cannot be accessed by others.
	 */
1287 	mqd->reserved_184 = BIT(15);
1288 
1289 	return 0;
1290 }
1291 
1292 static void mes_v12_0_queue_init_register(struct amdgpu_ring *ring)
1293 {
1294 	struct v12_compute_mqd *mqd = ring->mqd_ptr;
1295 	struct amdgpu_device *adev = ring->adev;
1296 	uint32_t data = 0;
1297 
1298 	mutex_lock(&adev->srbm_mutex);
1299 	soc21_grbm_select(adev, 3, ring->pipe, 0, 0);
1300 
1301 	/* set CP_HQD_VMID.VMID = 0. */
1302 	data = RREG32_SOC15(GC, 0, regCP_HQD_VMID);
1303 	data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
1304 	WREG32_SOC15(GC, 0, regCP_HQD_VMID, data);
1305 
1306 	/* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
1307 	data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
1308 	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
1309 			     DOORBELL_EN, 0);
1310 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
1311 
1312 	/* set CP_MQD_BASE_ADDR/HI with the MQD base address */
1313 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
1314 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
1315 
1316 	/* set CP_MQD_CONTROL.VMID=0 */
1317 	data = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
1318 	data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
	WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, data);
1320 
1321 	/* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
1322 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
1323 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
1324 
1325 	/* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
1326 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
1327 		     mqd->cp_hqd_pq_rptr_report_addr_lo);
1328 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
1329 		     mqd->cp_hqd_pq_rptr_report_addr_hi);
1330 
1331 	/* set CP_HQD_PQ_CONTROL */
1332 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
1333 
1334 	/* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
1335 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
1336 		     mqd->cp_hqd_pq_wptr_poll_addr_lo);
1337 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
1338 		     mqd->cp_hqd_pq_wptr_poll_addr_hi);
1339 
1340 	/* set CP_HQD_PQ_DOORBELL_CONTROL */
1341 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
1342 		     mqd->cp_hqd_pq_doorbell_control);
1343 
	/* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x55 */
1345 	WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
1346 
1347 	/* set CP_HQD_ACTIVE.ACTIVE=1 */
1348 	WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, mqd->cp_hqd_active);
1349 
1350 	soc21_grbm_select(adev, 0, 0, 0, 0);
1351 	mutex_unlock(&adev->srbm_mutex);
1352 }
1353 
1354 static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev)
1355 {
1356 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
1357 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
1358 	int r;
1359 
1360 	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
1361 		return -EINVAL;
1362 
1363 	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
1364 	if (r) {
1365 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
1366 		return r;
1367 	}
1368 
1369 	kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]);
1370 
1371 	r = amdgpu_ring_test_ring(kiq_ring);
1372 	if (r) {
		DRM_ERROR("kiq enable failed\n");
1374 		kiq_ring->sched.ready = false;
1375 	}
1376 	return r;
1377 }
1378 
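/*
 * Initialize one MES queue: set up its MQD, then either map it through
 * the KIQ / legacy-queue path (scheduler pipe) or program the HQD
 * registers directly, and read back the firmware version the running
 * MES exports through CP_MES_GP3_LO.
 */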
1379 static int mes_v12_0_queue_init(struct amdgpu_device *adev,
1380 				enum amdgpu_mes_pipe pipe)
1381 {
1382 	struct amdgpu_ring *ring;
1383 	int r;
1384 
1385 	if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
1386 		ring = &adev->gfx.kiq[0].ring;
1387 	else
1388 		ring = &adev->mes.ring[pipe];
1389 
1390 	if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) &&
1391 	    (amdgpu_in_reset(adev) || adev->in_suspend)) {
1392 		*(ring->wptr_cpu_addr) = 0;
1393 		*(ring->rptr_cpu_addr) = 0;
1394 		amdgpu_ring_clear_ring(ring);
1395 	}
1396 
1397 	r = mes_v12_0_mqd_init(ring);
1398 	if (r)
1399 		return r;
1400 
1401 	if (pipe == AMDGPU_MES_SCHED_PIPE) {
1402 		if (adev->enable_uni_mes)
1403 			r = amdgpu_mes_map_legacy_queue(adev, ring);
1404 		else
1405 			r = mes_v12_0_kiq_enable_queue(adev);
1406 		if (r)
1407 			return r;
1408 	} else {
1409 		mes_v12_0_queue_init_register(ring);
1410 	}
1411 
1412 	if (((pipe == AMDGPU_MES_SCHED_PIPE) && !adev->mes.sched_version) ||
1413 	    ((pipe == AMDGPU_MES_KIQ_PIPE) && !adev->mes.kiq_version)) {
1414 		/* get MES scheduler/KIQ versions */
1415 		mutex_lock(&adev->srbm_mutex);
1416 		soc21_grbm_select(adev, 3, pipe, 0, 0);
1417 
1418 		if (pipe == AMDGPU_MES_SCHED_PIPE)
1419 			adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
1420 		else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
1421 			adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
1422 
1423 		soc21_grbm_select(adev, 0, 0, 0, 0);
1424 		mutex_unlock(&adev->srbm_mutex);
1425 	}
1426 
1427 	return 0;
1428 }
1429 
1430 static int mes_v12_0_ring_init(struct amdgpu_device *adev, int pipe)
1431 {
1432 	struct amdgpu_ring *ring;
1433 
1434 	ring = &adev->mes.ring[pipe];
1435 
1436 	ring->funcs = &mes_v12_0_ring_funcs;
1437 
1438 	ring->me = 3;
1439 	ring->pipe = pipe;
1440 	ring->queue = 0;
1441 
1442 	ring->ring_obj = NULL;
1443 	ring->use_doorbell = true;
1444 	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[pipe];
1445 	ring->no_scheduler = true;
1446 	sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1447 
1448 	if (pipe == AMDGPU_MES_SCHED_PIPE)
1449 		ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1;
1450 	else
1451 		ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
1452 
1453 	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
1454 				AMDGPU_RING_PRIO_DEFAULT, NULL);
1455 }
1456 
1457 static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev)
1458 {
1459 	struct amdgpu_ring *ring;
1460 
1461 	spin_lock_init(&adev->gfx.kiq[0].ring_lock);
1462 
1463 	ring = &adev->gfx.kiq[0].ring;
1464 
1465 	ring->me = 3;
1466 	ring->pipe = 1;
1467 	ring->queue = 0;
1468 
1469 	ring->adev = NULL;
1470 	ring->ring_obj = NULL;
1471 	ring->use_doorbell = true;
1472 	ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
1473 	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_KIQ_PIPE];
1474 	ring->no_scheduler = true;
1475 	sprintf(ring->name, "mes_kiq_%d.%d.%d",
1476 		ring->me, ring->pipe, ring->queue);
1477 
1478 	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
1479 				AMDGPU_RING_PRIO_DEFAULT, NULL);
1480 }
1481 
1482 static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev,
1483 				 enum amdgpu_mes_pipe pipe)
1484 {
1485 	int r, mqd_size = sizeof(struct v12_compute_mqd);
1486 	struct amdgpu_ring *ring;
1487 
1488 	if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
1489 		ring = &adev->gfx.kiq[0].ring;
1490 	else
1491 		ring = &adev->mes.ring[pipe];
1492 
1493 	if (ring->mqd_obj)
1494 		return 0;
1495 
1496 	r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
1497 				    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
1498 				    &ring->mqd_gpu_addr, &ring->mqd_ptr);
1499 	if (r) {
1500 		dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
1501 		return r;
1502 	}
1503 
1504 	memset(ring->mqd_ptr, 0, mqd_size);
1505 
1506 	/* prepare MQD backup */
1507 	adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL);
1508 	if (!adev->mes.mqd_backup[pipe])
1509 		dev_warn(adev->dev,
1510 			 "no memory to create MQD backup for ring %s\n",
1511 			 ring->name);
1512 
1513 	return 0;
1514 }
1515 
1516 static int mes_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
1517 {
1518 	struct amdgpu_device *adev = ip_block->adev;
1519 	int pipe, r;
1520 
1521 	adev->mes.funcs = &mes_v12_0_funcs;
1522 	adev->mes.kiq_hw_init = &mes_v12_0_kiq_hw_init;
1523 	adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini;
1524 	adev->mes.enable_legacy_queue_map = true;
1525 
1526 	adev->mes.event_log_size = adev->enable_uni_mes ?
1527 		(AMDGPU_MAX_MES_PIPES * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE)) :
1528 		(AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE);
1529 	r = amdgpu_mes_init(adev);
1530 	if (r)
1531 		return r;
1532 
1533 	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
1534 		r = mes_v12_0_allocate_eop_buf(adev, pipe);
1535 		if (r)
1536 			return r;
1537 
1538 		r = mes_v12_0_mqd_sw_init(adev, pipe);
1539 		if (r)
1540 			return r;
1541 
		if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) {
			r = mes_v12_0_kiq_ring_init(adev);
			if (r)
				return r;
		} else {
1546 			r = mes_v12_0_ring_init(adev, pipe);
1547 			if (r)
1548 				return r;
1549 			r = amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1550 						    AMDGPU_GEM_DOMAIN_VRAM,
1551 						    &adev->mes.resource_1[pipe],
1552 						    &adev->mes.resource_1_gpu_addr[pipe],
1553 						    &adev->mes.resource_1_addr[pipe]);
1554 			if (r) {
1555 				dev_err(adev->dev, "(%d) failed to create mes resource_1 bo pipe[%d]\n", r, pipe);
1556 				return r;
1557 			}
1558 		}
1559 	}
1560 
1561 	return 0;
1562 }
1563 
static int mes_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int pipe;

	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		amdgpu_bo_free_kernel(&adev->mes.resource_1[pipe],
				      &adev->mes.resource_1_gpu_addr[pipe],
				      &adev->mes.resource_1_addr[pipe]);

		kfree(adev->mes.mqd_backup[pipe]);

		amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
				      &adev->mes.eop_gpu_addr[pipe],
				      NULL);
		amdgpu_ucode_release(&adev->mes.fw[pipe]);

		if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) {
			amdgpu_bo_free_kernel(&adev->mes.ring[pipe].mqd_obj,
					      &adev->mes.ring[pipe].mqd_gpu_addr,
					      &adev->mes.ring[pipe].mqd_ptr);
			amdgpu_ring_fini(&adev->mes.ring[pipe]);
		}
	}

	if (!adev->enable_uni_mes) {
		amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj,
				      &adev->gfx.kiq[0].ring.mqd_gpu_addr,
				      &adev->gfx.kiq[0].ring.mqd_ptr);
		amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
		mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_SCHED_PIPE);
	}

	amdgpu_mes_fini(adev);
	return 0;
}

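/*
 * Dequeue the scheduler queue by hand through the HQD registers (GRBM
 * steered to me 3 / the sched pipe, under the SRBM mutex). Used on the
 * non-unified teardown path instead of a MES unmap request.
 */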
static void mes_v12_0_kiq_dequeue_sched(struct amdgpu_device *adev)
{
	uint32_t data;
	int i;

	mutex_lock(&adev->srbm_mutex);
	soc21_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0);

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
	}
	data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_EN, 0);
	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_HIT, 1);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);

	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0);

	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 0);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 0);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 0);

	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	adev->mes.ring[0].sched.ready = false;
}

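/*
 * Tell the RLC which queue is the KIQ by programming the low byte of
 * RLC_CP_SCHEDULERS as (me << 5) | (pipe << 3) | queue, with bit 7 as
 * the valid/enable bit (bit layout inferred from the code below, not
 * from a register spec).
 */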
static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
}

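/*
 * Bring up MES through the KIQ pipe: designate the KIQ queue to the
 * RLC, load or locate the microcode according to the firmware load
 * type, enable the engines and initialize the KIQ queue. On the
 * unified path the KIQ pipe's hardware resources are also programmed
 * here, and legacy queue mapping (when enabled) chains into the full
 * hw_init sequence.
 */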
static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev)
{
	int r = 0;
	struct amdgpu_ip_block *ip_block;

	if (adev->enable_uni_mes)
		mes_v12_0_kiq_setting(&adev->mes.ring[AMDGPU_MES_KIQ_PIPE]);
	else
		mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {

		r = mes_v12_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, false);
		if (r) {
			DRM_ERROR("failed to load MES fw, r=%d\n", r);
			return r;
		}

		r = mes_v12_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, true);
		if (r) {
			DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
			return r;
		}

		mes_v12_0_set_ucode_start_addr(adev);

	} else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
		mes_v12_0_set_ucode_start_addr(adev);

	mes_v12_0_enable(adev, true);

	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES);
	if (unlikely(!ip_block)) {
		dev_err(adev->dev, "Failed to get MES handle\n");
		return -EINVAL;
	}

	r = mes_v12_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE);
	if (r)
		goto failure;

	if (adev->enable_uni_mes) {
		r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_KIQ_PIPE);
		if (r)
			goto failure;

		mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_KIQ_PIPE);
	}

	if (adev->mes.enable_legacy_queue_map) {
		r = mes_v12_0_hw_init(ip_block);
		if (r)
			goto failure;
	}

	return r;

failure:
	mes_v12_0_hw_fini(ip_block);
	return r;
}

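/*
 * Quiesce the scheduler ring (via a MES unmap request on the unified
 * path, or a direct HQD dequeue otherwise) and then halt the MES
 * engines.
 */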
static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev)
{
	if (adev->mes.ring[0].sched.ready) {
		if (adev->enable_uni_mes)
			amdgpu_mes_unmap_legacy_queue(adev,
				      &adev->mes.ring[AMDGPU_MES_SCHED_PIPE],
				      RESET_QUEUES, 0, 0);
		else
			mes_v12_0_kiq_dequeue_sched(adev);

		adev->mes.ring[0].sched.ready = false;
	}

	mes_v12_0_enable(adev, false);

	return 0;
}

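/*
 * Bring up the scheduler pipe. When the KIQ path is not used the
 * microcode is loaded directly here; either way the queue is
 * initialized, hardware resources and the aggregated doorbell are
 * programmed, and the scheduler is queried to confirm it responds
 * before the MES ring is marked ready.
 */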
static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
{
	int r;
	struct amdgpu_device *adev = ip_block->adev;

	if (adev->mes.ring[0].sched.ready)
		goto out;

	if (!adev->enable_mes_kiq) {
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
			r = mes_v12_0_load_microcode(adev,
					     AMDGPU_MES_SCHED_PIPE, true);
			if (r) {
				DRM_ERROR("failed to load MES fw, r=%d\n", r);
				return r;
			}

			mes_v12_0_set_ucode_start_addr(adev);

		} else if (adev->firmware.load_type ==
			   AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {

			mes_v12_0_set_ucode_start_addr(adev);
		}

		mes_v12_0_enable(adev, true);
	}

	/* Enable the MES to handle doorbell ring on unmapped queue */
	mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true);

	r = mes_v12_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE);
	if (r)
		goto failure;

	r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_SCHED_PIPE);
	if (r)
		goto failure;

	mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE);

	mes_v12_0_init_aggregated_doorbell(&adev->mes);

	r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE);
	if (r) {
		DRM_ERROR("MES is busy\n");
		goto failure;
	}

	r = amdgpu_mes_update_enforce_isolation(adev);
	if (r)
		goto failure;

out:
	/*
	 * Disable KIQ ring usage from the driver once MES is enabled.
	 * MES uses KIQ ring exclusively so driver cannot access KIQ ring
	 * with MES enabled.
	 */
	adev->gfx.kiq[0].ring.sched.ready = false;
	adev->mes.ring[0].sched.ready = true;

	return 0;

failure:
	mes_v12_0_hw_fini(ip_block);
	return r;
}

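/*
 * Nothing to do here: MES teardown is driven through
 * mes_v12_0_kiq_hw_fini() instead.
 */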
static int mes_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
	return 0;
}

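/* Suspend and resume simply mirror hw_fini and hw_init. */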
static int mes_v12_0_suspend(struct amdgpu_ip_block *ip_block)
{
	return mes_v12_0_hw_fini(ip_block);
}

static int mes_v12_0_resume(struct amdgpu_ip_block *ip_block)
{
	return mes_v12_0_hw_init(ip_block);
}

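/*
 * Request the MES microcode for every pipe up front, before the sw/hw
 * init stages run.
 */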
static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int pipe, r;

	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		r = amdgpu_mes_init_microcode(adev, pipe);
		if (r)
			return r;
	}

	return 0;
}

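/*
 * IP block glue: the amdgpu core drives the MES v12 lifecycle through
 * these callbacks; no late_init stage is needed.
 */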
static const struct amd_ip_funcs mes_v12_0_ip_funcs = {
	.name = "mes_v12_0",
	.early_init = mes_v12_0_early_init,
	.late_init = NULL,
	.sw_init = mes_v12_0_sw_init,
	.sw_fini = mes_v12_0_sw_fini,
	.hw_init = mes_v12_0_hw_init,
	.hw_fini = mes_v12_0_hw_fini,
	.suspend = mes_v12_0_suspend,
	.resume = mes_v12_0_resume,
};

const struct amdgpu_ip_block_version mes_v12_0_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_MES,
	.major = 12,
	.minor = 0,
	.rev = 0,
	.funcs = &mes_v12_0_ip_funcs,
};