xref: /linux/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c (revision 13c072b8e91a5ccb5855ca1ba6fe3ea467dbf94d)
1 /*
2  * Copyright 2025 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include "amdgpu.h"
27 #include "soc15_common.h"
28 #include "soc_v1_0.h"
29 #include "gc/gc_12_1_0_offset.h"
30 #include "gc/gc_12_1_0_sh_mask.h"
31 #include "gc/gc_11_0_0_default.h"
32 #include "v12_structs.h"
33 #include "mes_v12_api_def.h"
34 #include "gfx_v12_1_pkt.h"
35 #include "sdma_v7_1_0_pkt_open.h"
36 
37 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin");
38 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin");
39 MODULE_FIRMWARE("amdgpu/gc_12_1_0_uni_mes.bin");
40 
41 static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block);
42 static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id);
43 static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block);
44 static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id);
45 static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id);
46 static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id);
47 
48 #define MES_EOP_SIZE   2048
49 
50 #define regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT 0x100000
51 #define XCC_MID_MASK 0x41000000
52 
53 static void mes_v12_1_ring_set_wptr(struct amdgpu_ring *ring)
54 {
55 	struct amdgpu_device *adev = ring->adev;
56 
57 	if (ring->use_doorbell) {
58 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
59 			     ring->wptr);
60 		WDOORBELL64(ring->doorbell_index, ring->wptr);
61 	} else {
62 		BUG();
63 	}
64 }
65 
/* Return the current read pointer from the CPU-visible rptr shadow. */
static u64 mes_v12_1_ring_get_rptr(struct amdgpu_ring *ring)
{
	return *ring->rptr_cpu_addr;
}
70 
71 static u64 mes_v12_1_ring_get_wptr(struct amdgpu_ring *ring)
72 {
73 	u64 wptr;
74 
75 	if (ring->use_doorbell)
76 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
77 	else
78 		BUG();
79 	return wptr;
80 }
81 
/*
 * Ring callbacks for the MES scheduler rings.  Pointers are 64-bit and
 * kept in CPU-visible shadow memory; submissions go out via doorbell.
 */
static const struct amdgpu_ring_funcs mes_v12_1_ring_funcs = {
	.type = AMDGPU_RING_TYPE_MES,
	.align_mask = 1,
	.nop = 0,
	.support_64bit_ptrs = true,
	.get_rptr = mes_v12_1_ring_get_rptr,
	.get_wptr = mes_v12_1_ring_get_wptr,
	.set_wptr = mes_v12_1_ring_set_wptr,
	.insert_nop = amdgpu_ring_insert_nop,
};
92 
/*
 * Human-readable names for the MES scheduler API opcodes, indexed by
 * opcode value; used only in debug/error log messages.  The spelling
 * "CHANGE_GANG_PRORITY" mirrors the MES API enum naming -- presumably
 * intentional to match mes_v12_api_def.h; do not "fix" independently.
 */
static const char *mes_v12_1_opcodes[] = {
	"SET_HW_RSRC",
	"SET_SCHEDULING_CONFIG",
	"ADD_QUEUE",
	"REMOVE_QUEUE",
	"PERFORM_YIELD",
	"SET_GANG_PRIORITY_LEVEL",
	"SUSPEND",
	"RESUME",
	"RESET",
	"SET_LOG_BUFFER",
	"CHANGE_GANG_PRORITY",
	"QUERY_SCHEDULER_STATUS",
	"unused",
	"SET_DEBUG_VMID",
	"MISC",
	"UPDATE_ROOT_PAGE_TABLE",
	"AMD_LOG",
	"SET_SE_MODE",
	"SET_GANG_SUBMIT",
	"SET_HW_RSRC_1",
	"INVALIDATE_TLBS",
};
116 
/*
 * Names for the sub-opcodes carried by MES_SCH_API_MISC packets, indexed
 * by the MISC sub-opcode value; used only for log messages.
 */
static const char *mes_v12_1_misc_opcodes[] = {
	"WRITE_REG",
	"INV_GART",
	"QUERY_STATUS",
	"READ_REG",
	"WAIT_REG_MEM",
	"SET_SHADER_DEBUGGER",
	"NOTIFY_WORK_ON_UNMAPPED_QUEUE",
	"NOTIFY_TO_UNMAP_PROCESSES",
};
127 
128 static const char *mes_v12_1_get_op_string(union MESAPI__MISC *x_pkt)
129 {
130 	const char *op_str = NULL;
131 
132 	if (x_pkt->header.opcode < ARRAY_SIZE(mes_v12_1_opcodes))
133 		op_str = mes_v12_1_opcodes[x_pkt->header.opcode];
134 
135 	return op_str;
136 }
137 
138 static const char *mes_v12_1_get_misc_op_string(union MESAPI__MISC *x_pkt)
139 {
140 	const char *op_str = NULL;
141 
142 	if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
143 	    (x_pkt->opcode < ARRAY_SIZE(mes_v12_1_misc_opcodes)))
144 		op_str = mes_v12_1_misc_opcodes[x_pkt->opcode];
145 
146 	return op_str;
147 }
148 
/*
 * mes_v12_1_submit_pkt_and_poll_completion - submit a MES API packet and
 * poll until the firmware acknowledges it.
 *
 * @mes: MES context
 * @xcc_id/@pipe: select which MES ring (via MES_PIPE_INST) to submit on
 * @pkt: the API frame to submit; its MES_API_STATUS member (located at
 *       @api_status_off bytes into the frame) is patched here to point at
 *       a freshly allocated writeback slot
 * @size: frame size in bytes
 * @api_status_off: byte offset of the MES_API_STATUS member inside @pkt
 *
 * Two packets are written back to back: the caller's packet, whose
 * completion fence targets a zeroed writeback slot, and a trailing
 * QUERY_SCHEDULER_STATUS packet whose completion fence targets the ring's
 * own fence address with a new sequence number.  Completion is detected by
 * polling the ring fence; the writeback slot is then checked to confirm
 * the caller's packet was actually processed (not just fetched).
 *
 * Returns 0 on success, -EINVAL for a bad opcode, -ETIMEDOUT when the
 * firmware does not respond, or a negative error from ring allocation.
 */
static int mes_v12_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
					    int xcc_id, int pipe, void *pkt,
					    int size, int api_status_off)
{
	union MESAPI__QUERY_MES_STATUS mes_status_pkt;
	signed long timeout = 2100000; /* 2100 ms */
	struct amdgpu_device *adev = mes->adev;
	struct amdgpu_ring *ring = &mes->ring[MES_PIPE_INST(xcc_id, pipe)];
	spinlock_t *ring_lock = &mes->ring_lock[MES_PIPE_INST(xcc_id, pipe)];
	struct MES_API_STATUS *api_status;
	union MESAPI__MISC *x_pkt = pkt;
	const char *op_str, *misc_op_str;
	unsigned long flags;
	u64 status_gpu_addr;
	u32 seq, status_offset;
	u64 *status_ptr;
	signed long r;
	int ret;

	/* Reject malformed frames before touching any resources. */
	if (x_pkt->header.opcode >= MES_SCH_API_MAX)
		return -EINVAL;

	/* Scale the poll timeout for slow environments. */
	if (amdgpu_emu_mode) {
		timeout *= 1000;
	} else if (amdgpu_sriov_vf(adev)) {
		/* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
		timeout = 15 * 600 * 1000;
	}

	/* Writeback slot the firmware will set when the packet completes. */
	ret = amdgpu_device_wb_get(adev, &status_offset);
	if (ret)
		return ret;

	status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
	status_ptr = (u64 *)&adev->wb.wb[status_offset];
	*status_ptr = 0;

	spin_lock_irqsave(ring_lock, flags);
	/* Reserve space for the caller's packet plus the trailing
	 * QUERY_SCHEDULER_STATUS packet (sizes are in dwords).
	 */
	r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
	if (r)
		goto error_unlock_free;

	seq = ++ring->fence_drv.sync_seq;
	/* Make sure the oldest in-flight fence slot has retired before
	 * reusing its sequence window.
	 */
	r = amdgpu_fence_wait_polling(ring,
				      seq - ring->fence_drv.num_fences_mask,
				      timeout);
	if (r < 1)
		goto error_undo;

	/* Point the caller's completion fence at our writeback slot. */
	api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
	api_status->api_completion_fence_addr = status_gpu_addr;
	api_status->api_completion_fence_value = 1;

	amdgpu_ring_write_multiple(ring, pkt, size / 4);

	/* Trailing status query doubles as the fence write for @seq. */
	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
	mes_status_pkt.api_status.api_completion_fence_addr =
		ring->fence_drv.gpu_addr;
	mes_status_pkt.api_status.api_completion_fence_value = seq;

	amdgpu_ring_write_multiple(ring, &mes_status_pkt,
				   sizeof(mes_status_pkt) / 4);

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(ring_lock, flags);

	op_str = mes_v12_1_get_op_string(x_pkt);
	misc_op_str = mes_v12_1_get_misc_op_string(x_pkt);

	if (misc_op_str)
		dev_dbg(adev->dev, "MES(%d, %d) msg=%s (%s) was emitted\n",
			xcc_id, pipe, op_str, misc_op_str);
	else if (op_str)
		dev_dbg(adev->dev, "MES(%d, %d) msg=%s was emitted\n",
			xcc_id, pipe, op_str);
	else
		dev_dbg(adev->dev, "MES(%d, %d) msg=%d was emitted\n",
			xcc_id, pipe, x_pkt->header.opcode);

	/* Wait for the trailing fence, then verify the caller's packet
	 * completed too (firmware wrote our writeback slot).
	 */
	r = amdgpu_fence_wait_polling(ring, seq, timeout);
	if (r < 1 || !*status_ptr) {
		if (misc_op_str)
			dev_err(adev->dev,
				"MES(%d, %d) failed to respond to msg=%s (%s)\n",
				xcc_id, pipe, op_str, misc_op_str);
		else if (op_str)
			dev_err(adev->dev,
				"MES(%d, %d) failed to respond to msg=%s\n",
				xcc_id, pipe, op_str);
		else
			dev_err(adev->dev,
				"MES(%d, %d) failed to respond to msg=%d\n",
				xcc_id, pipe, x_pkt->header.opcode);

		/* Debug aid: park here so the hang state can be inspected. */
		while (halt_if_hws_hang)
			schedule();

		r = -ETIMEDOUT;
		goto error_wb_free;
	}

	amdgpu_device_wb_free(adev, status_offset);
	return 0;

error_undo:
	dev_err(adev->dev, "MES(%d, %d) ring buffer is full.\n", xcc_id, pipe);
	amdgpu_ring_undo(ring);

error_unlock_free:
	spin_unlock_irqrestore(ring_lock, flags);

error_wb_free:
	amdgpu_device_wb_free(adev, status_offset);
	return r;
}
267 
268 static int convert_to_mes_queue_type(int queue_type)
269 {
270 	if (queue_type == AMDGPU_RING_TYPE_GFX)
271 		return MES_QUEUE_TYPE_GFX;
272 	else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
273 		return MES_QUEUE_TYPE_COMPUTE;
274 	else if (queue_type == AMDGPU_RING_TYPE_SDMA)
275 		return MES_QUEUE_TYPE_SDMA;
276 	else if (queue_type == AMDGPU_RING_TYPE_MES)
277 		return MES_QUEUE_TYPE_SCHQ;
278 	else
279 		BUG();
280 	return -1;
281 }
282 
283 static int mes_v12_1_add_hw_queue(struct amdgpu_mes *mes,
284 				  struct mes_add_queue_input *input)
285 {
286 	union MESAPI__ADD_QUEUE mes_add_queue_pkt;
287 	int xcc_id = input->xcc_id;
288 	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);
289 
290 	if (mes->enable_coop_mode)
291 		xcc_id = mes->master_xcc_ids[inst];
292 
293 	memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
294 
295 	mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
296 	mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
297 	mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
298 
299 	mes_add_queue_pkt.process_id = input->process_id;
300 	mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
301 	mes_add_queue_pkt.process_va_start = input->process_va_start;
302 	mes_add_queue_pkt.process_va_end = input->process_va_end;
303 	mes_add_queue_pkt.process_quantum = input->process_quantum;
304 	mes_add_queue_pkt.process_context_addr = input->process_context_addr;
305 	mes_add_queue_pkt.gang_quantum = input->gang_quantum;
306 	mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
307 	mes_add_queue_pkt.inprocess_gang_priority =
308 		input->inprocess_gang_priority;
309 	mes_add_queue_pkt.gang_global_priority_level =
310 		input->gang_global_priority_level;
311 	mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
312 	mes_add_queue_pkt.mqd_addr = input->mqd_addr;
313 
314 	mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr;
315 
316 	mes_add_queue_pkt.queue_type =
317 		convert_to_mes_queue_type(input->queue_type);
318 	mes_add_queue_pkt.paging = input->paging;
319 	mes_add_queue_pkt.vm_context_cntl = input->vm_cntx_cntl;
320 	mes_add_queue_pkt.gws_base = input->gws_base;
321 	mes_add_queue_pkt.gws_size = input->gws_size;
322 	mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
323 	mes_add_queue_pkt.tma_addr = input->tma_addr;
324 	mes_add_queue_pkt.trap_en = input->trap_en;
325 	mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
326 	mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
327 
328 	/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
329 	mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
330 	mes_add_queue_pkt.gds_size = input->queue_size;
331 
332 	/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
333 	mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
334 	mes_add_queue_pkt.gds_size = input->queue_size;
335 
336 	mes_add_queue_pkt.full_sh_mem_config_data = input->sh_mem_config_data;
337 
338 	return mes_v12_1_submit_pkt_and_poll_completion(mes,
339 			xcc_id, AMDGPU_MES_SCHED_PIPE,
340 			&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
341 			offsetof(union MESAPI__ADD_QUEUE, api_status));
342 }
343 
344 static int mes_v12_1_remove_hw_queue(struct amdgpu_mes *mes,
345 				     struct mes_remove_queue_input *input)
346 {
347 	union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
348 	int xcc_id = input->xcc_id;
349 	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);
350 
351 	if (mes->enable_coop_mode)
352 		xcc_id = mes->master_xcc_ids[inst];
353 
354 	memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
355 
356 	mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
357 	mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
358 	mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
359 
360 	mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
361 	mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
362 
363 	return mes_v12_1_submit_pkt_and_poll_completion(mes,
364 			xcc_id, AMDGPU_MES_SCHED_PIPE,
365 			&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
366 			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
367 }
368 
369 static int mes_v12_1_reset_hw_queue(struct amdgpu_mes *mes,
370 				    struct mes_reset_queue_input *input)
371 {
372 	union MESAPI__RESET mes_reset_queue_pkt;
373 	int pipe;
374 
375 	memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
376 
377 	mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
378 	mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
379 	mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
380 
381 	mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
382 	/* mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; */
383 	/*mes_reset_queue_pkt.reset_queue_only = 1;*/
384 
385 	if (mes->adev->enable_uni_mes)
386 		pipe = AMDGPU_MES_KIQ_PIPE;
387 	else
388 		pipe = AMDGPU_MES_SCHED_PIPE;
389 
390 	return mes_v12_1_submit_pkt_and_poll_completion(mes,
391 			input->xcc_id, pipe,
392 			&mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
393 			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
394 }
395 
396 static int mes_v12_1_map_legacy_queue(struct amdgpu_mes *mes,
397 				      struct mes_map_legacy_queue_input *input)
398 {
399 	union MESAPI__ADD_QUEUE mes_add_queue_pkt;
400 	int pipe;
401 
402 	memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
403 
404 	mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
405 	mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
406 	mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
407 
408 	mes_add_queue_pkt.pipe_id = input->pipe_id;
409 	mes_add_queue_pkt.queue_id = input->queue_id;
410 	mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
411 	mes_add_queue_pkt.mqd_addr = input->mqd_addr;
412 	mes_add_queue_pkt.wptr_addr = input->wptr_addr;
413 	mes_add_queue_pkt.queue_type =
414 		convert_to_mes_queue_type(input->queue_type);
415 	mes_add_queue_pkt.map_legacy_kq = 1;
416 
417 	if (mes->adev->enable_uni_mes)
418 		pipe = AMDGPU_MES_KIQ_PIPE;
419 	else
420 		pipe = AMDGPU_MES_SCHED_PIPE;
421 
422 	return mes_v12_1_submit_pkt_and_poll_completion(mes,
423 			input->xcc_id, pipe,
424 			&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
425 			offsetof(union MESAPI__ADD_QUEUE, api_status));
426 }
427 
428 static int mes_v12_1_unmap_legacy_queue(struct amdgpu_mes *mes,
429 			struct mes_unmap_legacy_queue_input *input)
430 {
431 	union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
432 	int pipe;
433 
434 	memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
435 
436 	mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
437 	mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
438 	mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
439 
440 	mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
441 	mes_remove_queue_pkt.gang_context_addr = 0;
442 
443 	mes_remove_queue_pkt.pipe_id = input->pipe_id;
444 	mes_remove_queue_pkt.queue_id = input->queue_id;
445 
446 	if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
447 		mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
448 		mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
449 		mes_remove_queue_pkt.tf_data =
450 			lower_32_bits(input->trail_fence_data);
451 	} else {
452 		mes_remove_queue_pkt.unmap_legacy_queue = 1;
453 		mes_remove_queue_pkt.queue_type =
454 			convert_to_mes_queue_type(input->queue_type);
455 	}
456 
457 	if (mes->adev->enable_uni_mes)
458 		pipe = AMDGPU_MES_KIQ_PIPE;
459 	else
460 		pipe = AMDGPU_MES_SCHED_PIPE;
461 
462 	return mes_v12_1_submit_pkt_and_poll_completion(mes,
463 			input->xcc_id, pipe,
464 			&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
465 			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
466 }
467 
/* Gang suspend is a no-op on this IP version; report success. */
static int mes_v12_1_suspend_gang(struct amdgpu_mes *mes,
				  struct mes_suspend_gang_input *input)
{
	return 0;
}
473 
/* Gang resume is a no-op on this IP version; report success. */
static int mes_v12_1_resume_gang(struct amdgpu_mes *mes,
				 struct mes_resume_gang_input *input)
{
	return 0;
}
479 
480 static int mes_v12_1_query_sched_status(struct amdgpu_mes *mes,
481 					  int pipe, int xcc_id)
482 {
483 	union MESAPI__QUERY_MES_STATUS mes_status_pkt;
484 
485 	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
486 
487 	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
488 	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
489 	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
490 
491 	return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe,
492 			&mes_status_pkt, sizeof(mes_status_pkt),
493 			offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
494 }
495 static uint32_t mes_v12_1_get_xcc_from_reg(uint32_t reg_offset)
496 {
497 	return ((reg_offset >> 16) & 0x7);
498 }
499 
/*
 * Fill @rrmt_opt with the routing the MES firmware needs to reach @reg
 * from XCC @xcc_id, and return the normalized register offset in @out_reg.
 *
 * NOTE(review): the local/remote-XCD vs remote-MID split is inferred from
 * the soc_v1_0 range helpers; confirm the RRMT mode semantics against the
 * MES API definition.
 */
static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id,
			       struct RRMT_OPTION *rrmt_opt,
			       uint32_t *out_reg)
{
	uint32_t normalized_reg = soc_v1_0_normalize_xcc_reg_offset(reg);

	if (soc_v1_0_normalize_xcc_reg_range(normalized_reg)) {
		/* Register is in per-XCC space: target the owning XCD,
		 * locally when it is the submitting XCC, remotely otherwise.
		 */
		rrmt_opt->xcd_die_id = mes_v12_1_get_xcc_from_reg(reg);
		rrmt_opt->mode = (xcc_id == rrmt_opt->xcd_die_id) ?
			 MES_RRMT_MODE_LOCAL_XCD : MES_RRMT_MODE_REMOTE_XCD;
	} else {
		/* Otherwise route to a MID die; die 1 when the offset falls
		 * in the MID1 range, die 0 (zeroed by caller) otherwise.
		 */
		rrmt_opt->mode = MES_RRMT_MODE_REMOTE_MID;
		if (soc_v1_0_mid1_reg_range(reg))
			rrmt_opt->mid_die_id = 1;
	}

	*out_reg = soc_v1_0_normalize_reg_offset(reg);
}
518 
/*
 * mes_v12_1_misc_op - build and submit a MES_SCH_API_MISC packet for one
 * of the miscellaneous firmware operations (register read/write, wait,
 * shader-debugger setup, config change).
 *
 * Returns 0 on success, -EINVAL for an unsupported op, or a negative
 * error from packet submission.
 */
static int mes_v12_1_misc_op(struct amdgpu_mes *mes,
			     struct mes_misc_op_input *input)
{
	struct amdgpu_device *adev = mes->adev;
	union MESAPI__MISC misc_pkt;
	int pipe;

	/* Unified MES handles misc ops on the KIQ pipe by default. */
	if (mes->adev->enable_uni_mes)
		pipe = AMDGPU_MES_KIQ_PIPE;
	else
		pipe = AMDGPU_MES_SCHED_PIPE;

	memset(&misc_pkt, 0, sizeof(misc_pkt));

	misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
	misc_pkt.header.opcode = MES_SCH_API_MISC;
	misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	switch (input->op) {
	case MES_MISC_OP_READ_REG:
		/* Firmware reads the register and stores it at buffer_addr. */
		misc_pkt.opcode = MESAPI_MISC__READ_REG;
		misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
		mes_v12_1_get_rrmt(input->read_reg.reg_offset,
				   GET_INST(GC, input->xcc_id),
				   &misc_pkt.read_reg.rrmt_opt,
				   &misc_pkt.read_reg.reg_offset);
		break;
	case MES_MISC_OP_WRITE_REG:
		misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
		misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
		mes_v12_1_get_rrmt(input->write_reg.reg_offset,
				   GET_INST(GC, input->xcc_id),
				   &misc_pkt.write_reg.rrmt_opt,
				   &misc_pkt.write_reg.reg_offset);
		break;
	case MES_MISC_OP_WRM_REG_WAIT:
		/* Poll reg0 until (value & mask) matches reference. */
		misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
		misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
		misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
		misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
		misc_pkt.wait_reg_mem.reg_offset2 = 0;
		mes_v12_1_get_rrmt(input->wrm_reg.reg0,
				   GET_INST(GC, input->xcc_id),
				   &misc_pkt.wait_reg_mem.rrmt_opt1,
				   &misc_pkt.wait_reg_mem.reg_offset1);
		break;
	case MES_MISC_OP_WRM_REG_WR_WAIT:
		/* Write-then-wait variant involving both reg0 and reg1. */
		misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
		misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
		misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
		misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
		mes_v12_1_get_rrmt(input->wrm_reg.reg0,
				   GET_INST(GC, input->xcc_id),
				   &misc_pkt.wait_reg_mem.rrmt_opt1,
				   &misc_pkt.wait_reg_mem.reg_offset1);
		mes_v12_1_get_rrmt(input->wrm_reg.reg1,
				   GET_INST(GC, input->xcc_id),
				   &misc_pkt.wait_reg_mem.rrmt_opt2,
				   &misc_pkt.wait_reg_mem.reg_offset2);
		break;
	case MES_MISC_OP_SET_SHADER_DEBUGGER:
		/* Debugger setup always goes to the scheduler pipe. */
		pipe = AMDGPU_MES_SCHED_PIPE;
		misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
		misc_pkt.set_shader_debugger.process_context_addr =
				input->set_shader_debugger.process_context_addr;
		misc_pkt.set_shader_debugger.flags.u32all =
				input->set_shader_debugger.flags.u32all;
		misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl =
				input->set_shader_debugger.spi_gdbg_per_vmid_cntl;
		memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
				input->set_shader_debugger.tcp_watch_cntl,
				sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
		misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en;
		break;
	case MES_MISC_OP_CHANGE_CONFIG:
		misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
		misc_pkt.change_config.opcode =
			MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS;
		misc_pkt.change_config.option.bits.limit_single_process =
			input->change_config.option.limit_single_process;
		break;
	default:
		DRM_ERROR("unsupported misc op (%d) \n", input->op);
		return -EINVAL;
	}

	return mes_v12_1_submit_pkt_and_poll_completion(mes,
			input->xcc_id, pipe,
			&misc_pkt, sizeof(misc_pkt),
			offsetof(union MESAPI__MISC, api_status));
}
610 
/*
 * mes_v12_1_set_hw_resources_1 - send the secondary SET_HW_RSRC_1 packet,
 * which configures the KIQ unmap timeout and, when cooperative mode is
 * enabled on the scheduler pipe, the shared command buffer used to
 * coordinate with the master XCC.
 */
static int mes_v12_1_set_hw_resources_1(struct amdgpu_mes *mes,
					  int pipe, int xcc_id)
{
	union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt;
	int master_xcc_id, inst = MES_PIPE_INST(xcc_id, pipe);

	memset(&mes_set_hw_res_1_pkt, 0, sizeof(mes_set_hw_res_1_pkt));

	mes_set_hw_res_1_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
	mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
	/* Timeout (units defined by the MES API) for KIQ unmap requests. */
	mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100;

	if (mes->enable_coop_mode && pipe == AMDGPU_MES_SCHED_PIPE) {
		/* Point this XCC at the master's shared command buffer. */
		master_xcc_id = mes->master_xcc_ids[inst];
		mes_set_hw_res_1_pkt.mes_coop_mode = 1;
		mes_set_hw_res_1_pkt.coop_sch_shared_mc_addr =
			mes->shared_cmd_buf_gpu_addr[master_xcc_id];
	}

	return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe,
			&mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt),
			offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
}
635 
/* Tell MES which GFX hardware queues it may schedule on (pipe 0 only). */
static void mes_v12_1_set_gfx_hqd_mask(union MESAPI_SET_HW_RESOURCES *pkt)
{
	/*
	 * GFX V12 has only one GFX pipe, but 8 queues in it.
	 * GFX pipe 0 queue 0 is being used by Kernel queue.
	 * Set GFX pipe 0 queue 1-7 for MES scheduling
	 * mask = 1111 1110b
	 */
	pkt->gfx_hqd_mask[0] = 0xFE;
}
646 
/*
 * mes_v12_1_set_hw_resources - send the main SET_HW_RSRC packet that hands
 * the firmware its hardware resources: VMID masks, HQD masks, aggregated
 * doorbells, context/fence buffers, register apertures and feature flags.
 *
 * Only the scheduler pipe gets the queue/VMID resources; both pipes get
 * the context pointers, register bases and feature flags.
 */
static int mes_v12_1_set_hw_resources(struct amdgpu_mes *mes,
					int pipe, int xcc_id)
{
	int i;
	struct amdgpu_device *adev = mes->adev;
	union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;

	memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));

	mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
	mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	if (pipe == AMDGPU_MES_SCHED_PIPE) {
		/* Scheduling resources are owned by the scheduler pipe only. */
		mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
		mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
		mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
		mes_set_hw_res_pkt.paging_vmid = 0;

		for (i = 0; i < MAX_COMPUTE_PIPES; i++)
			mes_set_hw_res_pkt.compute_hqd_mask[i] =
				mes->compute_hqd_mask[i];

		mes_v12_1_set_gfx_hqd_mask(&mes_set_hw_res_pkt);

		for (i = 0; i < MAX_SDMA_PIPES; i++)
			mes_set_hw_res_pkt.sdma_hqd_mask[i] =
				mes->sdma_hqd_mask[i];

		for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
			mes_set_hw_res_pkt.aggregated_doorbells[i] =
				mes->aggregated_doorbells[i];
	}

	mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr =
		mes->sch_ctx_gpu_addr[pipe];
	mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
		mes->query_status_fence_gpu_addr[pipe];

	/* Five register-aperture segments per IP block -- presumably the
	 * number of reg_offset segments for this SOC; TODO confirm.
	 */
	for (i = 0; i < 5; i++) {
		mes_set_hw_res_pkt.gc_base[i] =
			adev->reg_offset[GC_HWIP][0][i];
		mes_set_hw_res_pkt.mmhub_base[i] =
				adev->reg_offset[MMHUB_HWIP][0][i];
		mes_set_hw_res_pkt.osssys_base[i] =
		adev->reg_offset[OSSSYS_HWIP][0][i];
	}

	mes_set_hw_res_pkt.disable_reset = 1;
	mes_set_hw_res_pkt.disable_mes_log = 1;
	mes_set_hw_res_pkt.use_different_vmid_compute = 1;
	mes_set_hw_res_pkt.enable_reg_active_poll = 1;
	mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;

	/*
	 * Keep oversubscribe timer for sdma . When we have unmapped doorbell
	 * handling support, other queue will not use the oversubscribe timer.
	 * handling  mode - 0: disabled; 1: basic version; 2: basic+ version
	 */
	mes_set_hw_res_pkt.oversubscription_timer = 50;
	mes_set_hw_res_pkt.unmapped_doorbell_handling = 1;

	if (amdgpu_mes_log_enable) {
		/* Per-pipe slice of the shared event log buffer. */
		mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
		mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr =
			mes->event_log_gpu_addr + MES_PIPE_INST(xcc_id, pipe) * AMDGPU_MES_LOG_BUFFER_SIZE;
	}

	if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
		mes_set_hw_res_pkt.limit_single_process = 1;

	return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe,
			&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
			offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
}
722 
/*
 * mes_v12_1_init_aggregated_doorbell - program the five per-priority
 * aggregated doorbell registers (CP_MES_DOORBELL_CONTROL1..5, one per
 * AMDGPU_MES_PRIORITY_LEVEL_* from LOW to REALTIME) with their doorbell
 * offsets and enable bits, then enable doorbell-updated messages on the
 * GFX HQD.
 */
static void mes_v12_1_init_aggregated_doorbell(struct amdgpu_mes *mes,
						 int xcc_id)
{
	struct amdgpu_device *adev = mes->adev;
	uint32_t data;

	/* LOW priority -> CONTROL1 */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1);
	data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
		CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1, data);

	/* NORMAL priority -> CONTROL2 */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2);
	data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
		CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2, data);

	/* MEDIUM priority -> CONTROL3 */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3);
	data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
		CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3, data);

	/* HIGH priority -> CONTROL4 */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4);
	data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
		CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4, data);

	/* REALTIME priority -> CONTROL5 */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5);
	data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
		CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5, data);

	/* Notify the GFX HQD when its doorbell is updated. */
	data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_GFX_CONTROL, data);
}
777 
778 
/*
 * Enable or disable firmware handling of doorbell writes to queues that
 * are not currently mapped, and set the doorbell page granularity.
 */
static void mes_v12_1_enable_unmapped_doorbell_handling(
	struct amdgpu_mes *mes, bool enable, int xcc_id)
{
	struct amdgpu_device *adev = mes->adev;
	uint32_t data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL);

	/*
	 * The default PROC_LSB setting is 0xc which means doorbell
	 * addr[16:12] gives the doorbell page number. For kfd, each
	 * process will use 2 pages of doorbell, we need to change the
	 * setting to 0xd
	 */
	data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK;
	data |= 0xd <<  CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT;

	data |= (enable ? 1 : 0) << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT;

	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL, data);
}
798 
#if 0
/*
 * Compiled out: legacy-queue reset path.  Not wired into mes_v12_1_funcs;
 * presumably kept for reference until legacy reset is supported on this
 * IP version -- TODO confirm before enabling.
 */
static int mes_v12_1_reset_legacy_queue(struct amdgpu_mes *mes,
					struct mes_reset_legacy_queue_input *input)
{
	union MESAPI__RESET mes_reset_queue_pkt;
	int pipe;

	memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));

	mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
	mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_reset_queue_pkt.queue_type =
		convert_to_mes_queue_type(input->queue_type);

	if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) {
		/* GFX uses the legacy-pipe ("_lp") reset fields. */
		mes_reset_queue_pkt.reset_legacy_gfx = 1;
		mes_reset_queue_pkt.pipe_id_lp = input->pipe_id;
		mes_reset_queue_pkt.queue_id_lp = input->queue_id;
		mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr;
		mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset;
		mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr;
		mes_reset_queue_pkt.vmid_id_lp = input->vmid;
	} else {
		mes_reset_queue_pkt.reset_queue_only = 1;
		mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
	}

	if (mes->adev->enable_uni_mes)
		pipe = AMDGPU_MES_KIQ_PIPE;
	else
		pipe = AMDGPU_MES_SCHED_PIPE;

	return mes_v12_1_submit_pkt_and_poll_completion(mes,
			input->xcc_id, pipe,
			&mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
			offsetof(union MESAPI__RESET, api_status));
}
#endif
839 
840 static int mes_v12_inv_tlb_convert_hub_id(uint8_t id)
841 {
842 	/*
843 	 * MES doesn't support invalidate gc_hub on slave xcc individually
844 	 * master xcc will invalidate all gc_hub for the partition
845 	 */
846 	if (AMDGPU_IS_GFXHUB(id))
847 		return 0;
848 	else if (AMDGPU_IS_MMHUB0(id))
849 		return 1;
850 	else if (AMDGPU_IS_MMHUB1(id))
851 		return 2;
852 	return -EINVAL;
853 
854 }
855 
856 static int mes_v12_1_inv_tlbs_pasid(struct amdgpu_mes *mes,
857 				    struct mes_inv_tlbs_pasid_input *input)
858 {
859 	union MESAPI__INV_TLBS mes_inv_tlbs;
860 	int xcc_id = input->xcc_id;
861 	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);
862 	int ret;
863 
864 	if (mes->enable_coop_mode)
865 		xcc_id = mes->master_xcc_ids[inst];
866 
867 	memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
868 
869 	mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
870 	mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
871 	mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
872 
873 	mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
874 	mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
875 	mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
876 
877 	/*convert amdgpu_mes_hub_id to mes expected hub_id */
878 	ret = mes_v12_inv_tlb_convert_hub_id(input->hub_id);
879 	if (ret < 0)
880 		return -EINVAL;
881 	mes_inv_tlbs.invalidate_tlbs.hub_id = ret;
882 	return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, AMDGPU_MES_KIQ_PIPE,
883 			&mes_inv_tlbs, sizeof(mes_inv_tlbs),
884 			offsetof(union MESAPI__INV_TLBS, api_status));
885 
886 }
887 
/* MES v12.1 callbacks wired into the shared amdgpu_mes layer */
static const struct amdgpu_mes_funcs mes_v12_1_funcs = {
	.add_hw_queue = mes_v12_1_add_hw_queue,
	.remove_hw_queue = mes_v12_1_remove_hw_queue,
	.map_legacy_queue = mes_v12_1_map_legacy_queue,
	.unmap_legacy_queue = mes_v12_1_unmap_legacy_queue,
	.suspend_gang = mes_v12_1_suspend_gang,
	.resume_gang = mes_v12_1_resume_gang,
	.misc_op = mes_v12_1_misc_op,
	.reset_hw_queue = mes_v12_1_reset_hw_queue,
	.invalidate_tlbs_pasid = mes_v12_1_inv_tlbs_pasid,
};
899 
900 static int mes_v12_1_allocate_ucode_buffer(struct amdgpu_device *adev,
901 					     enum amdgpu_mes_pipe pipe,
902 					     int xcc_id)
903 {
904 	int r, inst = MES_PIPE_INST(xcc_id, pipe);
905 	const struct mes_firmware_header_v1_0 *mes_hdr;
906 	const __le32 *fw_data;
907 	unsigned fw_size;
908 
909 	mes_hdr = (const struct mes_firmware_header_v1_0 *)
910 		adev->mes.fw[pipe]->data;
911 
912 	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
913 		   le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
914 	fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
915 
916 	r = amdgpu_bo_create_reserved(adev, fw_size,
917 				      PAGE_SIZE,
918 				      AMDGPU_GEM_DOMAIN_VRAM,
919 				      &adev->mes.ucode_fw_obj[inst],
920 				      &adev->mes.ucode_fw_gpu_addr[inst],
921 				      (void **)&adev->mes.ucode_fw_ptr[inst]);
922 	if (r) {
923 		dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
924 		return r;
925 	}
926 
927 	memcpy(adev->mes.ucode_fw_ptr[inst], fw_data, fw_size);
928 
929 	amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[inst]);
930 	amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[inst]);
931 
932 	return 0;
933 }
934 
935 static int mes_v12_1_allocate_ucode_data_buffer(struct amdgpu_device *adev,
936 						  enum amdgpu_mes_pipe pipe,
937 						  int xcc_id)
938 {
939 	int r, inst = MES_PIPE_INST(xcc_id, pipe);
940 	const struct mes_firmware_header_v1_0 *mes_hdr;
941 	const __le32 *fw_data;
942 	unsigned fw_size;
943 
944 	mes_hdr = (const struct mes_firmware_header_v1_0 *)
945 		adev->mes.fw[pipe]->data;
946 
947 	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
948 		   le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
949 	fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
950 
951 	r = amdgpu_bo_create_reserved(adev, fw_size,
952 				      64 * 1024,
953 				      AMDGPU_GEM_DOMAIN_VRAM,
954 				      &adev->mes.data_fw_obj[inst],
955 				      &adev->mes.data_fw_gpu_addr[inst],
956 				      (void **)&adev->mes.data_fw_ptr[inst]);
957 	if (r) {
958 		dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
959 		return r;
960 	}
961 
962 	memcpy(adev->mes.data_fw_ptr[inst], fw_data, fw_size);
963 
964 	amdgpu_bo_kunmap(adev->mes.data_fw_obj[inst]);
965 	amdgpu_bo_unreserve(adev->mes.data_fw_obj[inst]);
966 
967 	return 0;
968 }
969 
970 static void mes_v12_1_free_ucode_buffers(struct amdgpu_device *adev,
971 					   enum amdgpu_mes_pipe pipe,
972 					   int xcc_id)
973 {
974 	int inst = MES_PIPE_INST(xcc_id, pipe);
975 
976 	amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[inst],
977 			      &adev->mes.data_fw_gpu_addr[inst],
978 			      (void **)&adev->mes.data_fw_ptr[inst]);
979 
980 	amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[inst],
981 			      &adev->mes.ucode_fw_gpu_addr[inst],
982 			      (void **)&adev->mes.ucode_fw_ptr[inst]);
983 }
984 
/*
 * Start or stop both MES pipes of one xcc.
 *
 * enable == true: hold both pipes in reset, program each pipe's ucode
 * start address under the SRBM mutex (me=3 selected via GRBM), then
 * clear halt/reset so the pipes begin executing.
 * enable == false: deactivate both pipes, request an icache invalidate
 * and leave the engine halted in reset.
 */
static void mes_v12_1_enable(struct amdgpu_device *adev,
			       bool enable, int xcc_id)
{
	uint64_t ucode_addr;
	uint32_t pipe, data = 0;

	if (enable) {
		/* hold both pipes in reset while the start address is set up */
		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);

		mutex_lock(&adev->srbm_mutex);
		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
			/* me=3, queue=0 selects this MES pipe's registers */
			soc_v1_0_grbm_select(adev, 3, pipe, 0, 0,
					     GET_INST(GC, xcc_id));

			/* start address is programmed in dwords (byte addr >> 2) */
			ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
			WREG32_SOC15(GC, GET_INST(GC, xcc_id),
				     regCP_MES_PRGRM_CNTR_START,
				     lower_32_bits(ucode_addr));
			WREG32_SOC15(GC, GET_INST(GC, xcc_id),
				     regCP_MES_PRGRM_CNTR_START_HI,
				     upper_32_bits(ucode_addr));
		}
		soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
		mutex_unlock(&adev->srbm_mutex);

		/* unhalt MES and activate both pipes */
		data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);

		/* give the firmware time to start; emulation is far slower */
		if (amdgpu_emu_mode)
			msleep(500);
		else if (adev->enable_uni_mes)
			udelay(500);
		else
			udelay(50);
	} else {
		/* deactivate, invalidate icache and park both pipes in reset */
		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL,
				     MES_INVALIDATE_ICACHE, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);
	}
}
1036 
/*
 * Program each MES pipe's ucode entry point into
 * CP_MES_PRGRM_CNTR_START(_HI).  The engine is halted first so the
 * start address cannot change underneath a running pipe.
 */
static void mes_v12_1_set_ucode_start_addr(struct amdgpu_device *adev,
					     int xcc_id)
{
	uint64_t ucode_addr;
	int pipe;

	mes_v12_1_enable(adev, false, xcc_id);

	mutex_lock(&adev->srbm_mutex);
	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		/* me=3, queue=0 */
		soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));

		/* set ucode start address (in dwords, hence the >> 2) */
		ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START,
				lower_32_bits(ucode_addr));
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START_HI,
				upper_32_bits(ucode_addr));

		soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
	}
	mutex_unlock(&adev->srbm_mutex);
}
1061 
/*
 * This function is for backdoor (direct register) MES firmware loading:
 * copy the instruction and data images into VRAM buffers, point the
 * CP's IC/MD base registers at them and, when @prime_icache is set,
 * invalidate and prime the instruction cache.  If the data buffer
 * allocation fails, the ucode buffers allocated here are freed again.
 */
static int mes_v12_1_load_microcode(struct amdgpu_device *adev,
				      enum amdgpu_mes_pipe pipe,
				      bool prime_icache, int xcc_id)
{
	int r, inst = MES_PIPE_INST(xcc_id, pipe);
	uint32_t data;

	/* halt the engine before touching its firmware base registers */
	mes_v12_1_enable(adev, false, xcc_id);

	if (!adev->mes.fw[pipe])
		return -EINVAL;

	r = mes_v12_1_allocate_ucode_buffer(adev, pipe, xcc_id);
	if (r)
		return r;

	r = mes_v12_1_allocate_ucode_data_buffer(adev, pipe, xcc_id);
	if (r) {
		mes_v12_1_free_ucode_buffers(adev, pipe, xcc_id);
		return r;
	}

	mutex_lock(&adev->srbm_mutex);
	/* me=3, pipe=0, queue=0 */
	soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));

	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_CNTL, 0);

	/* set ucode firmware address */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_LO,
		     lower_32_bits(adev->mes.ucode_fw_gpu_addr[inst]));
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_HI,
		     upper_32_bits(adev->mes.ucode_fw_gpu_addr[inst]));

	/* set ucode instruction cache boundary to 2M-1 */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MIBOUND_LO, 0x1FFFFF);

	/* set ucode data firmware address */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_LO,
		     lower_32_bits(adev->mes.data_fw_gpu_addr[inst]));
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_HI,
		     upper_32_bits(adev->mes.data_fw_gpu_addr[inst]));

	/* Set data cache boundary CP_MES_MDBOUND_LO */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBOUND_LO, 0x7FFFF);

	if (prime_icache) {
		/* invalidate ICACHE */
		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data);

		/* prime the ICACHE. */
		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data);
	}

	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
	mutex_unlock(&adev->srbm_mutex);

	return 0;
}
1127 
1128 static int mes_v12_1_allocate_eop_buf(struct amdgpu_device *adev,
1129 					enum amdgpu_mes_pipe pipe,
1130 					int xcc_id)
1131 {
1132 	int r, inst = MES_PIPE_INST(xcc_id, pipe);
1133 	u32 *eop;
1134 
1135 	r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
1136 			      AMDGPU_GEM_DOMAIN_GTT,
1137 			      &adev->mes.eop_gpu_obj[inst],
1138 			      &adev->mes.eop_gpu_addr[inst],
1139 			      (void **)&eop);
1140 	if (r) {
1141 		dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
1142 		return r;
1143 	}
1144 
1145 	memset(eop, 0,
1146 	       adev->mes.eop_gpu_obj[inst]->tbo.base.size);
1147 
1148 	amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[inst]);
1149 	amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[inst]);
1150 
1151 	return 0;
1152 }
1153 
1154 static int mes_v12_1_allocate_shared_cmd_buf(struct amdgpu_device *adev,
1155 					     enum amdgpu_mes_pipe pipe,
1156 					     int xcc_id)
1157 {
1158 	int r, inst = MES_PIPE_INST(xcc_id, pipe);
1159 
1160 	if (pipe == AMDGPU_MES_KIQ_PIPE)
1161 		return 0;
1162 
1163 	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
1164 				    AMDGPU_GEM_DOMAIN_VRAM,
1165 				    &adev->mes.shared_cmd_buf_obj[inst],
1166 				    &adev->mes.shared_cmd_buf_gpu_addr[inst],
1167 				    NULL);
1168 	if (r) {
1169 		dev_err(adev->dev,
1170 			"(%d) failed to create shared cmd buf bo\n", r);
1171 		return r;
1172 	}
1173 
1174 	return 0;
1175 }
1176 
/*
 * Fill in the MQD (memory queue descriptor) for a MES ring.  The MQD
 * is later either programmed into HQD registers directly
 * (mes_v12_1_queue_init_register) or consumed by the KIQ/MES map-queue
 * path.  Only CPU-visible memory is written here; no registers.
 */
static int mes_v12_1_mqd_init(struct amdgpu_ring *ring)
{
	struct v12_1_mes_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	/* static MQD header and compute pipeline defaults */
	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000007;

	/* EOP base is stored as a 256-byte-aligned address (>> 8) */
	eop_base_addr = ring->eop_gpu_addr >> 8;

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(MES_EOP_SIZE / 4) - 1));

	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
	mqd->cp_hqd_eop_control = tmp;

	/* disable the queue if it's active */
	ring->wptr = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = regCP_MQD_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = ring->rptr_gpu_addr;
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = ring->wptr_gpu_addr;
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* enable doorbell */
	tmp = 0;
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}
	mqd->cp_hqd_pq_doorbell_control = tmp;

	mqd->cp_hqd_vmid = 0;
	/* activate the queue */
	mqd->cp_hqd_active = 1;

	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE,
			    PRELOAD_SIZE, 0x63);
	mqd->cp_hqd_persistent_state = tmp;

	mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT;
	mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
	mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;

	/*
	 * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped
	 * doorbell handling. This is a reserved CP internal register that
	 * cannot be accessed by others.
	 */
	mqd->cp_hqd_gfx_control = BIT(15);

	return 0;
}
1285 
/*
 * Program the HQD registers of a MES ring directly from its MQD — the
 * register-level equivalent of a KIQ map_queues.  Caller must have
 * filled ring->mqd_ptr via mes_v12_1_mqd_init() first.
 */
static void mes_v12_1_queue_init_register(struct amdgpu_ring *ring,
					    int xcc_id)
{
	struct v12_1_mes_mqd *mqd = ring->mqd_ptr;
	struct amdgpu_device *adev = ring->adev;
	uint32_t data = 0;

	mutex_lock(&adev->srbm_mutex);
	/* me=3, queue=0: select this ring's pipe */
	soc_v1_0_grbm_select(adev, 3, ring->pipe, 0, 0, GET_INST(GC, xcc_id));

	/* set CP_HQD_VMID.VMID = 0. */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID);
	data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, data);

	/* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL);
	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
			     DOORBELL_EN, 0);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data);

	/* set CP_MQD_BASE_ADDR/HI with the MQD base address */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/*
	 * set CP_MQD_CONTROL.VMID=0
	 * NOTE(review): 'data' is computed here but the literal 0 is
	 * written, clearing every other CP_MQD_CONTROL field as well —
	 * confirm this is intentional rather than a missed 'data'.
	 */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL);
	data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL, 0);

	/* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR,
		     mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		     mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* set CP_HQD_PQ_CONTROL */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

	/* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR,
		     mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		     mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* set CP_HQD_PQ_DOORBELL_CONTROL */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
		     mqd->cp_hqd_pq_doorbell_control);

	/* set CP_HQD_PERSISTENT_STATE (PRELOAD_SIZE comes from the MQD, 0x63) */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);

	/* set CP_HQD_ACTIVE.ACTIVE=1 */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, mqd->cp_hqd_active);

	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
	mutex_unlock(&adev->srbm_mutex);
}
1348 
1349 static int mes_v12_1_kiq_enable_queue(struct amdgpu_device *adev, int xcc_id)
1350 {
1351 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
1352 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[xcc_id].ring;
1353 	int r, inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);
1354 
1355 	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
1356 		return -EINVAL;
1357 
1358 	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
1359 	if (r) {
1360 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
1361 		return r;
1362 	}
1363 
1364 	kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[inst]);
1365 
1366 	r = amdgpu_ring_test_ring(kiq_ring);
1367 	if (r) {
1368 		DRM_ERROR("kfq enable failed\n");
1369 		kiq_ring->sched.ready = false;
1370 	}
1371 	return r;
1372 }
1373 
/*
 * Initialize and map one MES queue, then read back the firmware
 * version the pipe reports.  The scheduler pipe is mapped via the MES
 * legacy-map path (unified MES) or the gfx KIQ; the KIQ pipe is
 * programmed with direct register writes.
 */
static int mes_v12_1_queue_init(struct amdgpu_device *adev,
				  enum amdgpu_mes_pipe pipe,
				  int xcc_id)
{
	struct amdgpu_ring *ring;
	int r;

	/* without unified MES the KIQ pipe uses the gfx KIQ ring */
	if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
		ring = &adev->gfx.kiq[xcc_id].ring;
	else
		ring = &adev->mes.ring[MES_PIPE_INST(xcc_id, pipe)];

	/* on reset/resume start from clean ring pointers and contents */
	if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) &&
	    (amdgpu_in_reset(adev) || adev->in_suspend)) {
		*(ring->wptr_cpu_addr) = 0;
		*(ring->rptr_cpu_addr) = 0;
		amdgpu_ring_clear_ring(ring);
	}

	r = mes_v12_1_mqd_init(ring);
	if (r)
		return r;

	if (pipe == AMDGPU_MES_SCHED_PIPE) {
		if (adev->enable_uni_mes)
			r = amdgpu_mes_map_legacy_queue(adev, ring, xcc_id);
		else
			r = mes_v12_1_kiq_enable_queue(adev, xcc_id);
		if (r)
			return r;
	} else {
		mes_v12_1_queue_init_register(ring, xcc_id);
	}

	/* get MES scheduler/KIQ versions (reported in CP_MES_GP3_LO) */
	mutex_lock(&adev->srbm_mutex);
	soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));

	if (pipe == AMDGPU_MES_SCHED_PIPE)
		adev->mes.sched_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO);
	else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
		adev->mes.kiq_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO);

	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
	mutex_unlock(&adev->srbm_mutex);

	return 0;
}
1422 
1423 static int mes_v12_1_ring_init(struct amdgpu_device *adev,
1424 				 int xcc_id, int pipe)
1425 {
1426 	struct amdgpu_ring *ring;
1427 	int inst = MES_PIPE_INST(xcc_id, pipe);
1428 
1429 	ring = &adev->mes.ring[inst];
1430 
1431 	ring->funcs = &mes_v12_1_ring_funcs;
1432 
1433 	ring->me = 3;
1434 	ring->pipe = pipe;
1435 	ring->queue = 0;
1436 	ring->xcc_id = xcc_id;
1437 	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
1438 
1439 	ring->ring_obj = NULL;
1440 	ring->use_doorbell = true;
1441 	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst];
1442 	ring->no_scheduler = true;
1443 	snprintf(ring->name, sizeof(ring->name), "mes_%hhu.%hhu.%hhu.%hhu",
1444 		 (unsigned char)xcc_id, (unsigned char)ring->me,
1445 		 (unsigned char)ring->pipe, (unsigned char)ring->queue);
1446 
1447 	if (pipe == AMDGPU_MES_SCHED_PIPE)
1448 		ring->doorbell_index =
1449 			(adev->doorbell_index.mes_ring0 +
1450 			 xcc_id * adev->doorbell_index.xcc_doorbell_range)
1451 			<< 1;
1452 	else
1453 		ring->doorbell_index =
1454 			(adev->doorbell_index.mes_ring1 +
1455 			 xcc_id * adev->doorbell_index.xcc_doorbell_range)
1456 			<< 1;
1457 
1458 	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
1459 				AMDGPU_RING_PRIO_DEFAULT, NULL);
1460 }
1461 
1462 static int mes_v12_1_kiq_ring_init(struct amdgpu_device *adev, int xcc_id)
1463 {
1464 	struct amdgpu_ring *ring;
1465 	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE);
1466 
1467 	spin_lock_init(&adev->gfx.kiq[xcc_id].ring_lock);
1468 
1469 	ring = &adev->gfx.kiq[xcc_id].ring;
1470 
1471 	ring->me = 3;
1472 	ring->pipe = 1;
1473 	ring->queue = 0;
1474 	ring->xcc_id = xcc_id;
1475 	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
1476 
1477 	ring->adev = NULL;
1478 	ring->ring_obj = NULL;
1479 	ring->use_doorbell = true;
1480 	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst];
1481 	ring->no_scheduler = true;
1482 	ring->doorbell_index =
1483 		(adev->doorbell_index.mes_ring1 +
1484 		 xcc_id * adev->doorbell_index.xcc_doorbell_range)
1485 		<< 1;
1486 
1487 	snprintf(ring->name, sizeof(ring->name), "mes_kiq_%hhu.%hhu.%hhu.%hhu",
1488 		 (unsigned char)xcc_id, (unsigned char)ring->me,
1489 		 (unsigned char)ring->pipe, (unsigned char)ring->queue);
1490 
1491 	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
1492 				AMDGPU_RING_PRIO_DEFAULT, NULL);
1493 }
1494 
1495 static int mes_v12_1_mqd_sw_init(struct amdgpu_device *adev,
1496 				   enum amdgpu_mes_pipe pipe,
1497 				   int xcc_id)
1498 {
1499 	int r, mqd_size = sizeof(struct v12_1_mes_mqd);
1500 	struct amdgpu_ring *ring;
1501 	int inst = MES_PIPE_INST(xcc_id, pipe);
1502 
1503 	if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
1504 		ring = &adev->gfx.kiq[xcc_id].ring;
1505 	else
1506 		ring = &adev->mes.ring[inst];
1507 
1508 	if (ring->mqd_obj)
1509 		return 0;
1510 
1511 	r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
1512 				    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
1513 				    &ring->mqd_gpu_addr, &ring->mqd_ptr);
1514 	if (r) {
1515 		dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
1516 		return r;
1517 	}
1518 
1519 	memset(ring->mqd_ptr, 0, mqd_size);
1520 
1521 	/* prepare MQD backup */
1522 	adev->mes.mqd_backup[inst] = kmalloc(mqd_size, GFP_KERNEL);
1523 	if (!adev->mes.mqd_backup[inst])
1524 		dev_warn(adev->dev,
1525 			 "no memory to create MQD backup for ring %s\n",
1526 			 ring->name);
1527 
1528 	return 0;
1529 }
1530 
/*
 * sw_init: register the MES callbacks, size the event log, and create
 * the per-xcc/per-pipe software state — EOP buffer, MQD, ring and
 * (multi-xcc unified MES only) the shared command buffer.  Allocation
 * order matters: the EOP buffer must exist before ring init reads
 * eop_gpu_addr.  Partial allocations on error are cleaned up by
 * mes_v12_1_sw_fini().
 */
static int mes_v12_1_sw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int pipe, r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);

	adev->mes.funcs = &mes_v12_1_funcs;
	adev->mes.kiq_hw_init = &mes_v12_1_kiq_hw_init;
	adev->mes.kiq_hw_fini = &mes_v12_1_kiq_hw_fini;
	adev->mes.enable_legacy_queue_map = true;

	/* unified MES keeps one log buffer per pipe per xcc */
	adev->mes.event_log_size =
		adev->enable_uni_mes ? (AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE * num_xcc) : AMDGPU_MES_LOG_BUFFER_SIZE;

	r = amdgpu_mes_init(adev);
	if (r)
		return r;

	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
			r = mes_v12_1_allocate_eop_buf(adev, pipe, xcc_id);
			if (r)
				return r;

			r = mes_v12_1_mqd_sw_init(adev, pipe, xcc_id);
			if (r)
				return r;

			/* without unified MES the KIQ pipe uses the gfx KIQ ring */
			if (!adev->enable_uni_mes && pipe ==
			    AMDGPU_MES_KIQ_PIPE)
				r = mes_v12_1_kiq_ring_init(adev, xcc_id);
			else
				r = mes_v12_1_ring_init(adev, xcc_id, pipe);
			if (r)
				return r;

			if (adev->enable_uni_mes && num_xcc > 1) {
				r = mes_v12_1_allocate_shared_cmd_buf(adev,
							      pipe, xcc_id);
				if (r)
					return r;
			}
		}
	}

	return 0;
}
1577 
/*
 * sw_fini: release everything created by mes_v12_1_sw_init() — shared
 * command buffers, MQD backups, EOP buffers, rings, firmware handles,
 * the KIQ ring MQDs and, for direct firmware loading, the backdoor
 * ucode buffers — then tear down the common MES state.
 */
static int mes_v12_1_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int pipe, inst, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);

	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
			inst = MES_PIPE_INST(xcc_id, pipe);

			amdgpu_bo_free_kernel(&adev->mes.shared_cmd_buf_obj[inst],
					      &adev->mes.shared_cmd_buf_gpu_addr[inst],
					      NULL);

			kfree(adev->mes.mqd_backup[inst]);

			amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[inst],
					      &adev->mes.eop_gpu_addr[inst],
					      NULL);

			/* MES rings exist for these pipes; KIQ rings are freed below */
			if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) {
				amdgpu_bo_free_kernel(&adev->mes.ring[inst].mqd_obj,
						      &adev->mes.ring[inst].mqd_gpu_addr,
						      &adev->mes.ring[inst].mqd_ptr);
				amdgpu_ring_fini(&adev->mes.ring[inst]);
			}
		}
	}

	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++)
		amdgpu_ucode_release(&adev->mes.fw[pipe]);

	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
		if (!adev->enable_uni_mes) {
			amdgpu_bo_free_kernel(&adev->gfx.kiq[xcc_id].ring.mqd_obj,
				      &adev->gfx.kiq[xcc_id].ring.mqd_gpu_addr,
				      &adev->gfx.kiq[xcc_id].ring.mqd_ptr);
			amdgpu_ring_fini(&adev->gfx.kiq[xcc_id].ring);
		}

		/* backdoor-loaded ucode buffers only exist for direct loading */
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
			mes_v12_1_free_ucode_buffers(adev,
				       AMDGPU_MES_KIQ_PIPE, xcc_id);
			mes_v12_1_free_ucode_buffers(adev,
				       AMDGPU_MES_SCHED_PIPE, xcc_id);
		}
	}

	amdgpu_mes_fini(adev);
	return 0;
}
1628 
/*
 * Manually dequeue the MES scheduler queue via direct HQD register
 * access: request a dequeue, wait for the HQD to go inactive, disable
 * its doorbell and clear the ring pointers, then mark the scheduler
 * ring not ready.
 */
static void mes_v12_1_kiq_dequeue_sched(struct amdgpu_device *adev,
					  int xcc_id)
{
	uint32_t data;
	int i;

	mutex_lock(&adev->srbm_mutex);
	soc_v1_0_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0,
			     GET_INST(GC, xcc_id));

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) {
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1);
		/* poll until the HQD reports inactive (bounded by usec_timeout) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
	}
	/* disable the doorbell and mark any pending doorbell as handled */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL);
	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_EN, 0);
	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_HIT, 1);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data);

	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0);

	/* reset ring pointers to a clean state */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0);

	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
	mutex_unlock(&adev->srbm_mutex);

	adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = false;
}
1666 
/*
 * Tell the RLC which me/pipe/queue hosts the KIQ.  The queue id is
 * written first without bit 7, then again with bit 7 set — the
 * two-step write is deliberate (NOTE(review): bit 7 presumably acts
 * as the valid/enable bit; confirm against the RLC register spec).
 */
static void mes_v12_1_kiq_setting(struct amdgpu_ring *ring, int xcc_id)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
}
1680 
/*
 * Bring up MES through the KIQ pipe on one xcc: announce the KIQ
 * queue to the RLC, load the firmware (backdoor path) or just program
 * the start address (RLC autoload), start both pipes, initialize the
 * KIQ queue, and — for unified MES — send the hw-resource packets.
 * Finally map the legacy queues if enabled.  On failure after the
 * pipes are running, hw_fini is invoked to unwind.
 */
static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id)
{
	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE);
	int r = 0;
	struct amdgpu_ip_block *ip_block;

	/* unified MES hosts the KIQ on its own ring */
	if (adev->enable_uni_mes)
		mes_v12_1_kiq_setting(&adev->mes.ring[inst], xcc_id);
	else
		mes_v12_1_kiq_setting(&adev->gfx.kiq[xcc_id].ring, xcc_id);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {

		r = mes_v12_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE,
					       false, xcc_id);
		if (r) {
			DRM_ERROR("failed to load MES fw, r=%d\n", r);
			return r;
		}

		/* prime the icache only once, on the last pipe loaded */
		r = mes_v12_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE,
					       true, xcc_id);
		if (r) {
			DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
			return r;
		}

		mes_v12_1_set_ucode_start_addr(adev, xcc_id);

	} else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
		mes_v12_1_set_ucode_start_addr(adev, xcc_id);

	mes_v12_1_enable(adev, true, xcc_id);

	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES);
	if (unlikely(!ip_block)) {
		dev_err(adev->dev, "Failed to get MES handle\n");
		return -EINVAL;
	}

	r = mes_v12_1_queue_init(adev, AMDGPU_MES_KIQ_PIPE, xcc_id);
	if (r)
		goto failure;

	if (adev->enable_uni_mes) {
		r = mes_v12_1_set_hw_resources(&adev->mes,
						 AMDGPU_MES_KIQ_PIPE, xcc_id);
		if (r)
			goto failure;

		mes_v12_1_set_hw_resources_1(&adev->mes,
					       AMDGPU_MES_KIQ_PIPE, xcc_id);
	}

	if (adev->mes.enable_legacy_queue_map) {
		r = mes_v12_1_xcc_hw_init(ip_block, xcc_id);
		if (r)
			goto failure;
	}

	return r;

failure:
	mes_v12_1_hw_fini(ip_block);
	return r;
}
1747 
/*
 * Tear down the MES KIQ on one XCC: take the scheduler ring out of
 * service, then halt the MES pipes.
 */
static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id)
{
	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);

	if (adev->mes.ring[inst].sched.ready) {
		/*
		 * Unified MES unmaps its own ring through the MES API;
		 * otherwise dequeue the scheduler pipe via the KIQ directly.
		 */
		if (adev->enable_uni_mes)
			amdgpu_mes_unmap_legacy_queue(adev,
				      &adev->mes.ring[inst],
				      RESET_QUEUES, 0, 0, xcc_id);
		else
			mes_v12_1_kiq_dequeue_sched(adev, xcc_id);

		adev->mes.ring[inst].sched.ready = false;
	}

	/* Halt the MES pipes only after the ring is marked unusable. */
	mes_v12_1_enable(adev, false, xcc_id);

	return 0;
}
1767 
1768 static int mes_v12_1_setup_coop_mode(struct amdgpu_device *adev, int xcc_id)
1769 {
1770 	u32 num_xcc_per_xcp, num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1771 	int r = 0;
1772 
1773 	if (num_xcc == 1)
1774 		return r;
1775 
1776 	if (adev->gfx.funcs &&
1777 	    adev->gfx.funcs->get_xccs_per_xcp)
1778 		num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev);
1779 	else
1780 		return -EINVAL;
1781 
1782 	switch (adev->xcp_mgr->mode) {
1783 	case AMDGPU_SPX_PARTITION_MODE:
1784 		adev->mes.enable_coop_mode = 1;
1785 		adev->mes.master_xcc_ids[xcc_id] = 0;
1786 		break;
1787 	case AMDGPU_DPX_PARTITION_MODE:
1788 		adev->mes.enable_coop_mode = 1;
1789 		adev->mes.master_xcc_ids[xcc_id] =
1790 			(xcc_id/num_xcc_per_xcp) * (num_xcc / 2);
1791 		break;
1792 	case AMDGPU_QPX_PARTITION_MODE:
1793 		adev->mes.enable_coop_mode = 1;
1794 		adev->mes.master_xcc_ids[xcc_id] =
1795 			(xcc_id/num_xcc_per_xcp) * (num_xcc / 4);
1796 		break;
1797 	case AMDGPU_CPX_PARTITION_MODE:
1798 		adev->mes.enable_coop_mode = 0;
1799 		break;
1800 	default:
1801 		r = -EINVAL;
1802 		break;
1803 	}
1804 	return r;
1805 }
1806 
/*
 * Bring up the MES scheduler pipe on one XCC: optionally load/start the
 * firmware (non-KIQ path), initialize the scheduler queue, hand HW
 * resources to MES, and verify the scheduler responds.
 *
 * Returns 0 on success or a negative error code; on failure the whole
 * MES HW state for this IP block is torn down.
 */
static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id)
{
	int r;
	struct amdgpu_device *adev = ip_block->adev;

	/* Already initialized (e.g. via the KIQ init path) - just finalize. */
	if (adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready)
		goto out;

	/*
	 * Without the MES KIQ, the scheduler pipe firmware must be loaded
	 * and started here instead of in the KIQ init path.
	 */
	if (!adev->enable_mes_kiq) {
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
			r = mes_v12_1_load_microcode(adev,
				       AMDGPU_MES_SCHED_PIPE, true, xcc_id);
			if (r) {
				DRM_ERROR("failed to MES fw, r=%d\n", r);
				return r;
			}

			mes_v12_1_set_ucode_start_addr(adev, xcc_id);

		} else if (adev->firmware.load_type ==
			   AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {

			/* FW already loaded by RLC; only set the start address. */
			mes_v12_1_set_ucode_start_addr(adev, xcc_id);
		}

		mes_v12_1_enable(adev, true, xcc_id);
	}

	/* Enable the MES to handle doorbell ring on unmapped queue */
	mes_v12_1_enable_unmapped_doorbell_handling(&adev->mes, true, xcc_id);

	r = mes_v12_1_queue_init(adev, AMDGPU_MES_SCHED_PIPE, xcc_id);
	if (r)
		goto failure;

	r = mes_v12_1_set_hw_resources(&adev->mes,
					 AMDGPU_MES_SCHED_PIPE, xcc_id);
	if (r)
		goto failure;

	/* Unified MES additionally needs coop-mode setup and the
	 * extended HW-resource packet.
	 */
	if (adev->enable_uni_mes) {
		r = mes_v12_1_setup_coop_mode(adev, xcc_id);
		if (r)
			goto failure;
		mes_v12_1_set_hw_resources_1(&adev->mes,
					       AMDGPU_MES_SCHED_PIPE, xcc_id);
	}
	mes_v12_1_init_aggregated_doorbell(&adev->mes, xcc_id);

	/* Sanity-check that the scheduler firmware is alive and responding. */
	r = mes_v12_1_query_sched_status(&adev->mes,
					   AMDGPU_MES_SCHED_PIPE, xcc_id);
	if (r) {
		DRM_ERROR("MES is busy\n");
		goto failure;
	}

out:
	/*
	 * Disable KIQ ring usage from the driver once MES is enabled.
	 * MES uses KIQ ring exclusively so driver cannot access KIQ ring
	 * with MES enabled.
	 */
	adev->gfx.kiq[xcc_id].ring.sched.ready = false;
	adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = true;

	return 0;

failure:
	mes_v12_1_hw_fini(ip_block);
	return r;
}
1878 
1879 static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block)
1880 {
1881 	struct amdgpu_device *adev = ip_block->adev;
1882 	int r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1883 
1884 	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
1885 		r = mes_v12_1_xcc_hw_init(ip_block, xcc_id);
1886 		if (r)
1887 			return r;
1888 	}
1889 
1890 	return 0;
1891 }
1892 
/*
 * Nothing to do here; MES hardware teardown is driven from the KIQ
 * fini path (mes_v12_1_kiq_hw_fini).
 */
static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block)
{
	return 0;
}
1897 
1898 static int mes_v12_1_suspend(struct amdgpu_ip_block *ip_block)
1899 {
1900 	int r;
1901 
1902 	r = amdgpu_mes_suspend(ip_block->adev);
1903 	if (r)
1904 		return r;
1905 
1906 	return mes_v12_1_hw_fini(ip_block);
1907 }
1908 
1909 static int mes_v12_1_resume(struct amdgpu_ip_block *ip_block)
1910 {
1911 	int r;
1912 
1913 	r = mes_v12_1_hw_init(ip_block);
1914 	if (r)
1915 		return r;
1916 
1917 	return amdgpu_mes_resume(ip_block->adev);
1918 }
1919 
1920 static int mes_v12_1_early_init(struct amdgpu_ip_block *ip_block)
1921 {
1922 	struct amdgpu_device *adev = ip_block->adev;
1923 	int pipe, r;
1924 
1925 	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
1926 		r = amdgpu_mes_init_microcode(adev, pipe);
1927 		if (r)
1928 			return r;
1929 	}
1930 
1931 	return 0;
1932 }
1933 
1934 static int mes_v12_1_late_init(struct amdgpu_ip_block *ip_block)
1935 {
1936 	struct amdgpu_device *adev = ip_block->adev;
1937 	int xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1938 
1939 	/* TODO: remove it if issue fixed. */
1940 	if (adev->mes.enable_coop_mode)
1941 		return 0;
1942 
1943 	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
1944 		/* for COOP mode, only test master xcc. */
1945 		if (adev->mes.enable_coop_mode &&
1946 		    adev->mes.master_xcc_ids[xcc_id] != xcc_id)
1947 			continue;
1948 
1949 		mes_v12_1_self_test(adev, xcc_id);
1950 	}
1951 
1952 	return 0;
1953 }
1954 
/* IP-block callback table for the MES v12.1 scheduler. */
static const struct amd_ip_funcs mes_v12_1_ip_funcs = {
	.name = "mes_v12_1",
	.early_init = mes_v12_1_early_init,
	.late_init = mes_v12_1_late_init,
	.sw_init = mes_v12_1_sw_init,
	.sw_fini = mes_v12_1_sw_fini,
	.hw_init = mes_v12_1_hw_init,
	.hw_fini = mes_v12_1_hw_fini,
	.suspend = mes_v12_1_suspend,
	.resume = mes_v12_1_resume,
};
1966 
/* Version descriptor registered with the amdgpu IP-block framework. */
const struct amdgpu_ip_block_version mes_v12_1_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_MES,
	.major = 12,
	.minor = 1,
	.rev = 0,
	.funcs = &mes_v12_1_ip_funcs,
};
1974 
1975 static int mes_v12_1_alloc_test_buf(struct amdgpu_device *adev,
1976 				    struct amdgpu_bo **bo, uint64_t *addr,
1977 				    void **ptr, int size)
1978 {
1979 	amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1980 				bo, addr, ptr);
1981 	if (!*bo) {
1982 		dev_err(adev->dev, "failed to allocate test buffer bo\n");
1983 		return -ENOMEM;
1984 	}
1985 	memset(*ptr, 0, size);
1986 	return 0;
1987 }
1988 
1989 static int mes_v12_1_map_test_bo(struct amdgpu_device *adev,
1990 				 struct amdgpu_bo *bo, struct amdgpu_vm *vm,
1991 				 struct amdgpu_bo_va **bo_va, u64 va, int size)
1992 {
1993 	struct amdgpu_sync sync;
1994 	int r;
1995 
1996 	r = amdgpu_map_static_csa(adev, vm, bo, bo_va, va, size);
1997 	if (r)
1998 		return r;
1999 
2000 	amdgpu_sync_create(&sync);
2001 
2002 	r = amdgpu_vm_bo_update(adev, *bo_va, false);
2003 	if (r) {
2004 		dev_err(adev->dev, "failed to do vm_bo_update on meta data\n");
2005 		goto error;
2006 	}
2007 	amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update, GFP_KERNEL);
2008 
2009 	r = amdgpu_vm_update_pdes(adev, vm, false);
2010 	if (r) {
2011 		dev_err(adev->dev, "failed to update pdes on meta data\n");
2012 		goto error;
2013 	}
2014 	amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL);
2015 	amdgpu_sync_wait(&sync, false);
2016 
2017 error:
2018 	amdgpu_sync_free(&sync);
2019 	return 0;
2020 }
2021 
2022 static int mes_v12_1_test_ring(struct amdgpu_device *adev, int xcc_id,
2023 			       u32 *queue_ptr, u64 fence_gpu_addr,
2024 			       void *fence_cpu_ptr, void *wptr_cpu_addr,
2025 			       u64 doorbell_idx, int queue_type)
2026 {
2027 	volatile uint32_t *cpu_ptr = fence_cpu_ptr;
2028 	int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
2029 	int sdma_ring_align = 0x10, compute_ring_align = 0x100;
2030 	uint32_t tmp, xcc_offset;
2031 	int r = 0, i, wptr = 0;
2032 
2033 	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
2034 		if (!adev->mes.enable_coop_mode) {
2035 			WREG32_SOC15(GC, GET_INST(GC, xcc_id),
2036 				     regSCRATCH_REG0, 0xCAFEDEAD);
2037 		} else {
2038 			for (i = 0; i < num_xcc; i++) {
2039 				if (adev->mes.master_xcc_ids[i] == xcc_id)
2040 					WREG32_SOC15(GC, GET_INST(GC, i),
2041 					       regSCRATCH_REG0, 0xCAFEDEAD);
2042 			}
2043 		}
2044 
2045 		xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
2046 		queue_ptr[wptr++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2047 		queue_ptr[wptr++] = xcc_offset - PACKET3_SET_UCONFIG_REG_START;
2048 		queue_ptr[wptr++] = 0xDEADBEEF;
2049 
2050 		for (i = wptr; i < compute_ring_align; i++)
2051 			queue_ptr[wptr++] = PACKET3(PACKET3_NOP, 0x3FFF);
2052 
2053 	}  else if (queue_type == AMDGPU_RING_TYPE_SDMA) {
2054 		*cpu_ptr = 0xCAFEDEAD;
2055 
2056 		queue_ptr[wptr++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
2057 			SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
2058 		queue_ptr[wptr++] = lower_32_bits(fence_gpu_addr);
2059 		queue_ptr[wptr++] = upper_32_bits(fence_gpu_addr);
2060 		queue_ptr[wptr++] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
2061 		queue_ptr[wptr++] = 0xDEADBEEF;
2062 
2063 		for (i = wptr; i < sdma_ring_align; i++)
2064 			queue_ptr[wptr++] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
2065 
2066 		wptr <<= 2;
2067 	}
2068 
2069 	atomic64_set((atomic64_t *)wptr_cpu_addr, wptr);
2070 	WDOORBELL64(doorbell_idx, wptr);
2071 
2072 	for (i = 0; i < adev->usec_timeout; i++) {
2073 		if (queue_type == AMDGPU_RING_TYPE_SDMA) {
2074 			tmp = le32_to_cpu(*cpu_ptr);
2075 		} else {
2076 			if (!adev->mes.enable_coop_mode) {
2077 				tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
2078 						   regSCRATCH_REG0);
2079 			} else {
2080 				for (i = 0; i < num_xcc; i++) {
2081 					if (xcc_id != adev->mes.master_xcc_ids[i])
2082 						continue;
2083 
2084 					tmp = RREG32_SOC15(GC, GET_INST(GC, i),
2085 							   regSCRATCH_REG0);
2086 					if (tmp != 0xDEADBEEF)
2087 						break;
2088 				}
2089 			}
2090 		}
2091 
2092 		if (tmp == 0xDEADBEEF)
2093 			break;
2094 
2095 		if (amdgpu_emu_mode == 1)
2096 			msleep(1);
2097 		else
2098 			udelay(1);
2099 	}
2100 
2101 	if (i >= adev->usec_timeout) {
2102 		dev_err(adev->dev, "xcc%d: mes self test (%s) failed\n", xcc_id,
2103 		      queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute");
2104 
2105 		while (halt_if_hws_hang)
2106 			schedule();
2107 
2108 		r = -ETIMEDOUT;
2109 	} else {
2110 		dev_info(adev->dev, "xcc%d: mes self test (%s) pass\n", xcc_id,
2111 		      queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute");
2112 	}
2113 
2114 	return r;
2115 }
2116 
/*
 * Layout of the two-page self-test user context buffer: page 0 holds the
 * ring, page 1 the EOP buffer, and the wptr/rptr/fence u64 slots are
 * packed at the very end of the buffer.
 */
#define USER_CTX_SIZE (PAGE_SIZE * 2)
#define USER_CTX_VA AMDGPU_VA_RESERVED_BOTTOM
#define RING_OFFSET(addr) ((addr))
#define EOP_OFFSET(addr)  ((addr) + PAGE_SIZE)
#define WPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64))
#define RPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 2)
#define FENCE_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 3)
2124 
2125 static int mes_v12_1_test_queue(struct amdgpu_device *adev, int xcc_id,
2126 		     int pasid, struct amdgpu_vm *vm, u64 meta_gpu_addr,
2127 		     u64 queue_gpu_addr, void *ctx_ptr, int queue_type)
2128 {
2129 	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
2130 	struct amdgpu_mqd *mqd_mgr = &adev->mqds[queue_type];
2131 	struct amdgpu_mqd_prop mqd_prop = {0};
2132 	struct mes_add_queue_input add_queue = {0};
2133 	struct mes_remove_queue_input remove_queue = {0};
2134 	struct amdgpu_bo *mqd_bo = NULL;
2135 	int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
2136 	int i, r, off, mqd_size, mqd_count = 1;
2137 	void *mqd_ptr = NULL;
2138 	u64 mqd_gpu_addr, doorbell_idx;
2139 
2140 	/* extra one page size padding for mes fw */
2141 	mqd_size = mqd_mgr->mqd_size + PAGE_SIZE;
2142 
2143 	if (queue_type == AMDGPU_RING_TYPE_SDMA) {
2144 		doorbell_idx = adev->mes.db_start_dw_offset +	\
2145 			adev->doorbell_index.sdma_engine[0];
2146 	} else {
2147 		doorbell_idx = adev->mes.db_start_dw_offset + \
2148 			adev->doorbell_index.userqueue_start;
2149 	}
2150 
2151 	if (adev->mes.enable_coop_mode &&
2152 	    queue_type == AMDGPU_RING_TYPE_COMPUTE) {
2153 		for (i = 0, mqd_count = 0; i < num_xcc; i++) {
2154 			if (adev->mes.master_xcc_ids[i] == xcc_id)
2155 				mqd_count++;
2156 		}
2157 		mqd_size *= mqd_count;
2158 	}
2159 
2160 	r = mes_v12_1_alloc_test_buf(adev, &mqd_bo, &mqd_gpu_addr,
2161 				     &mqd_ptr, mqd_size * mqd_count);
2162 	if (r < 0)
2163 		return r;
2164 
2165 	mqd_prop.mqd_gpu_addr = mqd_gpu_addr;
2166 	mqd_prop.hqd_base_gpu_addr = RING_OFFSET(USER_CTX_VA);
2167 	mqd_prop.eop_gpu_addr = EOP_OFFSET(USER_CTX_VA);
2168 	mqd_prop.wptr_gpu_addr = WPTR_OFFSET(USER_CTX_VA);
2169 	mqd_prop.rptr_gpu_addr = RPTR_OFFSET(USER_CTX_VA);
2170 	mqd_prop.doorbell_index = doorbell_idx;
2171 	mqd_prop.queue_size = PAGE_SIZE;
2172 	mqd_prop.mqd_stride_size = mqd_size;
2173 	mqd_prop.use_doorbell = true;
2174 	mqd_prop.hqd_active = false;
2175 
2176 	mqd_mgr->init_mqd(adev, mqd_ptr, &mqd_prop);
2177 	if (mqd_count > 1) {
2178 		for (i = 1; i < mqd_count; i++) {
2179 			off = mqd_size * i;
2180 			mqd_prop.mqd_gpu_addr = mqd_gpu_addr + off;
2181 			mqd_mgr->init_mqd(adev, (char *)mqd_ptr + off,
2182 					  &mqd_prop);
2183 		}
2184 	}
2185 
2186 	add_queue.xcc_id = xcc_id;
2187 	add_queue.process_id = pasid;
2188 	add_queue.page_table_base_addr = adev->vm_manager.vram_base_offset +
2189 		amdgpu_bo_gpu_offset(vm->root.bo) - adev->gmc.vram_start;
2190 	add_queue.process_va_start = 0;
2191 	add_queue.process_va_end = adev->vm_manager.max_pfn - 1;
2192 	add_queue.process_context_addr = meta_gpu_addr;
2193 	add_queue.gang_context_addr = meta_gpu_addr + AMDGPU_MES_PROC_CTX_SIZE;
2194 	add_queue.doorbell_offset = doorbell_idx;
2195 	add_queue.mqd_addr = mqd_gpu_addr;
2196 	add_queue.wptr_addr = mqd_prop.wptr_gpu_addr;
2197 	add_queue.wptr_mc_addr = WPTR_OFFSET(queue_gpu_addr);
2198 	add_queue.queue_type = queue_type;
2199 	add_queue.vm_cntx_cntl = hub->vm_cntx_cntl;
2200 
2201 	r = mes_v12_1_add_hw_queue(&adev->mes, &add_queue);
2202 	if (r)
2203 		goto error;
2204 
2205 	mes_v12_1_test_ring(adev, xcc_id, (u32 *)RING_OFFSET((char *)ctx_ptr),
2206 			    FENCE_OFFSET(USER_CTX_VA),
2207 			    FENCE_OFFSET((char *)ctx_ptr),
2208 			    WPTR_OFFSET((char *)ctx_ptr),
2209 			    doorbell_idx, queue_type);
2210 
2211 	remove_queue.xcc_id = xcc_id;
2212 	remove_queue.doorbell_offset = doorbell_idx;
2213 	remove_queue.gang_context_addr = add_queue.gang_context_addr;
2214 	r = mes_v12_1_remove_hw_queue(&adev->mes, &remove_queue);
2215 
2216 error:
2217 	amdgpu_bo_free_kernel(&mqd_bo, &mqd_gpu_addr, &mqd_ptr);
2218 	return r;
2219 }
2220 
2221 static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id)
2222 {
2223 	int queue_types[] = { AMDGPU_RING_TYPE_COMPUTE,
2224 		              /* AMDGPU_RING_TYPE_SDMA */ };
2225 	struct amdgpu_bo_va *bo_va = NULL;
2226 	struct amdgpu_vm *vm = NULL;
2227 	struct amdgpu_bo *meta_bo = NULL, *ctx_bo = NULL;
2228 	void *meta_ptr = NULL, *ctx_ptr = NULL;
2229 	u64 meta_gpu_addr, ctx_gpu_addr;
2230 	int size, i, r, pasid;;
2231 
2232 	pasid = amdgpu_pasid_alloc(16);
2233 	if (pasid < 0)
2234 		pasid = 0;
2235 
2236 	size = AMDGPU_MES_PROC_CTX_SIZE + AMDGPU_MES_GANG_CTX_SIZE;
2237 	r = mes_v12_1_alloc_test_buf(adev, &meta_bo, &meta_gpu_addr,
2238 				     &meta_ptr, size);
2239 	if (r < 0)
2240 		goto err2;
2241 
2242 	r = mes_v12_1_alloc_test_buf(adev, &ctx_bo, &ctx_gpu_addr,
2243 				     &ctx_ptr, USER_CTX_SIZE);
2244 	if (r < 0)
2245 		goto err2;
2246 
2247 	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
2248 	if (!vm) {
2249 		r = -ENOMEM;
2250 		goto err2;
2251 	}
2252 
2253 	r = amdgpu_vm_init(adev, vm, -1, pasid);
2254 	if (r)
2255 		goto err1;
2256 
2257 	r = mes_v12_1_map_test_bo(adev, ctx_bo, vm, &bo_va,
2258 				  USER_CTX_VA, USER_CTX_SIZE);
2259 	if (r)
2260 		goto err0;
2261 
2262 	for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
2263 		memset(ctx_ptr, 0, USER_CTX_SIZE);
2264 
2265 		r = mes_v12_1_test_queue(adev, xcc_id, pasid, vm, meta_gpu_addr,
2266 					 ctx_gpu_addr, ctx_ptr, queue_types[i]);
2267 		if (r)
2268 			break;
2269 	}
2270 
2271 	amdgpu_unmap_static_csa(adev, vm, ctx_bo, bo_va, USER_CTX_VA);
2272 err0:
2273 	amdgpu_vm_fini(adev, vm);
2274 err1:
2275 	kfree(vm);
2276 err2:
2277 	amdgpu_bo_free_kernel(&meta_bo, &meta_gpu_addr, &meta_ptr);
2278 	amdgpu_bo_free_kernel(&ctx_bo, &ctx_gpu_addr, &ctx_ptr);
2279 	amdgpu_pasid_free(pasid);
2280 	return r;
2281 }
2282 
2283