xref: /linux/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c (revision c17ee635fd3a482b2ad2bf5e269755c2eae5f25e)
1e220edf2SJack Xiao /*
2e220edf2SJack Xiao  * Copyright 2025 Advanced Micro Devices, Inc.
3e220edf2SJack Xiao  *
4e220edf2SJack Xiao  * Permission is hereby granted, free of charge, to any person obtaining a
5e220edf2SJack Xiao  * copy of this software and associated documentation files (the "Software"),
6e220edf2SJack Xiao  * to deal in the Software without restriction, including without limitation
7e220edf2SJack Xiao  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8e220edf2SJack Xiao  * and/or sell copies of the Software, and to permit persons to whom the
9e220edf2SJack Xiao  * Software is furnished to do so, subject to the following conditions:
10e220edf2SJack Xiao  *
11e220edf2SJack Xiao  * The above copyright notice and this permission notice shall be included in
12e220edf2SJack Xiao  * all copies or substantial portions of the Software.
13e220edf2SJack Xiao  *
14e220edf2SJack Xiao  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15e220edf2SJack Xiao  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16e220edf2SJack Xiao  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17e220edf2SJack Xiao  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18e220edf2SJack Xiao  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19e220edf2SJack Xiao  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20e220edf2SJack Xiao  * OTHER DEALINGS IN THE SOFTWARE.
21e220edf2SJack Xiao  *
22e220edf2SJack Xiao  */
23e220edf2SJack Xiao 
24e220edf2SJack Xiao #include <linux/firmware.h>
25e220edf2SJack Xiao #include <linux/module.h>
26e220edf2SJack Xiao #include "amdgpu.h"
27e220edf2SJack Xiao #include "soc15_common.h"
28e220edf2SJack Xiao #include "soc_v1_0.h"
29e220edf2SJack Xiao #include "gc/gc_12_1_0_offset.h"
30e220edf2SJack Xiao #include "gc/gc_12_1_0_sh_mask.h"
31e220edf2SJack Xiao #include "gc/gc_11_0_0_default.h"
32e220edf2SJack Xiao #include "v12_structs.h"
33e220edf2SJack Xiao #include "mes_v12_api_def.h"
34e220edf2SJack Xiao 
35e220edf2SJack Xiao MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin");
36e220edf2SJack Xiao MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin");
37e220edf2SJack Xiao MODULE_FIRMWARE("amdgpu/gc_12_1_0_uni_mes.bin");
38e220edf2SJack Xiao 
39e220edf2SJack Xiao static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block);
40a5192fbbSLikun Gao static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id);
41e220edf2SJack Xiao static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block);
42e220edf2SJack Xiao static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id);
43e220edf2SJack Xiao static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id);
44e220edf2SJack Xiao 
45e220edf2SJack Xiao #define MES_EOP_SIZE   2048
46e220edf2SJack Xiao 
47e220edf2SJack Xiao #define regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT 0x100000
48f8692d2fSAlex Sierra #define XCC_MID_MASK 0x41000000
49f8692d2fSAlex Sierra 
50e220edf2SJack Xiao static void mes_v12_1_ring_set_wptr(struct amdgpu_ring *ring)
51e220edf2SJack Xiao {
52e220edf2SJack Xiao 	struct amdgpu_device *adev = ring->adev;
53e220edf2SJack Xiao 
54e220edf2SJack Xiao 	if (ring->use_doorbell) {
55e220edf2SJack Xiao 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
56e220edf2SJack Xiao 			     ring->wptr);
57e220edf2SJack Xiao 		WDOORBELL64(ring->doorbell_index, ring->wptr);
58e220edf2SJack Xiao 	} else {
59e220edf2SJack Xiao 		BUG();
60e220edf2SJack Xiao 	}
61e220edf2SJack Xiao }
62e220edf2SJack Xiao 
63e220edf2SJack Xiao static u64 mes_v12_1_ring_get_rptr(struct amdgpu_ring *ring)
64e220edf2SJack Xiao {
65e220edf2SJack Xiao 	return *ring->rptr_cpu_addr;
66e220edf2SJack Xiao }
67e220edf2SJack Xiao 
68e220edf2SJack Xiao static u64 mes_v12_1_ring_get_wptr(struct amdgpu_ring *ring)
69e220edf2SJack Xiao {
70e220edf2SJack Xiao 	u64 wptr;
71e220edf2SJack Xiao 
72e220edf2SJack Xiao 	if (ring->use_doorbell)
73e220edf2SJack Xiao 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
74e220edf2SJack Xiao 	else
75e220edf2SJack Xiao 		BUG();
76e220edf2SJack Xiao 	return wptr;
77e220edf2SJack Xiao }
78e220edf2SJack Xiao 
79e220edf2SJack Xiao static const struct amdgpu_ring_funcs mes_v12_1_ring_funcs = {
80e220edf2SJack Xiao 	.type = AMDGPU_RING_TYPE_MES,
81e220edf2SJack Xiao 	.align_mask = 1,
82e220edf2SJack Xiao 	.nop = 0,
83e220edf2SJack Xiao 	.support_64bit_ptrs = true,
84e220edf2SJack Xiao 	.get_rptr = mes_v12_1_ring_get_rptr,
85e220edf2SJack Xiao 	.get_wptr = mes_v12_1_ring_get_wptr,
86e220edf2SJack Xiao 	.set_wptr = mes_v12_1_ring_set_wptr,
87e220edf2SJack Xiao 	.insert_nop = amdgpu_ring_insert_nop,
88e220edf2SJack Xiao };
89e220edf2SJack Xiao 
/*
 * Printable names for MES scheduler API opcodes, indexed by the packet's
 * header.opcode (see mes_v12_1_get_op_string()). Only used for log output.
 *
 * The table itself is const as well as the strings, so the pointer slots
 * cannot be overwritten at runtime.
 */
static const char * const mes_v12_1_opcodes[] = {
	"SET_HW_RSRC",
	"SET_SCHEDULING_CONFIG",
	"ADD_QUEUE",
	"REMOVE_QUEUE",
	"PERFORM_YIELD",
	"SET_GANG_PRIORITY_LEVEL",
	"SUSPEND",
	"RESUME",
	"RESET",
	"SET_LOG_BUFFER",
	/*
	 * Spelling kept verbatim; presumably mirrors the firmware API enum
	 * name - confirm before "fixing" it.
	 */
	"CHANGE_GANG_PRORITY",
	"QUERY_SCHEDULER_STATUS",
	"unused",
	"SET_DEBUG_VMID",
	"MISC",
	"UPDATE_ROOT_PAGE_TABLE",
	"AMD_LOG",
	"SET_SE_MODE",
	"SET_GANG_SUBMIT",
	"SET_HW_RSRC_1",
	"INVALIDATE_TLBS",
};
113e220edf2SJack Xiao 
/*
 * Printable names for the sub-opcodes of a MES_SCH_API_MISC packet, indexed
 * by the packet's opcode field (see mes_v12_1_get_misc_op_string()). Only
 * used for log output.
 *
 * The table itself is const as well as the strings, so the pointer slots
 * cannot be overwritten at runtime.
 */
static const char * const mes_v12_1_misc_opcodes[] = {
	"WRITE_REG",
	"INV_GART",
	"QUERY_STATUS",
	"READ_REG",
	"WAIT_REG_MEM",
	"SET_SHADER_DEBUGGER",
	"NOTIFY_WORK_ON_UNMAPPED_QUEUE",
	"NOTIFY_TO_UNMAP_PROCESSES",
};
124e220edf2SJack Xiao 
125e220edf2SJack Xiao static const char *mes_v12_1_get_op_string(union MESAPI__MISC *x_pkt)
126e220edf2SJack Xiao {
127e220edf2SJack Xiao 	const char *op_str = NULL;
128e220edf2SJack Xiao 
129e220edf2SJack Xiao 	if (x_pkt->header.opcode < ARRAY_SIZE(mes_v12_1_opcodes))
130e220edf2SJack Xiao 		op_str = mes_v12_1_opcodes[x_pkt->header.opcode];
131e220edf2SJack Xiao 
132e220edf2SJack Xiao 	return op_str;
133e220edf2SJack Xiao }
134e220edf2SJack Xiao 
135e220edf2SJack Xiao static const char *mes_v12_1_get_misc_op_string(union MESAPI__MISC *x_pkt)
136e220edf2SJack Xiao {
137e220edf2SJack Xiao 	const char *op_str = NULL;
138e220edf2SJack Xiao 
139e220edf2SJack Xiao 	if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
140e220edf2SJack Xiao 	    (x_pkt->opcode < ARRAY_SIZE(mes_v12_1_misc_opcodes)))
141e220edf2SJack Xiao 		op_str = mes_v12_1_misc_opcodes[x_pkt->opcode];
142e220edf2SJack Xiao 
143e220edf2SJack Xiao 	return op_str;
144e220edf2SJack Xiao }
145e220edf2SJack Xiao 
146e220edf2SJack Xiao static int mes_v12_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
147e220edf2SJack Xiao 					    int xcc_id, int pipe, void *pkt,
148e220edf2SJack Xiao 					    int size, int api_status_off)
149e220edf2SJack Xiao {
150e220edf2SJack Xiao 	union MESAPI__QUERY_MES_STATUS mes_status_pkt;
151e220edf2SJack Xiao 	signed long timeout = 2100000; /* 2100 ms */
152e220edf2SJack Xiao 	struct amdgpu_device *adev = mes->adev;
153e220edf2SJack Xiao 	struct amdgpu_ring *ring = &mes->ring[MES_PIPE_INST(xcc_id, pipe)];
154e220edf2SJack Xiao 	spinlock_t *ring_lock = &mes->ring_lock[MES_PIPE_INST(xcc_id, pipe)];
155e220edf2SJack Xiao 	struct MES_API_STATUS *api_status;
156e220edf2SJack Xiao 	union MESAPI__MISC *x_pkt = pkt;
157e220edf2SJack Xiao 	const char *op_str, *misc_op_str;
158e220edf2SJack Xiao 	unsigned long flags;
159e220edf2SJack Xiao 	u64 status_gpu_addr;
160e220edf2SJack Xiao 	u32 seq, status_offset;
161e220edf2SJack Xiao 	u64 *status_ptr;
162e220edf2SJack Xiao 	signed long r;
163e220edf2SJack Xiao 	int ret;
164e220edf2SJack Xiao 
165e220edf2SJack Xiao 	if (x_pkt->header.opcode >= MES_SCH_API_MAX)
166e220edf2SJack Xiao 		return -EINVAL;
167e220edf2SJack Xiao 
168e220edf2SJack Xiao 	if (amdgpu_emu_mode) {
169e220edf2SJack Xiao 		timeout *= 1000;
170e220edf2SJack Xiao 	} else if (amdgpu_sriov_vf(adev)) {
171e220edf2SJack Xiao 		/* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
172e220edf2SJack Xiao 		timeout = 15 * 600 * 1000;
173e220edf2SJack Xiao 	}
174e220edf2SJack Xiao 
175e220edf2SJack Xiao 	ret = amdgpu_device_wb_get(adev, &status_offset);
176e220edf2SJack Xiao 	if (ret)
177e220edf2SJack Xiao 		return ret;
178e220edf2SJack Xiao 
179e220edf2SJack Xiao 	status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
180e220edf2SJack Xiao 	status_ptr = (u64 *)&adev->wb.wb[status_offset];
181e220edf2SJack Xiao 	*status_ptr = 0;
182e220edf2SJack Xiao 
183e220edf2SJack Xiao 	spin_lock_irqsave(ring_lock, flags);
184e220edf2SJack Xiao 	r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
185e220edf2SJack Xiao 	if (r)
186e220edf2SJack Xiao 		goto error_unlock_free;
187e220edf2SJack Xiao 
188e220edf2SJack Xiao 	seq = ++ring->fence_drv.sync_seq;
189e220edf2SJack Xiao 	r = amdgpu_fence_wait_polling(ring,
190e220edf2SJack Xiao 				      seq - ring->fence_drv.num_fences_mask,
191e220edf2SJack Xiao 				      timeout);
192e220edf2SJack Xiao 	if (r < 1)
193e220edf2SJack Xiao 		goto error_undo;
194e220edf2SJack Xiao 
195e220edf2SJack Xiao 	api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
196e220edf2SJack Xiao 	api_status->api_completion_fence_addr = status_gpu_addr;
197e220edf2SJack Xiao 	api_status->api_completion_fence_value = 1;
198e220edf2SJack Xiao 
199e220edf2SJack Xiao 	amdgpu_ring_write_multiple(ring, pkt, size / 4);
200e220edf2SJack Xiao 
201e220edf2SJack Xiao 	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
202e220edf2SJack Xiao 	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
203e220edf2SJack Xiao 	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
204e220edf2SJack Xiao 	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
205e220edf2SJack Xiao 	mes_status_pkt.api_status.api_completion_fence_addr =
206e220edf2SJack Xiao 		ring->fence_drv.gpu_addr;
207e220edf2SJack Xiao 	mes_status_pkt.api_status.api_completion_fence_value = seq;
208e220edf2SJack Xiao 
209e220edf2SJack Xiao 	amdgpu_ring_write_multiple(ring, &mes_status_pkt,
210e220edf2SJack Xiao 				   sizeof(mes_status_pkt) / 4);
211e220edf2SJack Xiao 
212e220edf2SJack Xiao 	amdgpu_ring_commit(ring);
213e220edf2SJack Xiao 	spin_unlock_irqrestore(ring_lock, flags);
214e220edf2SJack Xiao 
215e220edf2SJack Xiao 	op_str = mes_v12_1_get_op_string(x_pkt);
216e220edf2SJack Xiao 	misc_op_str = mes_v12_1_get_misc_op_string(x_pkt);
217e220edf2SJack Xiao 
218e220edf2SJack Xiao 	if (misc_op_str)
219e220edf2SJack Xiao 		dev_dbg(adev->dev, "MES(%d, %d) msg=%s (%s) was emitted\n",
220e220edf2SJack Xiao 			xcc_id, pipe, op_str, misc_op_str);
221e220edf2SJack Xiao 	else if (op_str)
222e220edf2SJack Xiao 		dev_dbg(adev->dev, "MES(%d, %d) msg=%s was emitted\n",
223e220edf2SJack Xiao 			xcc_id, pipe, op_str);
224e220edf2SJack Xiao 	else
225e220edf2SJack Xiao 		dev_dbg(adev->dev, "MES(%d, %d) msg=%d was emitted\n",
226e220edf2SJack Xiao 			xcc_id, pipe, x_pkt->header.opcode);
227e220edf2SJack Xiao 
228e220edf2SJack Xiao 	r = amdgpu_fence_wait_polling(ring, seq, timeout);
229e220edf2SJack Xiao 	if (r < 1 || !*status_ptr) {
230e220edf2SJack Xiao 		if (misc_op_str)
231e220edf2SJack Xiao 			dev_err(adev->dev,
232e220edf2SJack Xiao 				"MES(%d, %d) failed to respond to msg=%s (%s)\n",
233e220edf2SJack Xiao 				xcc_id, pipe, op_str, misc_op_str);
234e220edf2SJack Xiao 		else if (op_str)
235e220edf2SJack Xiao 			dev_err(adev->dev,
236e220edf2SJack Xiao 				"MES(%d, %d) failed to respond to msg=%s\n",
237e220edf2SJack Xiao 				xcc_id, pipe, op_str);
238e220edf2SJack Xiao 		else
239e220edf2SJack Xiao 			dev_err(adev->dev,
240e220edf2SJack Xiao 				"MES(%d, %d) failed to respond to msg=%d\n",
241e220edf2SJack Xiao 				xcc_id, pipe, x_pkt->header.opcode);
242e220edf2SJack Xiao 
243e220edf2SJack Xiao 		while (halt_if_hws_hang)
244e220edf2SJack Xiao 			schedule();
245e220edf2SJack Xiao 
246e220edf2SJack Xiao 		r = -ETIMEDOUT;
247e220edf2SJack Xiao 		goto error_wb_free;
248e220edf2SJack Xiao 	}
249e220edf2SJack Xiao 
250e220edf2SJack Xiao 	amdgpu_device_wb_free(adev, status_offset);
251e220edf2SJack Xiao 	return 0;
252e220edf2SJack Xiao 
253e220edf2SJack Xiao error_undo:
254e220edf2SJack Xiao 	dev_err(adev->dev, "MES(%d, %d) ring buffer is full.\n", xcc_id, pipe);
255e220edf2SJack Xiao 	amdgpu_ring_undo(ring);
256e220edf2SJack Xiao 
257e220edf2SJack Xiao error_unlock_free:
258e220edf2SJack Xiao 	spin_unlock_irqrestore(ring_lock, flags);
259e220edf2SJack Xiao 
260e220edf2SJack Xiao error_wb_free:
261e220edf2SJack Xiao 	amdgpu_device_wb_free(adev, status_offset);
262e220edf2SJack Xiao 	return r;
263e220edf2SJack Xiao }
264e220edf2SJack Xiao 
265e220edf2SJack Xiao static int convert_to_mes_queue_type(int queue_type)
266e220edf2SJack Xiao {
267e220edf2SJack Xiao 	if (queue_type == AMDGPU_RING_TYPE_GFX)
268e220edf2SJack Xiao 		return MES_QUEUE_TYPE_GFX;
269e220edf2SJack Xiao 	else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
270e220edf2SJack Xiao 		return MES_QUEUE_TYPE_COMPUTE;
271e220edf2SJack Xiao 	else if (queue_type == AMDGPU_RING_TYPE_SDMA)
272e220edf2SJack Xiao 		return MES_QUEUE_TYPE_SDMA;
273e220edf2SJack Xiao 	else if (queue_type == AMDGPU_RING_TYPE_MES)
274e220edf2SJack Xiao 		return MES_QUEUE_TYPE_SCHQ;
275e220edf2SJack Xiao 	else
276e220edf2SJack Xiao 		BUG();
277e220edf2SJack Xiao 	return -1;
278e220edf2SJack Xiao }
279e220edf2SJack Xiao 
280e220edf2SJack Xiao static int mes_v12_1_add_hw_queue(struct amdgpu_mes *mes,
281e220edf2SJack Xiao 				  struct mes_add_queue_input *input)
282e220edf2SJack Xiao {
283e220edf2SJack Xiao 	union MESAPI__ADD_QUEUE mes_add_queue_pkt;
28475053887SJack Xiao 	int xcc_id = input->xcc_id;
28575053887SJack Xiao 	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);
28675053887SJack Xiao 
28775053887SJack Xiao 	if (mes->enable_coop_mode)
28875053887SJack Xiao 		xcc_id = mes->master_xcc_ids[inst];
289e220edf2SJack Xiao 
290e220edf2SJack Xiao 	memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
291e220edf2SJack Xiao 
292e220edf2SJack Xiao 	mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
293e220edf2SJack Xiao 	mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
294e220edf2SJack Xiao 	mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
295e220edf2SJack Xiao 
296e220edf2SJack Xiao 	mes_add_queue_pkt.process_id = input->process_id;
297e220edf2SJack Xiao 	mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
298e220edf2SJack Xiao 	mes_add_queue_pkt.process_va_start = input->process_va_start;
299e220edf2SJack Xiao 	mes_add_queue_pkt.process_va_end = input->process_va_end;
300e220edf2SJack Xiao 	mes_add_queue_pkt.process_quantum = input->process_quantum;
301e220edf2SJack Xiao 	mes_add_queue_pkt.process_context_addr = input->process_context_addr;
302e220edf2SJack Xiao 	mes_add_queue_pkt.gang_quantum = input->gang_quantum;
303e220edf2SJack Xiao 	mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
304e220edf2SJack Xiao 	mes_add_queue_pkt.inprocess_gang_priority =
305e220edf2SJack Xiao 		input->inprocess_gang_priority;
306e220edf2SJack Xiao 	mes_add_queue_pkt.gang_global_priority_level =
307e220edf2SJack Xiao 		input->gang_global_priority_level;
308e220edf2SJack Xiao 	mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
309e220edf2SJack Xiao 	mes_add_queue_pkt.mqd_addr = input->mqd_addr;
310e220edf2SJack Xiao 
311e220edf2SJack Xiao 	mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr;
312e220edf2SJack Xiao 
313e220edf2SJack Xiao 	mes_add_queue_pkt.queue_type =
314e220edf2SJack Xiao 		convert_to_mes_queue_type(input->queue_type);
315e220edf2SJack Xiao 	mes_add_queue_pkt.paging = input->paging;
3163235a5b7SMukul Joshi 	mes_add_queue_pkt.vm_context_cntl = input->vm_cntx_cntl;
317e220edf2SJack Xiao 	mes_add_queue_pkt.gws_base = input->gws_base;
318e220edf2SJack Xiao 	mes_add_queue_pkt.gws_size = input->gws_size;
319e220edf2SJack Xiao 	mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
320e220edf2SJack Xiao 	mes_add_queue_pkt.tma_addr = input->tma_addr;
321e220edf2SJack Xiao 	mes_add_queue_pkt.trap_en = input->trap_en;
322e220edf2SJack Xiao 	mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
323e220edf2SJack Xiao 	mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
324e220edf2SJack Xiao 
325e220edf2SJack Xiao 	/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
326e220edf2SJack Xiao 	mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
327e220edf2SJack Xiao 	mes_add_queue_pkt.gds_size = input->queue_size;
328e220edf2SJack Xiao 
329e220edf2SJack Xiao 	/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
330e220edf2SJack Xiao 	mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
331e220edf2SJack Xiao 	mes_add_queue_pkt.gds_size = input->queue_size;
332e220edf2SJack Xiao 
333cc52af1aSMukul Joshi 	mes_add_queue_pkt.full_sh_mem_config_data = input->sh_mem_config_data;
334cc52af1aSMukul Joshi 
335e220edf2SJack Xiao 	return mes_v12_1_submit_pkt_and_poll_completion(mes,
33675053887SJack Xiao 			xcc_id, AMDGPU_MES_SCHED_PIPE,
337e220edf2SJack Xiao 			&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
338e220edf2SJack Xiao 			offsetof(union MESAPI__ADD_QUEUE, api_status));
339e220edf2SJack Xiao }
340e220edf2SJack Xiao 
341e220edf2SJack Xiao static int mes_v12_1_remove_hw_queue(struct amdgpu_mes *mes,
342e220edf2SJack Xiao 				     struct mes_remove_queue_input *input)
343e220edf2SJack Xiao {
344e220edf2SJack Xiao 	union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
34575053887SJack Xiao 	int xcc_id = input->xcc_id;
34675053887SJack Xiao 	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);
34775053887SJack Xiao 
34875053887SJack Xiao 	if (mes->enable_coop_mode)
34975053887SJack Xiao 		xcc_id = mes->master_xcc_ids[inst];
350e220edf2SJack Xiao 
351e220edf2SJack Xiao 	memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
352e220edf2SJack Xiao 
353e220edf2SJack Xiao 	mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
354e220edf2SJack Xiao 	mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
355e220edf2SJack Xiao 	mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
356e220edf2SJack Xiao 
357e220edf2SJack Xiao 	mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
358e220edf2SJack Xiao 	mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
359e220edf2SJack Xiao 
360e220edf2SJack Xiao 	return mes_v12_1_submit_pkt_and_poll_completion(mes,
36175053887SJack Xiao 			xcc_id, AMDGPU_MES_SCHED_PIPE,
362e220edf2SJack Xiao 			&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
363e220edf2SJack Xiao 			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
364e220edf2SJack Xiao }
365e220edf2SJack Xiao 
366e220edf2SJack Xiao static int mes_v12_1_reset_hw_queue(struct amdgpu_mes *mes,
367e220edf2SJack Xiao 				    struct mes_reset_queue_input *input)
368e220edf2SJack Xiao {
369e220edf2SJack Xiao 	union MESAPI__RESET mes_reset_queue_pkt;
370e220edf2SJack Xiao 	int pipe;
371e220edf2SJack Xiao 
372e220edf2SJack Xiao 	memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
373e220edf2SJack Xiao 
374e220edf2SJack Xiao 	mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
375e220edf2SJack Xiao 	mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
376e220edf2SJack Xiao 	mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
377e220edf2SJack Xiao 
378e220edf2SJack Xiao 	mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
379e220edf2SJack Xiao 	/* mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; */
380e220edf2SJack Xiao 	/*mes_reset_queue_pkt.reset_queue_only = 1;*/
381e220edf2SJack Xiao 
382e220edf2SJack Xiao 	if (mes->adev->enable_uni_mes)
383e220edf2SJack Xiao 		pipe = AMDGPU_MES_KIQ_PIPE;
384e220edf2SJack Xiao 	else
385e220edf2SJack Xiao 		pipe = AMDGPU_MES_SCHED_PIPE;
386e220edf2SJack Xiao 
387e220edf2SJack Xiao 	return mes_v12_1_submit_pkt_and_poll_completion(mes,
388e220edf2SJack Xiao 			input->xcc_id, pipe,
389e220edf2SJack Xiao 			&mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
390e220edf2SJack Xiao 			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
391e220edf2SJack Xiao }
392e220edf2SJack Xiao 
393e220edf2SJack Xiao static int mes_v12_1_map_legacy_queue(struct amdgpu_mes *mes,
394e220edf2SJack Xiao 				      struct mes_map_legacy_queue_input *input)
395e220edf2SJack Xiao {
396e220edf2SJack Xiao 	union MESAPI__ADD_QUEUE mes_add_queue_pkt;
397e220edf2SJack Xiao 	int pipe;
398e220edf2SJack Xiao 
399e220edf2SJack Xiao 	memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
400e220edf2SJack Xiao 
401e220edf2SJack Xiao 	mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
402e220edf2SJack Xiao 	mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
403e220edf2SJack Xiao 	mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
404e220edf2SJack Xiao 
405e220edf2SJack Xiao 	mes_add_queue_pkt.pipe_id = input->pipe_id;
406e220edf2SJack Xiao 	mes_add_queue_pkt.queue_id = input->queue_id;
407e220edf2SJack Xiao 	mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
408e220edf2SJack Xiao 	mes_add_queue_pkt.mqd_addr = input->mqd_addr;
409e220edf2SJack Xiao 	mes_add_queue_pkt.wptr_addr = input->wptr_addr;
410e220edf2SJack Xiao 	mes_add_queue_pkt.queue_type =
411e220edf2SJack Xiao 		convert_to_mes_queue_type(input->queue_type);
412e220edf2SJack Xiao 	mes_add_queue_pkt.map_legacy_kq = 1;
413e220edf2SJack Xiao 
414e220edf2SJack Xiao 	if (mes->adev->enable_uni_mes)
415e220edf2SJack Xiao 		pipe = AMDGPU_MES_KIQ_PIPE;
416e220edf2SJack Xiao 	else
417e220edf2SJack Xiao 		pipe = AMDGPU_MES_SCHED_PIPE;
418e220edf2SJack Xiao 
419e220edf2SJack Xiao 	return mes_v12_1_submit_pkt_and_poll_completion(mes,
420e220edf2SJack Xiao 			input->xcc_id, pipe,
421e220edf2SJack Xiao 			&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
422e220edf2SJack Xiao 			offsetof(union MESAPI__ADD_QUEUE, api_status));
423e220edf2SJack Xiao }
424e220edf2SJack Xiao 
425e220edf2SJack Xiao static int mes_v12_1_unmap_legacy_queue(struct amdgpu_mes *mes,
426e220edf2SJack Xiao 			struct mes_unmap_legacy_queue_input *input)
427e220edf2SJack Xiao {
428e220edf2SJack Xiao 	union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
429e220edf2SJack Xiao 	int pipe;
430e220edf2SJack Xiao 
431e220edf2SJack Xiao 	memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
432e220edf2SJack Xiao 
433e220edf2SJack Xiao 	mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
434e220edf2SJack Xiao 	mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
435e220edf2SJack Xiao 	mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
436e220edf2SJack Xiao 
437e220edf2SJack Xiao 	mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
438e220edf2SJack Xiao 	mes_remove_queue_pkt.gang_context_addr = 0;
439e220edf2SJack Xiao 
440e220edf2SJack Xiao 	mes_remove_queue_pkt.pipe_id = input->pipe_id;
441e220edf2SJack Xiao 	mes_remove_queue_pkt.queue_id = input->queue_id;
442e220edf2SJack Xiao 
443e220edf2SJack Xiao 	if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
444e220edf2SJack Xiao 		mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
445e220edf2SJack Xiao 		mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
446e220edf2SJack Xiao 		mes_remove_queue_pkt.tf_data =
447e220edf2SJack Xiao 			lower_32_bits(input->trail_fence_data);
448e220edf2SJack Xiao 	} else {
449e220edf2SJack Xiao 		mes_remove_queue_pkt.unmap_legacy_queue = 1;
450e220edf2SJack Xiao 		mes_remove_queue_pkt.queue_type =
451e220edf2SJack Xiao 			convert_to_mes_queue_type(input->queue_type);
452e220edf2SJack Xiao 	}
453e220edf2SJack Xiao 
454e220edf2SJack Xiao 	if (mes->adev->enable_uni_mes)
455e220edf2SJack Xiao 		pipe = AMDGPU_MES_KIQ_PIPE;
456e220edf2SJack Xiao 	else
457e220edf2SJack Xiao 		pipe = AMDGPU_MES_SCHED_PIPE;
458e220edf2SJack Xiao 
459e220edf2SJack Xiao 	return mes_v12_1_submit_pkt_and_poll_completion(mes,
460e220edf2SJack Xiao 			input->xcc_id, pipe,
461e220edf2SJack Xiao 			&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
462e220edf2SJack Xiao 			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
463e220edf2SJack Xiao }
464e220edf2SJack Xiao 
/*
 * Gang suspend hook. This implementation is a stub: it ignores both
 * arguments, sends nothing to MES and always reports success (0).
 */
static int mes_v12_1_suspend_gang(struct amdgpu_mes *mes,
				  struct mes_suspend_gang_input *input)
{
	return 0;
}
470e220edf2SJack Xiao 
/*
 * Gang resume hook. This implementation is a stub: it ignores both
 * arguments, sends nothing to MES and always reports success (0).
 */
static int mes_v12_1_resume_gang(struct amdgpu_mes *mes,
				 struct mes_resume_gang_input *input)
{
	return 0;
}
476e220edf2SJack Xiao 
477e220edf2SJack Xiao static int mes_v12_1_query_sched_status(struct amdgpu_mes *mes,
478e220edf2SJack Xiao 					  int pipe, int xcc_id)
479e220edf2SJack Xiao {
480e220edf2SJack Xiao 	union MESAPI__QUERY_MES_STATUS mes_status_pkt;
481e220edf2SJack Xiao 
482e220edf2SJack Xiao 	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
483e220edf2SJack Xiao 
484e220edf2SJack Xiao 	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
485e220edf2SJack Xiao 	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
486e220edf2SJack Xiao 	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
487e220edf2SJack Xiao 
488e220edf2SJack Xiao 	return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe,
489e220edf2SJack Xiao 			&mes_status_pkt, sizeof(mes_status_pkt),
490e220edf2SJack Xiao 			offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
491e220edf2SJack Xiao }
492f8692d2fSAlex Sierra static uint32_t mes_v12_1_get_xcc_from_reg(uint32_t reg_offset)
493f8692d2fSAlex Sierra {
494f8692d2fSAlex Sierra 	/* Check xcc reg offset range */
495f8692d2fSAlex Sierra 	uint32_t xcc = (reg_offset & XCC_MID_MASK) ? 4 : 0;
496f8692d2fSAlex Sierra 	/* Each XCC has two register ranges.
497f8692d2fSAlex Sierra 	 * These are represented in reg_offset[17:16]
498f8692d2fSAlex Sierra 	 */
499f8692d2fSAlex Sierra 	return ((reg_offset >> 16) & 0x3) + xcc;
500f8692d2fSAlex Sierra }
501f8692d2fSAlex Sierra 
502f8692d2fSAlex Sierra static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id,
503f8692d2fSAlex Sierra 				 struct RRMT_OPTION *rrmt_opt)
504f8692d2fSAlex Sierra {
505fcc4fc75SLikun Gao 	uint32_t normalized_reg = soc_v1_0_normalize_xcc_reg_offset(reg);
506f8692d2fSAlex Sierra 
507fcc4fc75SLikun Gao 	if (soc_v1_0_normalize_xcc_reg_range(normalized_reg)) {
508f8692d2fSAlex Sierra 		rrmt_opt->xcd_die_id = mes_v12_1_get_xcc_from_reg(reg);
509f8692d2fSAlex Sierra 		rrmt_opt->mode = (xcc_id == rrmt_opt->xcd_die_id) ?
510f8692d2fSAlex Sierra 			 MES_RRMT_MODE_LOCAL_XCD : MES_RRMT_MODE_REMOTE_XCD;
511f8692d2fSAlex Sierra 	} else {
512f8692d2fSAlex Sierra 		rrmt_opt->mode = MES_RRMT_MODE_LOCAL_REMOTE_AID;
513f8692d2fSAlex Sierra 	}
514f8692d2fSAlex Sierra }
515e220edf2SJack Xiao 
516e220edf2SJack Xiao static int mes_v12_1_misc_op(struct amdgpu_mes *mes,
517e220edf2SJack Xiao 			     struct mes_misc_op_input *input)
518e220edf2SJack Xiao {
5191eb2a5edSLikun Gao 	struct amdgpu_device *adev = mes->adev;
520e220edf2SJack Xiao 	union MESAPI__MISC misc_pkt;
521e220edf2SJack Xiao 	int pipe;
522e220edf2SJack Xiao 
523e220edf2SJack Xiao 	if (mes->adev->enable_uni_mes)
524e220edf2SJack Xiao 		pipe = AMDGPU_MES_KIQ_PIPE;
525e220edf2SJack Xiao 	else
526e220edf2SJack Xiao 		pipe = AMDGPU_MES_SCHED_PIPE;
527e220edf2SJack Xiao 
528e220edf2SJack Xiao 	memset(&misc_pkt, 0, sizeof(misc_pkt));
529e220edf2SJack Xiao 
530e220edf2SJack Xiao 	misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
531e220edf2SJack Xiao 	misc_pkt.header.opcode = MES_SCH_API_MISC;
532e220edf2SJack Xiao 	misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
533e220edf2SJack Xiao 
534e220edf2SJack Xiao 	switch (input->op) {
535e220edf2SJack Xiao 	case MES_MISC_OP_READ_REG:
536e220edf2SJack Xiao 		misc_pkt.opcode = MESAPI_MISC__READ_REG;
537e220edf2SJack Xiao 		misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset;
538e220edf2SJack Xiao 		misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
5391eb2a5edSLikun Gao 		mes_v12_1_get_rrmt(input->read_reg.reg_offset,
5401eb2a5edSLikun Gao 				   GET_INST(GC, input->xcc_id),
541f8692d2fSAlex Sierra 				   &misc_pkt.read_reg.rrmt_opt);
542b9a0716aSJack Xiao 		if (misc_pkt.read_reg.rrmt_opt.mode != MES_RRMT_MODE_REMOTE_MID) {
543b9a0716aSJack Xiao 			misc_pkt.read_reg.reg_offset =
544fcc4fc75SLikun Gao 				soc_v1_0_normalize_xcc_reg_offset(misc_pkt.read_reg.reg_offset);
545b9a0716aSJack Xiao 		}
546e220edf2SJack Xiao 		break;
547e220edf2SJack Xiao 	case MES_MISC_OP_WRITE_REG:
548e220edf2SJack Xiao 		misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
549e220edf2SJack Xiao 		misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset;
550e220edf2SJack Xiao 		misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
5511eb2a5edSLikun Gao 		mes_v12_1_get_rrmt(input->write_reg.reg_offset,
5521eb2a5edSLikun Gao 				   GET_INST(GC, input->xcc_id),
553f8692d2fSAlex Sierra 				   &misc_pkt.write_reg.rrmt_opt);
554b9a0716aSJack Xiao 		if (misc_pkt.write_reg.rrmt_opt.mode != MES_RRMT_MODE_REMOTE_MID) {
555b9a0716aSJack Xiao 			misc_pkt.write_reg.reg_offset =
556fcc4fc75SLikun Gao 				soc_v1_0_normalize_xcc_reg_offset(misc_pkt.write_reg.reg_offset);
557b9a0716aSJack Xiao 		}
558e220edf2SJack Xiao 		break;
559e220edf2SJack Xiao 	case MES_MISC_OP_WRM_REG_WAIT:
560e220edf2SJack Xiao 		misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
561e220edf2SJack Xiao 		misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
562e220edf2SJack Xiao 		misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
563e220edf2SJack Xiao 		misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
564e220edf2SJack Xiao 		misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
565e220edf2SJack Xiao 		misc_pkt.wait_reg_mem.reg_offset2 = 0;
5661eb2a5edSLikun Gao 		mes_v12_1_get_rrmt(input->wrm_reg.reg0,
5671eb2a5edSLikun Gao 				   GET_INST(GC, input->xcc_id),
568f8692d2fSAlex Sierra 				   &misc_pkt.wait_reg_mem.rrmt_opt1);
569b9a0716aSJack Xiao 		if (misc_pkt.wait_reg_mem.rrmt_opt1.mode != MES_RRMT_MODE_REMOTE_MID) {
570b9a0716aSJack Xiao 			misc_pkt.wait_reg_mem.reg_offset1 =
571fcc4fc75SLikun Gao 				soc_v1_0_normalize_xcc_reg_offset(misc_pkt.wait_reg_mem.reg_offset1);
572b9a0716aSJack Xiao 		}
573e220edf2SJack Xiao 		break;
574e220edf2SJack Xiao 	case MES_MISC_OP_WRM_REG_WR_WAIT:
575e220edf2SJack Xiao 		misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
576e220edf2SJack Xiao 		misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
577e220edf2SJack Xiao 		misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
578e220edf2SJack Xiao 		misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
579e220edf2SJack Xiao 		misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
580e220edf2SJack Xiao 		misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
5811eb2a5edSLikun Gao 		mes_v12_1_get_rrmt(input->wrm_reg.reg0,
5821eb2a5edSLikun Gao 				   GET_INST(GC, input->xcc_id),
583f8692d2fSAlex Sierra 				   &misc_pkt.wait_reg_mem.rrmt_opt1);
5841eb2a5edSLikun Gao 		mes_v12_1_get_rrmt(input->wrm_reg.reg1,
5851eb2a5edSLikun Gao 				   GET_INST(GC, input->xcc_id),
586f8692d2fSAlex Sierra 				   &misc_pkt.wait_reg_mem.rrmt_opt2);
587aa0f09f9SMukul Joshi 
588b9a0716aSJack Xiao 		if (misc_pkt.wait_reg_mem.rrmt_opt1.mode != MES_RRMT_MODE_REMOTE_MID) {
589b9a0716aSJack Xiao 			misc_pkt.wait_reg_mem.reg_offset1 =
590fcc4fc75SLikun Gao 				soc_v1_0_normalize_xcc_reg_offset(misc_pkt.wait_reg_mem.reg_offset1);
591b9a0716aSJack Xiao 		}
592b9a0716aSJack Xiao 		if (misc_pkt.wait_reg_mem.rrmt_opt2.mode != MES_RRMT_MODE_REMOTE_MID) {
593b9a0716aSJack Xiao 			misc_pkt.wait_reg_mem.reg_offset2 =
594fcc4fc75SLikun Gao 				soc_v1_0_normalize_xcc_reg_offset(misc_pkt.wait_reg_mem.reg_offset2);
595b9a0716aSJack Xiao 		}
596e220edf2SJack Xiao 		break;
597e220edf2SJack Xiao 	case MES_MISC_OP_SET_SHADER_DEBUGGER:
598e220edf2SJack Xiao 		pipe = AMDGPU_MES_SCHED_PIPE;
599e220edf2SJack Xiao 		misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
600e220edf2SJack Xiao 		misc_pkt.set_shader_debugger.process_context_addr =
601e220edf2SJack Xiao 				input->set_shader_debugger.process_context_addr;
602e220edf2SJack Xiao 		misc_pkt.set_shader_debugger.flags.u32all =
603e220edf2SJack Xiao 				input->set_shader_debugger.flags.u32all;
604e220edf2SJack Xiao 		misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl =
605e220edf2SJack Xiao 				input->set_shader_debugger.spi_gdbg_per_vmid_cntl;
606e220edf2SJack Xiao 		memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
607e220edf2SJack Xiao 				input->set_shader_debugger.tcp_watch_cntl,
608e220edf2SJack Xiao 				sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
609e220edf2SJack Xiao 		misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en;
610e220edf2SJack Xiao 		break;
611e220edf2SJack Xiao 	case MES_MISC_OP_CHANGE_CONFIG:
612e220edf2SJack Xiao 		misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
613e220edf2SJack Xiao 		misc_pkt.change_config.opcode =
614e220edf2SJack Xiao 			MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS;
615e220edf2SJack Xiao 		misc_pkt.change_config.option.bits.limit_single_process =
616e220edf2SJack Xiao 			input->change_config.option.limit_single_process;
617e220edf2SJack Xiao 		break;
618e220edf2SJack Xiao 	default:
619e220edf2SJack Xiao 		DRM_ERROR("unsupported misc op (%d) \n", input->op);
620e220edf2SJack Xiao 		return -EINVAL;
621e220edf2SJack Xiao 	}
622e220edf2SJack Xiao 
623e220edf2SJack Xiao 	return mes_v12_1_submit_pkt_and_poll_completion(mes,
624e220edf2SJack Xiao 			input->xcc_id, pipe,
625e220edf2SJack Xiao 			&misc_pkt, sizeof(misc_pkt),
626e220edf2SJack Xiao 			offsetof(union MESAPI__MISC, api_status));
627e220edf2SJack Xiao }
628e220edf2SJack Xiao 
629e220edf2SJack Xiao static int mes_v12_1_set_hw_resources_1(struct amdgpu_mes *mes,
630e220edf2SJack Xiao 					  int pipe, int xcc_id)
631e220edf2SJack Xiao {
632e220edf2SJack Xiao 	union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt;
63375053887SJack Xiao 	int master_xcc_id, inst = MES_PIPE_INST(xcc_id, pipe);
634e220edf2SJack Xiao 
635e220edf2SJack Xiao 	memset(&mes_set_hw_res_1_pkt, 0, sizeof(mes_set_hw_res_1_pkt));
636e220edf2SJack Xiao 
637e220edf2SJack Xiao 	mes_set_hw_res_1_pkt.header.type = MES_API_TYPE_SCHEDULER;
638e220edf2SJack Xiao 	mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
639e220edf2SJack Xiao 	mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
640e220edf2SJack Xiao 	mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100;
641e220edf2SJack Xiao 
64275053887SJack Xiao 	if (mes->enable_coop_mode && pipe == AMDGPU_MES_SCHED_PIPE) {
64375053887SJack Xiao 		master_xcc_id = mes->master_xcc_ids[inst];
64475053887SJack Xiao 		mes_set_hw_res_1_pkt.mes_coop_mode = 1;
64575053887SJack Xiao 		mes_set_hw_res_1_pkt.coop_sch_shared_mc_addr =
64675053887SJack Xiao 			mes->shared_cmd_buf_gpu_addr[master_xcc_id];
64775053887SJack Xiao 	}
64875053887SJack Xiao 
649e220edf2SJack Xiao 	return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe,
650e220edf2SJack Xiao 			&mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt),
651e220edf2SJack Xiao 			offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
652e220edf2SJack Xiao }
653e220edf2SJack Xiao 
654e220edf2SJack Xiao static void mes_v12_1_set_gfx_hqd_mask(union MESAPI_SET_HW_RESOURCES *pkt)
655e220edf2SJack Xiao {
656e220edf2SJack Xiao 	/*
657e220edf2SJack Xiao 	 * GFX V12 has only one GFX pipe, but 8 queues in it.
658e220edf2SJack Xiao 	 * GFX pipe 0 queue 0 is being used by Kernel queue.
659e220edf2SJack Xiao 	 * Set GFX pipe 0 queue 1-7 for MES scheduling
660e220edf2SJack Xiao 	 * mask = 1111 1110b
661e220edf2SJack Xiao 	 */
662e220edf2SJack Xiao 	pkt->gfx_hqd_mask[0] = 0xFE;
663e220edf2SJack Xiao }
664e220edf2SJack Xiao 
665e220edf2SJack Xiao static int mes_v12_1_set_hw_resources(struct amdgpu_mes *mes,
666e220edf2SJack Xiao 					int pipe, int xcc_id)
667e220edf2SJack Xiao {
668e220edf2SJack Xiao 	int i;
669e220edf2SJack Xiao 	struct amdgpu_device *adev = mes->adev;
670e220edf2SJack Xiao 	union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;
671e220edf2SJack Xiao 
672e220edf2SJack Xiao 	memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
673e220edf2SJack Xiao 
674e220edf2SJack Xiao 	mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
675e220edf2SJack Xiao 	mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
676e220edf2SJack Xiao 	mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
677e220edf2SJack Xiao 
678e220edf2SJack Xiao 	if (pipe == AMDGPU_MES_SCHED_PIPE) {
679e220edf2SJack Xiao 		mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
680e220edf2SJack Xiao 		mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
681e220edf2SJack Xiao 		mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
682e220edf2SJack Xiao 		mes_set_hw_res_pkt.paging_vmid = 0;
683e220edf2SJack Xiao 
684e220edf2SJack Xiao 		for (i = 0; i < MAX_COMPUTE_PIPES; i++)
685e220edf2SJack Xiao 			mes_set_hw_res_pkt.compute_hqd_mask[i] =
686e220edf2SJack Xiao 				mes->compute_hqd_mask[i];
687e220edf2SJack Xiao 
688e220edf2SJack Xiao 		mes_v12_1_set_gfx_hqd_mask(&mes_set_hw_res_pkt);
689e220edf2SJack Xiao 
690e220edf2SJack Xiao 		for (i = 0; i < MAX_SDMA_PIPES; i++)
691e220edf2SJack Xiao 			mes_set_hw_res_pkt.sdma_hqd_mask[i] =
692e220edf2SJack Xiao 				mes->sdma_hqd_mask[i];
693e220edf2SJack Xiao 
694e220edf2SJack Xiao 		for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
695e220edf2SJack Xiao 			mes_set_hw_res_pkt.aggregated_doorbells[i] =
696e220edf2SJack Xiao 				mes->aggregated_doorbells[i];
697e220edf2SJack Xiao 	}
698e220edf2SJack Xiao 
699e220edf2SJack Xiao 	mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr =
700e220edf2SJack Xiao 		mes->sch_ctx_gpu_addr[pipe];
701e220edf2SJack Xiao 	mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
702e220edf2SJack Xiao 		mes->query_status_fence_gpu_addr[pipe];
703e220edf2SJack Xiao 
704e220edf2SJack Xiao 	for (i = 0; i < 5; i++) {
70575053887SJack Xiao 		mes_set_hw_res_pkt.gc_base[i] =
706a5192fbbSLikun Gao 			adev->reg_offset[GC_HWIP][0][i];
707e220edf2SJack Xiao 		mes_set_hw_res_pkt.mmhub_base[i] =
708e220edf2SJack Xiao 				adev->reg_offset[MMHUB_HWIP][0][i];
709e220edf2SJack Xiao 		mes_set_hw_res_pkt.osssys_base[i] =
710e220edf2SJack Xiao 		adev->reg_offset[OSSSYS_HWIP][0][i];
711e220edf2SJack Xiao 	}
712e220edf2SJack Xiao 
713e220edf2SJack Xiao 	mes_set_hw_res_pkt.disable_reset = 1;
714e220edf2SJack Xiao 	mes_set_hw_res_pkt.disable_mes_log = 1;
715e220edf2SJack Xiao 	mes_set_hw_res_pkt.use_different_vmid_compute = 1;
716e220edf2SJack Xiao 	mes_set_hw_res_pkt.enable_reg_active_poll = 1;
717e220edf2SJack Xiao 	mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
718e220edf2SJack Xiao 
719e220edf2SJack Xiao 	/*
720e220edf2SJack Xiao 	 * Keep oversubscribe timer for sdma . When we have unmapped doorbell
721e220edf2SJack Xiao 	 * handling support, other queue will not use the oversubscribe timer.
722e220edf2SJack Xiao 	 * handling  mode - 0: disabled; 1: basic version; 2: basic+ version
723e220edf2SJack Xiao 	 */
724e220edf2SJack Xiao 	mes_set_hw_res_pkt.oversubscription_timer = 50;
725e220edf2SJack Xiao 	mes_set_hw_res_pkt.unmapped_doorbell_handling = 1;
726e220edf2SJack Xiao 
727e220edf2SJack Xiao 	if (amdgpu_mes_log_enable) {
728e220edf2SJack Xiao 		mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
729e220edf2SJack Xiao 		mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr =
73061a3ade2SMichael Chen 			mes->event_log_gpu_addr + MES_PIPE_INST(xcc_id, pipe) * AMDGPU_MES_LOG_BUFFER_SIZE;
731e220edf2SJack Xiao 	}
732e220edf2SJack Xiao 
733e220edf2SJack Xiao 	if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
734e220edf2SJack Xiao 		mes_set_hw_res_pkt.limit_single_process = 1;
735e220edf2SJack Xiao 
736e220edf2SJack Xiao 	return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe,
737e220edf2SJack Xiao 			&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
738e220edf2SJack Xiao 			offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
739e220edf2SJack Xiao }
740e220edf2SJack Xiao 
741e220edf2SJack Xiao static void mes_v12_1_init_aggregated_doorbell(struct amdgpu_mes *mes,
742e220edf2SJack Xiao 						 int xcc_id)
743e220edf2SJack Xiao {
744e220edf2SJack Xiao 	struct amdgpu_device *adev = mes->adev;
745e220edf2SJack Xiao 	uint32_t data;
746e220edf2SJack Xiao 
747e220edf2SJack Xiao 	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1);
748e220edf2SJack Xiao 	data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
749e220edf2SJack Xiao 		  CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
750e220edf2SJack Xiao 		  CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
751e220edf2SJack Xiao 	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
752e220edf2SJack Xiao 		CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
753e220edf2SJack Xiao 	data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
754e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1, data);
755e220edf2SJack Xiao 
756e220edf2SJack Xiao 	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2);
757e220edf2SJack Xiao 	data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
758e220edf2SJack Xiao 		  CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
759e220edf2SJack Xiao 		  CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
760e220edf2SJack Xiao 	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
761e220edf2SJack Xiao 		CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
762e220edf2SJack Xiao 	data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
763e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2, data);
764e220edf2SJack Xiao 
765e220edf2SJack Xiao 	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3);
766e220edf2SJack Xiao 	data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
767e220edf2SJack Xiao 		  CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
768e220edf2SJack Xiao 		  CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
769e220edf2SJack Xiao 	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
770e220edf2SJack Xiao 		CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
771e220edf2SJack Xiao 	data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
772e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3, data);
773e220edf2SJack Xiao 
774e220edf2SJack Xiao 	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4);
775e220edf2SJack Xiao 	data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
776e220edf2SJack Xiao 		  CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
777e220edf2SJack Xiao 		  CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
778e220edf2SJack Xiao 	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
779e220edf2SJack Xiao 		CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
780e220edf2SJack Xiao 	data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
781e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4, data);
782e220edf2SJack Xiao 
783e220edf2SJack Xiao 	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5);
784e220edf2SJack Xiao 	data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
785e220edf2SJack Xiao 		  CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
786e220edf2SJack Xiao 		  CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
787e220edf2SJack Xiao 	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
788e220edf2SJack Xiao 		CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
789e220edf2SJack Xiao 	data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
790e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5, data);
791e220edf2SJack Xiao 
792e220edf2SJack Xiao 	data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
793e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_GFX_CONTROL, data);
794e220edf2SJack Xiao }
795e220edf2SJack Xiao 
796e220edf2SJack Xiao 
797e220edf2SJack Xiao static void mes_v12_1_enable_unmapped_doorbell_handling(
798e220edf2SJack Xiao 	struct amdgpu_mes *mes, bool enable, int xcc_id)
799e220edf2SJack Xiao {
800e220edf2SJack Xiao 	struct amdgpu_device *adev = mes->adev;
801e220edf2SJack Xiao 	uint32_t data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL);
802e220edf2SJack Xiao 
803e220edf2SJack Xiao 	/*
804e220edf2SJack Xiao 	 * The default PROC_LSB settng is 0xc which means doorbell
805e220edf2SJack Xiao 	 * addr[16:12] gives the doorbell page number. For kfd, each
806e220edf2SJack Xiao 	 * process will use 2 pages of doorbell, we need to change the
807e220edf2SJack Xiao 	 * setting to 0xd
808e220edf2SJack Xiao 	 */
809e220edf2SJack Xiao 	data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK;
810e220edf2SJack Xiao 	data |= 0xd <<  CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT;
811e220edf2SJack Xiao 
812e220edf2SJack Xiao 	data |= (enable ? 1 : 0) << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT;
813e220edf2SJack Xiao 
814e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL, data);
815e220edf2SJack Xiao }
816e220edf2SJack Xiao 
#if 0
/*
 * Currently compiled out.
 *
 * Build a MES_SCH_API_RESET packet for a legacy queue and submit it through
 * mes_v12_1_submit_pkt_and_poll_completion().  GFX queues are described by
 * the *_lp fields (pipe, queue, MQD address, doorbell, wptr address, vmid);
 * every other queue type is reset by doorbell offset only.  The packet goes
 * to the KIQ pipe when unified MES is enabled, otherwise to the scheduler
 * pipe.
 */
static int mes_v12_1_reset_legacy_queue(struct amdgpu_mes *mes,
					struct mes_reset_legacy_queue_input *input)
{
	union MESAPI__RESET pkt;
	int pipe = mes->adev->enable_uni_mes ?
		AMDGPU_MES_KIQ_PIPE : AMDGPU_MES_SCHED_PIPE;

	memset(&pkt, 0, sizeof(pkt));

	pkt.header.type = MES_API_TYPE_SCHEDULER;
	pkt.header.opcode = MES_SCH_API_RESET;
	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	pkt.queue_type = convert_to_mes_queue_type(input->queue_type);

	if (pkt.queue_type != MES_QUEUE_TYPE_GFX) {
		pkt.reset_queue_only = 1;
		pkt.doorbell_offset = input->doorbell_offset;
	} else {
		pkt.reset_legacy_gfx = 1;
		pkt.pipe_id_lp = input->pipe_id;
		pkt.queue_id_lp = input->queue_id;
		pkt.mqd_mc_addr_lp = input->mqd_addr;
		pkt.doorbell_offset_lp = input->doorbell_offset;
		pkt.wptr_addr_lp = input->wptr_addr;
		pkt.vmid_id_lp = input->vmid;
	}

	return mes_v12_1_submit_pkt_and_poll_completion(mes,
			input->xcc_id, pipe,
			&pkt, sizeof(pkt),
			offsetof(union MESAPI__RESET, api_status));
}
#endif
857e220edf2SJack Xiao 
/*
 * Translate an amdgpu hub id into the hub id placed in the MES INV_TLBS
 * packet: 0 for a GFX hub, 1 for MMHUB0, 2 for MMHUB1.  Any other id
 * yields -EINVAL.
 *
 * MES doesn't support invalidating the gc_hub on a slave xcc individually;
 * the master xcc will invalidate all gc_hubs for the partition.
 */
static int mes_v12_inv_tlb_convert_hub_id(uint8_t id)
{
	if (AMDGPU_IS_GFXHUB(id))
		return 0;

	if (AMDGPU_IS_MMHUB0(id))
		return 1;

	if (AMDGPU_IS_MMHUB1(id))
		return 2;

	return -EINVAL;
}
873*d0c989a0SShaoyun Liu 
874*d0c989a0SShaoyun Liu static int mes_v12_1_inv_tlbs_pasid(struct amdgpu_mes *mes,
875*d0c989a0SShaoyun Liu 				    struct mes_inv_tlbs_pasid_input *input)
876*d0c989a0SShaoyun Liu {
877*d0c989a0SShaoyun Liu 	union MESAPI__INV_TLBS mes_inv_tlbs;
878*d0c989a0SShaoyun Liu 	int xcc_id = input->xcc_id;
879*d0c989a0SShaoyun Liu 	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);
880*d0c989a0SShaoyun Liu 	int ret;
881*d0c989a0SShaoyun Liu 
882*d0c989a0SShaoyun Liu 	if (mes->enable_coop_mode)
883*d0c989a0SShaoyun Liu 		xcc_id = mes->master_xcc_ids[inst];
884*d0c989a0SShaoyun Liu 
885*d0c989a0SShaoyun Liu 	memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));
886*d0c989a0SShaoyun Liu 
887*d0c989a0SShaoyun Liu 	mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
888*d0c989a0SShaoyun Liu 	mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
889*d0c989a0SShaoyun Liu 	mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
890*d0c989a0SShaoyun Liu 
891*d0c989a0SShaoyun Liu 	mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
892*d0c989a0SShaoyun Liu 	mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
893*d0c989a0SShaoyun Liu 	mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;
894*d0c989a0SShaoyun Liu 
895*d0c989a0SShaoyun Liu 	/*convert amdgpu_mes_hub_id to mes expected hub_id */
896*d0c989a0SShaoyun Liu 	ret = mes_v12_inv_tlb_convert_hub_id(input->hub_id);
897*d0c989a0SShaoyun Liu 	if (ret < 0)
898*d0c989a0SShaoyun Liu 		return -EINVAL;
899*d0c989a0SShaoyun Liu 	mes_inv_tlbs.invalidate_tlbs.hub_id = ret;
900*d0c989a0SShaoyun Liu 	return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, AMDGPU_MES_KIQ_PIPE,
901*d0c989a0SShaoyun Liu 			&mes_inv_tlbs, sizeof(mes_inv_tlbs),
902*d0c989a0SShaoyun Liu 			offsetof(union MESAPI__INV_TLBS, api_status));
903*d0c989a0SShaoyun Liu 
904*d0c989a0SShaoyun Liu }
905*d0c989a0SShaoyun Liu 
906e220edf2SJack Xiao static const struct amdgpu_mes_funcs mes_v12_1_funcs = {
907e220edf2SJack Xiao 	.add_hw_queue = mes_v12_1_add_hw_queue,
908e220edf2SJack Xiao 	.remove_hw_queue = mes_v12_1_remove_hw_queue,
909e220edf2SJack Xiao 	.map_legacy_queue = mes_v12_1_map_legacy_queue,
910e220edf2SJack Xiao 	.unmap_legacy_queue = mes_v12_1_unmap_legacy_queue,
911e220edf2SJack Xiao 	.suspend_gang = mes_v12_1_suspend_gang,
912e220edf2SJack Xiao 	.resume_gang = mes_v12_1_resume_gang,
913e220edf2SJack Xiao 	.misc_op = mes_v12_1_misc_op,
914e220edf2SJack Xiao 	.reset_hw_queue = mes_v12_1_reset_hw_queue,
915*d0c989a0SShaoyun Liu 	.invalidate_tlbs_pasid = mes_v12_1_inv_tlbs_pasid,
916e220edf2SJack Xiao };
917e220edf2SJack Xiao 
918e220edf2SJack Xiao static int mes_v12_1_allocate_ucode_buffer(struct amdgpu_device *adev,
919e220edf2SJack Xiao 					     enum amdgpu_mes_pipe pipe,
920e220edf2SJack Xiao 					     int xcc_id)
921e220edf2SJack Xiao {
922e220edf2SJack Xiao 	int r, inst = MES_PIPE_INST(xcc_id, pipe);
923e220edf2SJack Xiao 	const struct mes_firmware_header_v1_0 *mes_hdr;
924e220edf2SJack Xiao 	const __le32 *fw_data;
925e220edf2SJack Xiao 	unsigned fw_size;
926e220edf2SJack Xiao 
927e220edf2SJack Xiao 	mes_hdr = (const struct mes_firmware_header_v1_0 *)
928e220edf2SJack Xiao 		adev->mes.fw[pipe]->data;
929e220edf2SJack Xiao 
930e220edf2SJack Xiao 	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
931e220edf2SJack Xiao 		   le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
932e220edf2SJack Xiao 	fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
933e220edf2SJack Xiao 
934e220edf2SJack Xiao 	r = amdgpu_bo_create_reserved(adev, fw_size,
935e220edf2SJack Xiao 				      PAGE_SIZE,
936e220edf2SJack Xiao 				      AMDGPU_GEM_DOMAIN_VRAM,
937e220edf2SJack Xiao 				      &adev->mes.ucode_fw_obj[inst],
938e220edf2SJack Xiao 				      &adev->mes.ucode_fw_gpu_addr[inst],
939e220edf2SJack Xiao 				      (void **)&adev->mes.ucode_fw_ptr[inst]);
940e220edf2SJack Xiao 	if (r) {
941e220edf2SJack Xiao 		dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
942e220edf2SJack Xiao 		return r;
943e220edf2SJack Xiao 	}
944e220edf2SJack Xiao 
945e220edf2SJack Xiao 	memcpy(adev->mes.ucode_fw_ptr[inst], fw_data, fw_size);
946e220edf2SJack Xiao 
947e220edf2SJack Xiao 	amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[inst]);
948e220edf2SJack Xiao 	amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[inst]);
949e220edf2SJack Xiao 
950e220edf2SJack Xiao 	return 0;
951e220edf2SJack Xiao }
952e220edf2SJack Xiao 
953e220edf2SJack Xiao static int mes_v12_1_allocate_ucode_data_buffer(struct amdgpu_device *adev,
954e220edf2SJack Xiao 						  enum amdgpu_mes_pipe pipe,
955e220edf2SJack Xiao 						  int xcc_id)
956e220edf2SJack Xiao {
957e220edf2SJack Xiao 	int r, inst = MES_PIPE_INST(xcc_id, pipe);
958e220edf2SJack Xiao 	const struct mes_firmware_header_v1_0 *mes_hdr;
959e220edf2SJack Xiao 	const __le32 *fw_data;
960e220edf2SJack Xiao 	unsigned fw_size;
961e220edf2SJack Xiao 
962e220edf2SJack Xiao 	mes_hdr = (const struct mes_firmware_header_v1_0 *)
963e220edf2SJack Xiao 		adev->mes.fw[pipe]->data;
964e220edf2SJack Xiao 
965e220edf2SJack Xiao 	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
966e220edf2SJack Xiao 		   le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
967e220edf2SJack Xiao 	fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
968e220edf2SJack Xiao 
969e220edf2SJack Xiao 	r = amdgpu_bo_create_reserved(adev, fw_size,
970e220edf2SJack Xiao 				      64 * 1024,
971e220edf2SJack Xiao 				      AMDGPU_GEM_DOMAIN_VRAM,
972e220edf2SJack Xiao 				      &adev->mes.data_fw_obj[inst],
973e220edf2SJack Xiao 				      &adev->mes.data_fw_gpu_addr[inst],
974e220edf2SJack Xiao 				      (void **)&adev->mes.data_fw_ptr[inst]);
975e220edf2SJack Xiao 	if (r) {
976e220edf2SJack Xiao 		dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
977e220edf2SJack Xiao 		return r;
978e220edf2SJack Xiao 	}
979e220edf2SJack Xiao 
980e220edf2SJack Xiao 	memcpy(adev->mes.data_fw_ptr[inst], fw_data, fw_size);
981e220edf2SJack Xiao 
982e220edf2SJack Xiao 	amdgpu_bo_kunmap(adev->mes.data_fw_obj[inst]);
983e220edf2SJack Xiao 	amdgpu_bo_unreserve(adev->mes.data_fw_obj[inst]);
984e220edf2SJack Xiao 
985e220edf2SJack Xiao 	return 0;
986e220edf2SJack Xiao }
987e220edf2SJack Xiao 
988e220edf2SJack Xiao static void mes_v12_1_free_ucode_buffers(struct amdgpu_device *adev,
989e220edf2SJack Xiao 					   enum amdgpu_mes_pipe pipe,
990e220edf2SJack Xiao 					   int xcc_id)
991e220edf2SJack Xiao {
992e220edf2SJack Xiao 	int inst = MES_PIPE_INST(xcc_id, pipe);
993e220edf2SJack Xiao 
994e220edf2SJack Xiao 	amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[inst],
995e220edf2SJack Xiao 			      &adev->mes.data_fw_gpu_addr[inst],
996e220edf2SJack Xiao 			      (void **)&adev->mes.data_fw_ptr[inst]);
997e220edf2SJack Xiao 
998e220edf2SJack Xiao 	amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[inst],
999e220edf2SJack Xiao 			      &adev->mes.ucode_fw_gpu_addr[inst],
1000e220edf2SJack Xiao 			      (void **)&adev->mes.ucode_fw_ptr[inst]);
1001e220edf2SJack Xiao }
1002e220edf2SJack Xiao 
1003e220edf2SJack Xiao static void mes_v12_1_enable(struct amdgpu_device *adev,
1004e220edf2SJack Xiao 			       bool enable, int xcc_id)
1005e220edf2SJack Xiao {
1006e220edf2SJack Xiao 	uint64_t ucode_addr;
1007e220edf2SJack Xiao 	uint32_t pipe, data = 0;
1008e220edf2SJack Xiao 
1009e220edf2SJack Xiao 	if (enable) {
1010e220edf2SJack Xiao 		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL);
1011e220edf2SJack Xiao 		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
1012e220edf2SJack Xiao 		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
1013e220edf2SJack Xiao 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);
1014e220edf2SJack Xiao 
1015e220edf2SJack Xiao 		mutex_lock(&adev->srbm_mutex);
1016e220edf2SJack Xiao 		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
1017e220edf2SJack Xiao 			soc_v1_0_grbm_select(adev, 3, pipe, 0, 0,
1018e220edf2SJack Xiao 					     GET_INST(GC, xcc_id));
1019e220edf2SJack Xiao 
1020e220edf2SJack Xiao 			ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
1021e220edf2SJack Xiao 			WREG32_SOC15(GC, GET_INST(GC, xcc_id),
1022e220edf2SJack Xiao 				     regCP_MES_PRGRM_CNTR_START,
1023e220edf2SJack Xiao 				     lower_32_bits(ucode_addr));
1024e220edf2SJack Xiao 			WREG32_SOC15(GC, GET_INST(GC, xcc_id),
1025e220edf2SJack Xiao 				     regCP_MES_PRGRM_CNTR_START_HI,
1026e220edf2SJack Xiao 				     upper_32_bits(ucode_addr));
1027e220edf2SJack Xiao 		}
1028e220edf2SJack Xiao 		soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
1029e220edf2SJack Xiao 		mutex_unlock(&adev->srbm_mutex);
1030e220edf2SJack Xiao 
1031e220edf2SJack Xiao 		/* unhalt MES and activate pipe0 */
1032e220edf2SJack Xiao 		data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
1033e220edf2SJack Xiao 		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1);
1034e220edf2SJack Xiao 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);
1035e220edf2SJack Xiao 
1036e220edf2SJack Xiao 		if (amdgpu_emu_mode)
1037e220edf2SJack Xiao 			msleep(500);
1038e220edf2SJack Xiao 		else if (adev->enable_uni_mes)
1039e220edf2SJack Xiao 			udelay(500);
1040e220edf2SJack Xiao 		else
1041e220edf2SJack Xiao 			udelay(50);
1042e220edf2SJack Xiao 	} else {
1043e220edf2SJack Xiao 		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL);
1044e220edf2SJack Xiao 		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
1045e220edf2SJack Xiao 		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
1046e220edf2SJack Xiao 		data = REG_SET_FIELD(data, CP_MES_CNTL,
1047e220edf2SJack Xiao 				     MES_INVALIDATE_ICACHE, 1);
1048e220edf2SJack Xiao 		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
1049e220edf2SJack Xiao 		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
1050e220edf2SJack Xiao 		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
1051e220edf2SJack Xiao 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);
1052e220edf2SJack Xiao 	}
1053e220edf2SJack Xiao }
1054e220edf2SJack Xiao 
1055e220edf2SJack Xiao static void mes_v12_1_set_ucode_start_addr(struct amdgpu_device *adev,
1056e220edf2SJack Xiao 					     int xcc_id)
1057e220edf2SJack Xiao {
1058e220edf2SJack Xiao 	uint64_t ucode_addr;
1059e220edf2SJack Xiao 	int pipe;
1060e220edf2SJack Xiao 
1061e220edf2SJack Xiao 	mes_v12_1_enable(adev, false, xcc_id);
1062e220edf2SJack Xiao 
1063e220edf2SJack Xiao 	mutex_lock(&adev->srbm_mutex);
1064e220edf2SJack Xiao 	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
1065e220edf2SJack Xiao 		/* me=3, queue=0 */
1066e220edf2SJack Xiao 		soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));
1067e220edf2SJack Xiao 
1068e220edf2SJack Xiao 		/* set ucode start address */
1069e220edf2SJack Xiao 		ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
1070e220edf2SJack Xiao 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START,
1071e220edf2SJack Xiao 				lower_32_bits(ucode_addr));
1072e220edf2SJack Xiao 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START_HI,
1073e220edf2SJack Xiao 				upper_32_bits(ucode_addr));
1074e220edf2SJack Xiao 
1075e220edf2SJack Xiao 		soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
1076e220edf2SJack Xiao 	}
1077e220edf2SJack Xiao 	mutex_unlock(&adev->srbm_mutex);
1078e220edf2SJack Xiao }
1079e220edf2SJack Xiao 
1080e220edf2SJack Xiao /* This function is for backdoor MES firmware */
1081e220edf2SJack Xiao static int mes_v12_1_load_microcode(struct amdgpu_device *adev,
1082e220edf2SJack Xiao 				      enum amdgpu_mes_pipe pipe,
1083e220edf2SJack Xiao 				      bool prime_icache, int xcc_id)
1084e220edf2SJack Xiao {
1085e220edf2SJack Xiao 	int r, inst = MES_PIPE_INST(xcc_id, pipe);
1086e220edf2SJack Xiao 	uint32_t data;
1087e220edf2SJack Xiao 
1088e220edf2SJack Xiao 	mes_v12_1_enable(adev, false, xcc_id);
1089e220edf2SJack Xiao 
1090e220edf2SJack Xiao 	if (!adev->mes.fw[pipe])
1091e220edf2SJack Xiao 		return -EINVAL;
1092e220edf2SJack Xiao 
1093e220edf2SJack Xiao 	r = mes_v12_1_allocate_ucode_buffer(adev, pipe, xcc_id);
1094e220edf2SJack Xiao 	if (r)
1095e220edf2SJack Xiao 		return r;
1096e220edf2SJack Xiao 
1097e220edf2SJack Xiao 	r = mes_v12_1_allocate_ucode_data_buffer(adev, pipe, xcc_id);
1098e220edf2SJack Xiao 	if (r) {
1099e220edf2SJack Xiao 		mes_v12_1_free_ucode_buffers(adev, pipe, xcc_id);
1100e220edf2SJack Xiao 		return r;
1101e220edf2SJack Xiao 	}
1102e220edf2SJack Xiao 
1103e220edf2SJack Xiao 	mutex_lock(&adev->srbm_mutex);
1104e220edf2SJack Xiao 	/* me=3, pipe=0, queue=0 */
1105e220edf2SJack Xiao 	soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));
1106e220edf2SJack Xiao 
1107e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_CNTL, 0);
1108e220edf2SJack Xiao 
1109e220edf2SJack Xiao 	/* set ucode fimrware address */
1110e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_LO,
1111e220edf2SJack Xiao 		     lower_32_bits(adev->mes.ucode_fw_gpu_addr[inst]));
1112e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_HI,
1113e220edf2SJack Xiao 		     upper_32_bits(adev->mes.ucode_fw_gpu_addr[inst]));
1114e220edf2SJack Xiao 
1115e220edf2SJack Xiao 	/* set ucode instruction cache boundary to 2M-1 */
1116e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MIBOUND_LO, 0x1FFFFF);
1117e220edf2SJack Xiao 
1118e220edf2SJack Xiao 	/* set ucode data firmware address */
1119e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_LO,
1120e220edf2SJack Xiao 		     lower_32_bits(adev->mes.data_fw_gpu_addr[inst]));
1121e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_HI,
1122e220edf2SJack Xiao 		     upper_32_bits(adev->mes.data_fw_gpu_addr[inst]));
1123e220edf2SJack Xiao 
1124e220edf2SJack Xiao 	/* Set data cache boundary CP_MES_MDBOUND_LO */
1125e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBOUND_LO, 0x7FFFF);
1126e220edf2SJack Xiao 
1127e220edf2SJack Xiao 	if (prime_icache) {
1128e220edf2SJack Xiao 		/* invalidate ICACHE */
1129e220edf2SJack Xiao 		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL);
1130e220edf2SJack Xiao 		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
1131e220edf2SJack Xiao 		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
1132e220edf2SJack Xiao 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data);
1133e220edf2SJack Xiao 
1134e220edf2SJack Xiao 		/* prime the ICACHE. */
1135e220edf2SJack Xiao 		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL);
1136e220edf2SJack Xiao 		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
1137e220edf2SJack Xiao 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data);
1138e220edf2SJack Xiao 	}
1139e220edf2SJack Xiao 
1140e220edf2SJack Xiao 	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
1141e220edf2SJack Xiao 	mutex_unlock(&adev->srbm_mutex);
1142e220edf2SJack Xiao 
1143e220edf2SJack Xiao 	return 0;
1144e220edf2SJack Xiao }
1145e220edf2SJack Xiao 
1146e220edf2SJack Xiao static int mes_v12_1_allocate_eop_buf(struct amdgpu_device *adev,
1147e220edf2SJack Xiao 					enum amdgpu_mes_pipe pipe,
1148e220edf2SJack Xiao 					int xcc_id)
1149e220edf2SJack Xiao {
1150e220edf2SJack Xiao 	int r, inst = MES_PIPE_INST(xcc_id, pipe);
1151e220edf2SJack Xiao 	u32 *eop;
1152e220edf2SJack Xiao 
1153e220edf2SJack Xiao 	r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
1154e220edf2SJack Xiao 			      AMDGPU_GEM_DOMAIN_GTT,
1155e220edf2SJack Xiao 			      &adev->mes.eop_gpu_obj[inst],
1156e220edf2SJack Xiao 			      &adev->mes.eop_gpu_addr[inst],
1157e220edf2SJack Xiao 			      (void **)&eop);
1158e220edf2SJack Xiao 	if (r) {
1159e220edf2SJack Xiao 		dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
1160e220edf2SJack Xiao 		return r;
1161e220edf2SJack Xiao 	}
1162e220edf2SJack Xiao 
1163e220edf2SJack Xiao 	memset(eop, 0,
1164e220edf2SJack Xiao 	       adev->mes.eop_gpu_obj[inst]->tbo.base.size);
1165e220edf2SJack Xiao 
1166e220edf2SJack Xiao 	amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[inst]);
1167e220edf2SJack Xiao 	amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[inst]);
1168e220edf2SJack Xiao 
1169e220edf2SJack Xiao 	return 0;
1170e220edf2SJack Xiao }
1171e220edf2SJack Xiao 
117275053887SJack Xiao static int mes_v12_1_allocate_shared_cmd_buf(struct amdgpu_device *adev,
117375053887SJack Xiao 					     enum amdgpu_mes_pipe pipe,
117475053887SJack Xiao 					     int xcc_id)
117575053887SJack Xiao {
117675053887SJack Xiao 	int r, inst = MES_PIPE_INST(xcc_id, pipe);
117775053887SJack Xiao 
117875053887SJack Xiao 	if (pipe == AMDGPU_MES_KIQ_PIPE)
117975053887SJack Xiao 		return 0;
118075053887SJack Xiao 
118175053887SJack Xiao 	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
118275053887SJack Xiao 				    AMDGPU_GEM_DOMAIN_VRAM,
118375053887SJack Xiao 				    &adev->mes.shared_cmd_buf_obj[inst],
118475053887SJack Xiao 				    &adev->mes.shared_cmd_buf_gpu_addr[inst],
118575053887SJack Xiao 				    NULL);
118675053887SJack Xiao 	if (r) {
118775053887SJack Xiao 		dev_err(adev->dev,
118875053887SJack Xiao 			"(%d) failed to create shared cmd buf bo\n", r);
118975053887SJack Xiao 		return r;
119075053887SJack Xiao 	}
119175053887SJack Xiao 
119275053887SJack Xiao 	return 0;
119375053887SJack Xiao }
119475053887SJack Xiao 
1195e220edf2SJack Xiao static int mes_v12_1_mqd_init(struct amdgpu_ring *ring)
1196e220edf2SJack Xiao {
1197e220edf2SJack Xiao 	struct v12_1_mes_mqd *mqd = ring->mqd_ptr;
1198e220edf2SJack Xiao 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
1199e220edf2SJack Xiao 	uint32_t tmp;
1200e220edf2SJack Xiao 
1201e220edf2SJack Xiao 	mqd->header = 0xC0310800;
1202e220edf2SJack Xiao 	mqd->compute_pipelinestat_enable = 0x00000001;
1203e220edf2SJack Xiao 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
1204e220edf2SJack Xiao 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
1205e220edf2SJack Xiao 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
1206e220edf2SJack Xiao 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
1207e220edf2SJack Xiao 	mqd->compute_misc_reserved = 0x00000007;
1208e220edf2SJack Xiao 
1209e220edf2SJack Xiao 	eop_base_addr = ring->eop_gpu_addr >> 8;
1210e220edf2SJack Xiao 
1211e220edf2SJack Xiao 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
1212e220edf2SJack Xiao 	tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
1213e220edf2SJack Xiao 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
1214e220edf2SJack Xiao 			(order_base_2(MES_EOP_SIZE / 4) - 1));
1215e220edf2SJack Xiao 
1216e220edf2SJack Xiao 	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
1217e220edf2SJack Xiao 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
1218e220edf2SJack Xiao 	mqd->cp_hqd_eop_control = tmp;
1219e220edf2SJack Xiao 
1220e220edf2SJack Xiao 	/* disable the queue if it's active */
1221e220edf2SJack Xiao 	ring->wptr = 0;
1222e220edf2SJack Xiao 	mqd->cp_hqd_pq_rptr = 0;
1223e220edf2SJack Xiao 	mqd->cp_hqd_pq_wptr_lo = 0;
1224e220edf2SJack Xiao 	mqd->cp_hqd_pq_wptr_hi = 0;
1225e220edf2SJack Xiao 
1226e220edf2SJack Xiao 	/* set the pointer to the MQD */
1227e220edf2SJack Xiao 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
1228e220edf2SJack Xiao 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
1229e220edf2SJack Xiao 
1230e220edf2SJack Xiao 	/* set MQD vmid to 0 */
1231e220edf2SJack Xiao 	tmp = regCP_MQD_CONTROL_DEFAULT;
1232e220edf2SJack Xiao 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
1233e220edf2SJack Xiao 	mqd->cp_mqd_control = tmp;
1234e220edf2SJack Xiao 
1235e220edf2SJack Xiao 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
1236e220edf2SJack Xiao 	hqd_gpu_addr = ring->gpu_addr >> 8;
1237e220edf2SJack Xiao 	mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
1238e220edf2SJack Xiao 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
1239e220edf2SJack Xiao 
1240e220edf2SJack Xiao 	/* set the wb address whether it's enabled or not */
1241e220edf2SJack Xiao 	wb_gpu_addr = ring->rptr_gpu_addr;
1242e220edf2SJack Xiao 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
1243e220edf2SJack Xiao 	mqd->cp_hqd_pq_rptr_report_addr_hi =
1244e220edf2SJack Xiao 		upper_32_bits(wb_gpu_addr) & 0xffff;
1245e220edf2SJack Xiao 
1246e220edf2SJack Xiao 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
1247e220edf2SJack Xiao 	wb_gpu_addr = ring->wptr_gpu_addr;
1248e220edf2SJack Xiao 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
1249e220edf2SJack Xiao 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
1250e220edf2SJack Xiao 
1251e220edf2SJack Xiao 	/* set up the HQD, this is similar to CP_RB0_CNTL */
1252e220edf2SJack Xiao 	tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
1253e220edf2SJack Xiao 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
1254e220edf2SJack Xiao 			    (order_base_2(ring->ring_size / 4) - 1));
1255e220edf2SJack Xiao 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
1256e220edf2SJack Xiao 			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
1257e220edf2SJack Xiao 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
1258e220edf2SJack Xiao 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
1259e220edf2SJack Xiao 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
1260e220edf2SJack Xiao 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
1261e220edf2SJack Xiao 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
1262e220edf2SJack Xiao 	mqd->cp_hqd_pq_control = tmp;
1263e220edf2SJack Xiao 
1264e220edf2SJack Xiao 	/* enable doorbell */
1265e220edf2SJack Xiao 	tmp = 0;
1266e220edf2SJack Xiao 	if (ring->use_doorbell) {
1267e220edf2SJack Xiao 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1268e220edf2SJack Xiao 				    DOORBELL_OFFSET, ring->doorbell_index);
1269e220edf2SJack Xiao 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1270e220edf2SJack Xiao 				    DOORBELL_EN, 1);
1271e220edf2SJack Xiao 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1272e220edf2SJack Xiao 				    DOORBELL_SOURCE, 0);
1273e220edf2SJack Xiao 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1274e220edf2SJack Xiao 				    DOORBELL_HIT, 0);
1275e220edf2SJack Xiao 	} else {
1276e220edf2SJack Xiao 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1277e220edf2SJack Xiao 				    DOORBELL_EN, 0);
1278e220edf2SJack Xiao 	}
1279e220edf2SJack Xiao 	mqd->cp_hqd_pq_doorbell_control = tmp;
1280e220edf2SJack Xiao 
1281e220edf2SJack Xiao 	mqd->cp_hqd_vmid = 0;
1282e220edf2SJack Xiao 	/* activate the queue */
1283e220edf2SJack Xiao 	mqd->cp_hqd_active = 1;
1284e220edf2SJack Xiao 
1285e220edf2SJack Xiao 	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
1286e220edf2SJack Xiao 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE,
1287e220edf2SJack Xiao 			    PRELOAD_SIZE, 0x63);
1288e220edf2SJack Xiao 	mqd->cp_hqd_persistent_state = tmp;
1289e220edf2SJack Xiao 
1290e220edf2SJack Xiao 	mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT;
1291e220edf2SJack Xiao 	mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
1292e220edf2SJack Xiao 	mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;
1293e220edf2SJack Xiao 
1294e220edf2SJack Xiao 	/*
1295e220edf2SJack Xiao 	 * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped
1296e220edf2SJack Xiao 	 * doorbell handling. This is a reserved CP internal register can
1297e220edf2SJack Xiao 	 * not be accesss by others
1298e220edf2SJack Xiao 	 */
129977385a2dSLikun Gao 	mqd->cp_hqd_gfx_control = BIT(15);
1300e220edf2SJack Xiao 
1301e220edf2SJack Xiao 	return 0;
1302e220edf2SJack Xiao }
1303e220edf2SJack Xiao 
1304e220edf2SJack Xiao static void mes_v12_1_queue_init_register(struct amdgpu_ring *ring,
1305e220edf2SJack Xiao 					    int xcc_id)
1306e220edf2SJack Xiao {
1307e220edf2SJack Xiao 	struct v12_1_mes_mqd *mqd = ring->mqd_ptr;
1308e220edf2SJack Xiao 	struct amdgpu_device *adev = ring->adev;
1309e220edf2SJack Xiao 	uint32_t data = 0;
1310e220edf2SJack Xiao 
1311e220edf2SJack Xiao 	mutex_lock(&adev->srbm_mutex);
1312e220edf2SJack Xiao 	soc_v1_0_grbm_select(adev, 3, ring->pipe, 0, 0, GET_INST(GC, xcc_id));
1313e220edf2SJack Xiao 
1314e220edf2SJack Xiao 	/* set CP_HQD_VMID.VMID = 0. */
1315e220edf2SJack Xiao 	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID);
1316e220edf2SJack Xiao 	data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
1317e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, data);
1318e220edf2SJack Xiao 
1319e220edf2SJack Xiao 	/* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
1320e220edf2SJack Xiao 	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL);
1321e220edf2SJack Xiao 	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
1322e220edf2SJack Xiao 			     DOORBELL_EN, 0);
1323e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data);
1324e220edf2SJack Xiao 
1325e220edf2SJack Xiao 	/* set CP_MQD_BASE_ADDR/HI with the MQD base address */
1326e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
1327e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
1328e220edf2SJack Xiao 
1329e220edf2SJack Xiao 	/* set CP_MQD_CONTROL.VMID=0 */
1330e220edf2SJack Xiao 	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL);
1331e220edf2SJack Xiao 	data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
1332e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL, 0);
1333e220edf2SJack Xiao 
1334e220edf2SJack Xiao 	/* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
1335e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
1336e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
1337e220edf2SJack Xiao 
1338e220edf2SJack Xiao 	/* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
1339e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR,
1340e220edf2SJack Xiao 		     mqd->cp_hqd_pq_rptr_report_addr_lo);
1341e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
1342e220edf2SJack Xiao 		     mqd->cp_hqd_pq_rptr_report_addr_hi);
1343e220edf2SJack Xiao 
1344e220edf2SJack Xiao 	/* set CP_HQD_PQ_CONTROL */
1345e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
1346e220edf2SJack Xiao 
1347e220edf2SJack Xiao 	/* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
1348e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR,
1349e220edf2SJack Xiao 		     mqd->cp_hqd_pq_wptr_poll_addr_lo);
1350e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
1351e220edf2SJack Xiao 		     mqd->cp_hqd_pq_wptr_poll_addr_hi);
1352e220edf2SJack Xiao 
1353e220edf2SJack Xiao 	/* set CP_HQD_PQ_DOORBELL_CONTROL */
1354e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
1355e220edf2SJack Xiao 		     mqd->cp_hqd_pq_doorbell_control);
1356e220edf2SJack Xiao 
1357e220edf2SJack Xiao 	/* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
1358e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
1359e220edf2SJack Xiao 
1360e220edf2SJack Xiao 	/* set CP_HQD_ACTIVE.ACTIVE=1 */
1361e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, mqd->cp_hqd_active);
1362e220edf2SJack Xiao 
1363e220edf2SJack Xiao 	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
1364e220edf2SJack Xiao 	mutex_unlock(&adev->srbm_mutex);
1365e220edf2SJack Xiao }
1366e220edf2SJack Xiao 
1367e220edf2SJack Xiao static int mes_v12_1_kiq_enable_queue(struct amdgpu_device *adev, int xcc_id)
1368e220edf2SJack Xiao {
1369e220edf2SJack Xiao 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
1370e220edf2SJack Xiao 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[xcc_id].ring;
1371e220edf2SJack Xiao 	int r, inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);
1372e220edf2SJack Xiao 
1373e220edf2SJack Xiao 	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
1374e220edf2SJack Xiao 		return -EINVAL;
1375e220edf2SJack Xiao 
1376e220edf2SJack Xiao 	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
1377e220edf2SJack Xiao 	if (r) {
1378e220edf2SJack Xiao 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
1379e220edf2SJack Xiao 		return r;
1380e220edf2SJack Xiao 	}
1381e220edf2SJack Xiao 
1382e220edf2SJack Xiao 	kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[inst]);
1383e220edf2SJack Xiao 
1384e220edf2SJack Xiao 	r = amdgpu_ring_test_ring(kiq_ring);
1385e220edf2SJack Xiao 	if (r) {
1386e220edf2SJack Xiao 		DRM_ERROR("kfq enable failed\n");
1387e220edf2SJack Xiao 		kiq_ring->sched.ready = false;
1388e220edf2SJack Xiao 	}
1389e220edf2SJack Xiao 	return r;
1390e220edf2SJack Xiao }
1391e220edf2SJack Xiao 
1392e220edf2SJack Xiao static int mes_v12_1_queue_init(struct amdgpu_device *adev,
1393e220edf2SJack Xiao 				  enum amdgpu_mes_pipe pipe,
1394e220edf2SJack Xiao 				  int xcc_id)
1395e220edf2SJack Xiao {
1396e220edf2SJack Xiao 	struct amdgpu_ring *ring;
1397e220edf2SJack Xiao 	int r;
1398e220edf2SJack Xiao 
1399e220edf2SJack Xiao 	if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
1400e220edf2SJack Xiao 		ring = &adev->gfx.kiq[xcc_id].ring;
1401e220edf2SJack Xiao 	else
1402e220edf2SJack Xiao 		ring = &adev->mes.ring[MES_PIPE_INST(xcc_id, pipe)];
1403e220edf2SJack Xiao 
1404e220edf2SJack Xiao 	if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) &&
1405e220edf2SJack Xiao 	    (amdgpu_in_reset(adev) || adev->in_suspend)) {
1406e220edf2SJack Xiao 		*(ring->wptr_cpu_addr) = 0;
1407e220edf2SJack Xiao 		*(ring->rptr_cpu_addr) = 0;
1408e220edf2SJack Xiao 		amdgpu_ring_clear_ring(ring);
1409e220edf2SJack Xiao 	}
1410e220edf2SJack Xiao 
1411e220edf2SJack Xiao 	r = mes_v12_1_mqd_init(ring);
1412e220edf2SJack Xiao 	if (r)
1413e220edf2SJack Xiao 		return r;
1414e220edf2SJack Xiao 
1415e220edf2SJack Xiao 	if (pipe == AMDGPU_MES_SCHED_PIPE) {
1416e220edf2SJack Xiao 		if (adev->enable_uni_mes)
1417e220edf2SJack Xiao 			r = amdgpu_mes_map_legacy_queue(adev, ring, xcc_id);
1418e220edf2SJack Xiao 		else
1419e220edf2SJack Xiao 			r = mes_v12_1_kiq_enable_queue(adev, xcc_id);
1420e220edf2SJack Xiao 		if (r)
1421e220edf2SJack Xiao 			return r;
1422e220edf2SJack Xiao 	} else {
1423e220edf2SJack Xiao 		mes_v12_1_queue_init_register(ring, xcc_id);
1424e220edf2SJack Xiao 	}
1425e220edf2SJack Xiao 
1426e220edf2SJack Xiao 	/* get MES scheduler/KIQ versions */
1427e220edf2SJack Xiao 	mutex_lock(&adev->srbm_mutex);
1428e220edf2SJack Xiao 	soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));
1429e220edf2SJack Xiao 
1430e220edf2SJack Xiao 	if (pipe == AMDGPU_MES_SCHED_PIPE)
1431e220edf2SJack Xiao 		adev->mes.sched_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO);
1432e220edf2SJack Xiao 	else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
1433e220edf2SJack Xiao 		adev->mes.kiq_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO);
1434e220edf2SJack Xiao 
1435e220edf2SJack Xiao 	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
1436e220edf2SJack Xiao 	mutex_unlock(&adev->srbm_mutex);
1437e220edf2SJack Xiao 
1438e220edf2SJack Xiao 	return 0;
1439e220edf2SJack Xiao }
1440e220edf2SJack Xiao 
1441e220edf2SJack Xiao static int mes_v12_1_ring_init(struct amdgpu_device *adev,
1442e220edf2SJack Xiao 				 int xcc_id, int pipe)
1443e220edf2SJack Xiao {
1444e220edf2SJack Xiao 	struct amdgpu_ring *ring;
1445e220edf2SJack Xiao 	int inst = MES_PIPE_INST(xcc_id, pipe);
1446e220edf2SJack Xiao 
1447e220edf2SJack Xiao 	ring = &adev->mes.ring[inst];
1448e220edf2SJack Xiao 
1449e220edf2SJack Xiao 	ring->funcs = &mes_v12_1_ring_funcs;
1450e220edf2SJack Xiao 
1451e220edf2SJack Xiao 	ring->me = 3;
1452e220edf2SJack Xiao 	ring->pipe = pipe;
1453e220edf2SJack Xiao 	ring->queue = 0;
1454e220edf2SJack Xiao 	ring->xcc_id = xcc_id;
1455e220edf2SJack Xiao 	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
1456e220edf2SJack Xiao 
1457e220edf2SJack Xiao 	ring->ring_obj = NULL;
1458e220edf2SJack Xiao 	ring->use_doorbell = true;
1459e220edf2SJack Xiao 	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst];
1460e220edf2SJack Xiao 	ring->no_scheduler = true;
1461e220edf2SJack Xiao 	snprintf(ring->name, sizeof(ring->name), "mes_%hhu.%hhu.%hhu.%hhu",
1462e220edf2SJack Xiao 		 (unsigned char)xcc_id, (unsigned char)ring->me,
1463e220edf2SJack Xiao 		 (unsigned char)ring->pipe, (unsigned char)ring->queue);
1464e220edf2SJack Xiao 
1465e220edf2SJack Xiao 	if (pipe == AMDGPU_MES_SCHED_PIPE)
1466e220edf2SJack Xiao 		ring->doorbell_index =
1467e220edf2SJack Xiao 			(adev->doorbell_index.mes_ring0 +
1468e220edf2SJack Xiao 			 xcc_id * adev->doorbell_index.xcc_doorbell_range)
1469e220edf2SJack Xiao 			<< 1;
1470e220edf2SJack Xiao 	else
1471e220edf2SJack Xiao 		ring->doorbell_index =
1472e220edf2SJack Xiao 			(adev->doorbell_index.mes_ring1 +
1473e220edf2SJack Xiao 			 xcc_id * adev->doorbell_index.xcc_doorbell_range)
1474e220edf2SJack Xiao 			<< 1;
1475e220edf2SJack Xiao 
1476e220edf2SJack Xiao 	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
1477e220edf2SJack Xiao 				AMDGPU_RING_PRIO_DEFAULT, NULL);
1478e220edf2SJack Xiao }
1479e220edf2SJack Xiao 
1480e220edf2SJack Xiao static int mes_v12_1_kiq_ring_init(struct amdgpu_device *adev, int xcc_id)
1481e220edf2SJack Xiao {
1482e220edf2SJack Xiao 	struct amdgpu_ring *ring;
1483e220edf2SJack Xiao 	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE);
1484e220edf2SJack Xiao 
1485e220edf2SJack Xiao 	spin_lock_init(&adev->gfx.kiq[xcc_id].ring_lock);
1486e220edf2SJack Xiao 
1487e220edf2SJack Xiao 	ring = &adev->gfx.kiq[xcc_id].ring;
1488e220edf2SJack Xiao 
1489e220edf2SJack Xiao 	ring->me = 3;
1490e220edf2SJack Xiao 	ring->pipe = 1;
1491e220edf2SJack Xiao 	ring->queue = 0;
1492e220edf2SJack Xiao 	ring->xcc_id = xcc_id;
1493e220edf2SJack Xiao 	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
1494e220edf2SJack Xiao 
1495e220edf2SJack Xiao 	ring->adev = NULL;
1496e220edf2SJack Xiao 	ring->ring_obj = NULL;
1497e220edf2SJack Xiao 	ring->use_doorbell = true;
1498e220edf2SJack Xiao 	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst];
1499e220edf2SJack Xiao 	ring->no_scheduler = true;
1500e220edf2SJack Xiao 	ring->doorbell_index =
1501e220edf2SJack Xiao 		(adev->doorbell_index.mes_ring1 +
1502e220edf2SJack Xiao 		 xcc_id * adev->doorbell_index.xcc_doorbell_range)
1503e220edf2SJack Xiao 		<< 1;
1504e220edf2SJack Xiao 
1505e220edf2SJack Xiao 	snprintf(ring->name, sizeof(ring->name), "mes_kiq_%hhu.%hhu.%hhu.%hhu",
1506e220edf2SJack Xiao 		 (unsigned char)xcc_id, (unsigned char)ring->me,
1507e220edf2SJack Xiao 		 (unsigned char)ring->pipe, (unsigned char)ring->queue);
1508e220edf2SJack Xiao 
1509e220edf2SJack Xiao 	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
1510e220edf2SJack Xiao 				AMDGPU_RING_PRIO_DEFAULT, NULL);
1511e220edf2SJack Xiao }
1512e220edf2SJack Xiao 
1513e220edf2SJack Xiao static int mes_v12_1_mqd_sw_init(struct amdgpu_device *adev,
1514e220edf2SJack Xiao 				   enum amdgpu_mes_pipe pipe,
1515e220edf2SJack Xiao 				   int xcc_id)
1516e220edf2SJack Xiao {
1517e220edf2SJack Xiao 	int r, mqd_size = sizeof(struct v12_1_mes_mqd);
1518e220edf2SJack Xiao 	struct amdgpu_ring *ring;
1519e220edf2SJack Xiao 	int inst = MES_PIPE_INST(xcc_id, pipe);
1520e220edf2SJack Xiao 
1521e220edf2SJack Xiao 	if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
1522e220edf2SJack Xiao 		ring = &adev->gfx.kiq[xcc_id].ring;
1523e220edf2SJack Xiao 	else
1524e220edf2SJack Xiao 		ring = &adev->mes.ring[inst];
1525e220edf2SJack Xiao 
1526e220edf2SJack Xiao 	if (ring->mqd_obj)
1527e220edf2SJack Xiao 		return 0;
1528e220edf2SJack Xiao 
1529e220edf2SJack Xiao 	r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
1530e220edf2SJack Xiao 				    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
1531e220edf2SJack Xiao 				    &ring->mqd_gpu_addr, &ring->mqd_ptr);
1532e220edf2SJack Xiao 	if (r) {
1533e220edf2SJack Xiao 		dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
1534e220edf2SJack Xiao 		return r;
1535e220edf2SJack Xiao 	}
1536e220edf2SJack Xiao 
1537e220edf2SJack Xiao 	memset(ring->mqd_ptr, 0, mqd_size);
1538e220edf2SJack Xiao 
1539e220edf2SJack Xiao 	/* prepare MQD backup */
1540e220edf2SJack Xiao 	adev->mes.mqd_backup[inst] = kmalloc(mqd_size, GFP_KERNEL);
1541e220edf2SJack Xiao 	if (!adev->mes.mqd_backup[inst])
1542e220edf2SJack Xiao 		dev_warn(adev->dev,
1543e220edf2SJack Xiao 			 "no memory to create MQD backup for ring %s\n",
1544e220edf2SJack Xiao 			 ring->name);
1545e220edf2SJack Xiao 
1546e220edf2SJack Xiao 	return 0;
1547e220edf2SJack Xiao }
1548e220edf2SJack Xiao 
1549e220edf2SJack Xiao static int mes_v12_1_sw_init(struct amdgpu_ip_block *ip_block)
1550e220edf2SJack Xiao {
1551e220edf2SJack Xiao 	struct amdgpu_device *adev = ip_block->adev;
1552e220edf2SJack Xiao 	int pipe, r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1553e220edf2SJack Xiao 
1554e220edf2SJack Xiao 	adev->mes.funcs = &mes_v12_1_funcs;
1555e220edf2SJack Xiao 	adev->mes.kiq_hw_init = &mes_v12_1_kiq_hw_init;
1556e220edf2SJack Xiao 	adev->mes.kiq_hw_fini = &mes_v12_1_kiq_hw_fini;
1557e220edf2SJack Xiao 	adev->mes.enable_legacy_queue_map = true;
1558e220edf2SJack Xiao 
1559e220edf2SJack Xiao 	adev->mes.event_log_size =
1560e220edf2SJack Xiao 		adev->enable_uni_mes ? (AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE * num_xcc) : AMDGPU_MES_LOG_BUFFER_SIZE;
1561e220edf2SJack Xiao 
1562e220edf2SJack Xiao 	r = amdgpu_mes_init(adev);
1563e220edf2SJack Xiao 	if (r)
1564e220edf2SJack Xiao 		return r;
1565e220edf2SJack Xiao 
1566e220edf2SJack Xiao 	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
1567e220edf2SJack Xiao 		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
1568e220edf2SJack Xiao 			r = mes_v12_1_allocate_eop_buf(adev, pipe, xcc_id);
1569e220edf2SJack Xiao 			if (r)
1570e220edf2SJack Xiao 				return r;
1571e220edf2SJack Xiao 
1572e220edf2SJack Xiao 			r = mes_v12_1_mqd_sw_init(adev, pipe, xcc_id);
1573e220edf2SJack Xiao 			if (r)
1574e220edf2SJack Xiao 				return r;
1575e220edf2SJack Xiao 
1576e220edf2SJack Xiao 			if (!adev->enable_uni_mes && pipe ==
1577e220edf2SJack Xiao 			    AMDGPU_MES_KIQ_PIPE)
1578e220edf2SJack Xiao 				r = mes_v12_1_kiq_ring_init(adev, xcc_id);
1579e220edf2SJack Xiao 			else
1580e220edf2SJack Xiao 				r = mes_v12_1_ring_init(adev, xcc_id, pipe);
1581e220edf2SJack Xiao 			if (r)
1582e220edf2SJack Xiao 				return r;
158375053887SJack Xiao 
1584aa0f09f9SMukul Joshi 			if (adev->enable_uni_mes && num_xcc > 1) {
158575053887SJack Xiao 				r = mes_v12_1_allocate_shared_cmd_buf(adev,
158675053887SJack Xiao 							      pipe, xcc_id);
158775053887SJack Xiao 				if (r)
158875053887SJack Xiao 					return r;
158975053887SJack Xiao 			}
1590e220edf2SJack Xiao 		}
1591e220edf2SJack Xiao 	}
1592e220edf2SJack Xiao 
1593e220edf2SJack Xiao 	return 0;
1594e220edf2SJack Xiao }
1595e220edf2SJack Xiao 
1596e220edf2SJack Xiao static int mes_v12_1_sw_fini(struct amdgpu_ip_block *ip_block)
1597e220edf2SJack Xiao {
1598e220edf2SJack Xiao 	struct amdgpu_device *adev = ip_block->adev;
1599e220edf2SJack Xiao 	int pipe, inst, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1600e220edf2SJack Xiao 
1601e220edf2SJack Xiao 	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
1602e220edf2SJack Xiao 		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
1603e220edf2SJack Xiao 			inst = MES_PIPE_INST(xcc_id, pipe);
1604e220edf2SJack Xiao 
160575053887SJack Xiao 			amdgpu_bo_free_kernel(&adev->mes.shared_cmd_buf_obj[inst],
160675053887SJack Xiao 					      &adev->mes.shared_cmd_buf_gpu_addr[inst],
160775053887SJack Xiao 					      NULL);
160875053887SJack Xiao 
1609e220edf2SJack Xiao 			kfree(adev->mes.mqd_backup[inst]);
1610e220edf2SJack Xiao 
1611e220edf2SJack Xiao 			amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[inst],
1612e220edf2SJack Xiao 					      &adev->mes.eop_gpu_addr[inst],
1613e220edf2SJack Xiao 					      NULL);
1614e220edf2SJack Xiao 			amdgpu_ucode_release(&adev->mes.fw[inst]);
1615e220edf2SJack Xiao 
1616e220edf2SJack Xiao 			if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) {
1617e220edf2SJack Xiao 				amdgpu_bo_free_kernel(&adev->mes.ring[inst].mqd_obj,
1618e220edf2SJack Xiao 						      &adev->mes.ring[inst].mqd_gpu_addr,
1619e220edf2SJack Xiao 						      &adev->mes.ring[inst].mqd_ptr);
1620e220edf2SJack Xiao 				amdgpu_ring_fini(&adev->mes.ring[inst]);
1621e220edf2SJack Xiao 			}
1622e220edf2SJack Xiao 		}
1623e220edf2SJack Xiao 	}
1624e220edf2SJack Xiao 
1625e220edf2SJack Xiao 	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
1626e220edf2SJack Xiao 		if (!adev->enable_uni_mes) {
1627e220edf2SJack Xiao 			amdgpu_bo_free_kernel(&adev->gfx.kiq[xcc_id].ring.mqd_obj,
1628e220edf2SJack Xiao 				      &adev->gfx.kiq[xcc_id].ring.mqd_gpu_addr,
1629e220edf2SJack Xiao 				      &adev->gfx.kiq[xcc_id].ring.mqd_ptr);
1630e220edf2SJack Xiao 			amdgpu_ring_fini(&adev->gfx.kiq[xcc_id].ring);
1631e220edf2SJack Xiao 		}
1632e220edf2SJack Xiao 
1633e220edf2SJack Xiao 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1634e220edf2SJack Xiao 			mes_v12_1_free_ucode_buffers(adev,
1635e220edf2SJack Xiao 				       AMDGPU_MES_KIQ_PIPE, xcc_id);
1636e220edf2SJack Xiao 			mes_v12_1_free_ucode_buffers(adev,
1637e220edf2SJack Xiao 				       AMDGPU_MES_SCHED_PIPE, xcc_id);
1638e220edf2SJack Xiao 		}
1639e220edf2SJack Xiao 	}
1640e220edf2SJack Xiao 
1641e220edf2SJack Xiao 	amdgpu_mes_fini(adev);
1642e220edf2SJack Xiao 	return 0;
1643e220edf2SJack Xiao }
1644e220edf2SJack Xiao 
1645e220edf2SJack Xiao static void mes_v12_1_kiq_dequeue_sched(struct amdgpu_device *adev,
1646e220edf2SJack Xiao 					  int xcc_id)
1647e220edf2SJack Xiao {
1648e220edf2SJack Xiao 	uint32_t data;
1649e220edf2SJack Xiao 	int i;
1650e220edf2SJack Xiao 
1651e220edf2SJack Xiao 	mutex_lock(&adev->srbm_mutex);
1652e220edf2SJack Xiao 	soc_v1_0_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0,
1653e220edf2SJack Xiao 			     GET_INST(GC, xcc_id));
1654e220edf2SJack Xiao 
1655e220edf2SJack Xiao 	/* disable the queue if it's active */
1656e220edf2SJack Xiao 	if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) {
1657e220edf2SJack Xiao 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1);
1658e220edf2SJack Xiao 		for (i = 0; i < adev->usec_timeout; i++) {
1659e220edf2SJack Xiao 			if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
1660e220edf2SJack Xiao 				break;
1661e220edf2SJack Xiao 			udelay(1);
1662e220edf2SJack Xiao 		}
1663e220edf2SJack Xiao 	}
1664e220edf2SJack Xiao 	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL);
1665e220edf2SJack Xiao 	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
1666e220edf2SJack Xiao 				DOORBELL_EN, 0);
1667e220edf2SJack Xiao 	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
1668e220edf2SJack Xiao 				DOORBELL_HIT, 1);
1669e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data);
1670e220edf2SJack Xiao 
1671e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0);
1672e220edf2SJack Xiao 
1673e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0);
1674e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0);
1675e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0);
1676e220edf2SJack Xiao 
1677e220edf2SJack Xiao 	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
1678e220edf2SJack Xiao 	mutex_unlock(&adev->srbm_mutex);
1679e220edf2SJack Xiao 
1680e220edf2SJack Xiao 	adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = false;
1681e220edf2SJack Xiao }
1682e220edf2SJack Xiao 
1683e220edf2SJack Xiao static void mes_v12_1_kiq_setting(struct amdgpu_ring *ring, int xcc_id)
1684e220edf2SJack Xiao {
1685e220edf2SJack Xiao 	uint32_t tmp;
1686e220edf2SJack Xiao 	struct amdgpu_device *adev = ring->adev;
1687e220edf2SJack Xiao 
1688e220edf2SJack Xiao 	/* tell RLC which is KIQ queue */
1689e220edf2SJack Xiao 	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS);
1690e220edf2SJack Xiao 	tmp &= 0xffffff00;
1691e220edf2SJack Xiao 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
1692e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
1693e220edf2SJack Xiao 	tmp |= 0x80;
1694e220edf2SJack Xiao 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
1695e220edf2SJack Xiao }
1696e220edf2SJack Xiao 
1697e220edf2SJack Xiao static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id)
1698e220edf2SJack Xiao {
1699e220edf2SJack Xiao 	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE);
1700e220edf2SJack Xiao 	int r = 0;
1701e220edf2SJack Xiao 	struct amdgpu_ip_block *ip_block;
1702e220edf2SJack Xiao 
1703e220edf2SJack Xiao 	if (adev->enable_uni_mes)
1704e220edf2SJack Xiao 		mes_v12_1_kiq_setting(&adev->mes.ring[inst], xcc_id);
1705e220edf2SJack Xiao 	else
1706e220edf2SJack Xiao 		mes_v12_1_kiq_setting(&adev->gfx.kiq[xcc_id].ring, xcc_id);
1707e220edf2SJack Xiao 
1708e220edf2SJack Xiao 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1709e220edf2SJack Xiao 
1710e220edf2SJack Xiao 		r = mes_v12_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE,
1711e220edf2SJack Xiao 					       false, xcc_id);
1712e220edf2SJack Xiao 		if (r) {
1713e220edf2SJack Xiao 			DRM_ERROR("failed to load MES fw, r=%d\n", r);
1714e220edf2SJack Xiao 			return r;
1715e220edf2SJack Xiao 		}
1716e220edf2SJack Xiao 
1717e220edf2SJack Xiao 		r = mes_v12_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE,
1718e220edf2SJack Xiao 					       true, xcc_id);
1719e220edf2SJack Xiao 		if (r) {
1720e220edf2SJack Xiao 			DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
1721e220edf2SJack Xiao 			return r;
1722e220edf2SJack Xiao 		}
1723e220edf2SJack Xiao 
1724e220edf2SJack Xiao 		mes_v12_1_set_ucode_start_addr(adev, xcc_id);
1725e220edf2SJack Xiao 
1726e220edf2SJack Xiao 	} else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1727e220edf2SJack Xiao 		mes_v12_1_set_ucode_start_addr(adev, xcc_id);
1728e220edf2SJack Xiao 
1729e220edf2SJack Xiao 	mes_v12_1_enable(adev, true, xcc_id);
1730e220edf2SJack Xiao 
1731e220edf2SJack Xiao 	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES);
1732e220edf2SJack Xiao 	if (unlikely(!ip_block)) {
1733e220edf2SJack Xiao 		dev_err(adev->dev, "Failed to get MES handle\n");
1734e220edf2SJack Xiao 		return -EINVAL;
1735e220edf2SJack Xiao 	}
1736e220edf2SJack Xiao 
1737e220edf2SJack Xiao 	r = mes_v12_1_queue_init(adev, AMDGPU_MES_KIQ_PIPE, xcc_id);
1738e220edf2SJack Xiao 	if (r)
1739e220edf2SJack Xiao 		goto failure;
1740e220edf2SJack Xiao 
1741e220edf2SJack Xiao 	if (adev->enable_uni_mes) {
1742e220edf2SJack Xiao 		r = mes_v12_1_set_hw_resources(&adev->mes,
1743e220edf2SJack Xiao 						 AMDGPU_MES_KIQ_PIPE, xcc_id);
1744e220edf2SJack Xiao 		if (r)
1745e220edf2SJack Xiao 			goto failure;
1746e220edf2SJack Xiao 
1747e220edf2SJack Xiao 		mes_v12_1_set_hw_resources_1(&adev->mes,
1748e220edf2SJack Xiao 					       AMDGPU_MES_KIQ_PIPE, xcc_id);
1749e220edf2SJack Xiao 	}
1750e220edf2SJack Xiao 
1751e220edf2SJack Xiao 	if (adev->mes.enable_legacy_queue_map) {
1752a5192fbbSLikun Gao 		r = mes_v12_1_xcc_hw_init(ip_block, xcc_id);
1753e220edf2SJack Xiao 		if (r)
1754e220edf2SJack Xiao 			goto failure;
1755e220edf2SJack Xiao 	}
1756e220edf2SJack Xiao 
1757e220edf2SJack Xiao 	return r;
1758e220edf2SJack Xiao 
1759e220edf2SJack Xiao failure:
1760e220edf2SJack Xiao 	mes_v12_1_hw_fini(ip_block);
1761e220edf2SJack Xiao 	return r;
1762e220edf2SJack Xiao }
1763e220edf2SJack Xiao 
1764e220edf2SJack Xiao static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id)
1765e220edf2SJack Xiao {
1766e220edf2SJack Xiao 	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);
1767e220edf2SJack Xiao 
1768e220edf2SJack Xiao 	if (adev->mes.ring[inst].sched.ready) {
1769e220edf2SJack Xiao 		if (adev->enable_uni_mes)
1770e220edf2SJack Xiao 			amdgpu_mes_unmap_legacy_queue(adev,
1771e220edf2SJack Xiao 				      &adev->mes.ring[inst],
1772e220edf2SJack Xiao 				      RESET_QUEUES, 0, 0, xcc_id);
1773e220edf2SJack Xiao 		else
1774e220edf2SJack Xiao 			mes_v12_1_kiq_dequeue_sched(adev, xcc_id);
1775e220edf2SJack Xiao 
1776e220edf2SJack Xiao 		adev->mes.ring[inst].sched.ready = false;
1777e220edf2SJack Xiao 	}
1778e220edf2SJack Xiao 
1779e220edf2SJack Xiao 	mes_v12_1_enable(adev, false, xcc_id);
1780e220edf2SJack Xiao 
1781e220edf2SJack Xiao 	return 0;
1782e220edf2SJack Xiao }
1783e220edf2SJack Xiao 
1784aa0f09f9SMukul Joshi static int mes_v12_1_setup_coop_mode(struct amdgpu_device *adev, int xcc_id)
1785aa0f09f9SMukul Joshi {
1786aa0f09f9SMukul Joshi 	u32 num_xcc_per_xcp, num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1787aa0f09f9SMukul Joshi 	int r = 0;
1788aa0f09f9SMukul Joshi 
1789aa0f09f9SMukul Joshi 	if (num_xcc == 1)
1790aa0f09f9SMukul Joshi 		return r;
1791aa0f09f9SMukul Joshi 
1792aa0f09f9SMukul Joshi 	if (adev->gfx.funcs &&
1793aa0f09f9SMukul Joshi 	    adev->gfx.funcs->get_xccs_per_xcp)
1794aa0f09f9SMukul Joshi 		num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev);
1795aa0f09f9SMukul Joshi 	else
1796aa0f09f9SMukul Joshi 		return -EINVAL;
1797aa0f09f9SMukul Joshi 
1798aa0f09f9SMukul Joshi 	switch (adev->xcp_mgr->mode) {
1799aa0f09f9SMukul Joshi 	case AMDGPU_SPX_PARTITION_MODE:
1800aa0f09f9SMukul Joshi 		adev->mes.enable_coop_mode = 1;
1801aa0f09f9SMukul Joshi 		adev->mes.master_xcc_ids[xcc_id] = 0;
1802aa0f09f9SMukul Joshi 		break;
1803aa0f09f9SMukul Joshi 	case AMDGPU_DPX_PARTITION_MODE:
1804aa0f09f9SMukul Joshi 		adev->mes.enable_coop_mode = 1;
1805aa0f09f9SMukul Joshi 		adev->mes.master_xcc_ids[xcc_id] =
1806aa0f09f9SMukul Joshi 			(xcc_id/num_xcc_per_xcp) * (num_xcc / 2);
1807aa0f09f9SMukul Joshi 		break;
1808aa0f09f9SMukul Joshi 	case AMDGPU_QPX_PARTITION_MODE:
1809aa0f09f9SMukul Joshi 		adev->mes.enable_coop_mode = 1;
1810aa0f09f9SMukul Joshi 		adev->mes.master_xcc_ids[xcc_id] =
1811aa0f09f9SMukul Joshi 			(xcc_id/num_xcc_per_xcp) * (num_xcc / 4);
1812aa0f09f9SMukul Joshi 		break;
1813aa0f09f9SMukul Joshi 	case AMDGPU_CPX_PARTITION_MODE:
1814aa0f09f9SMukul Joshi 		adev->mes.enable_coop_mode = 0;
1815aa0f09f9SMukul Joshi 		break;
1816aa0f09f9SMukul Joshi 	default:
1817aa0f09f9SMukul Joshi 		r = -EINVAL;
1818aa0f09f9SMukul Joshi 		break;
1819aa0f09f9SMukul Joshi 	}
1820aa0f09f9SMukul Joshi 	return r;
1821aa0f09f9SMukul Joshi }
1822aa0f09f9SMukul Joshi 
1823e220edf2SJack Xiao static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id)
1824e220edf2SJack Xiao {
1825e220edf2SJack Xiao 	int r;
1826e220edf2SJack Xiao 	struct amdgpu_device *adev = ip_block->adev;
1827e220edf2SJack Xiao 
1828e220edf2SJack Xiao 	if (adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready)
1829e220edf2SJack Xiao 		goto out;
1830e220edf2SJack Xiao 
1831e220edf2SJack Xiao 	if (!adev->enable_mes_kiq) {
1832e220edf2SJack Xiao 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1833e220edf2SJack Xiao 			r = mes_v12_1_load_microcode(adev,
1834e220edf2SJack Xiao 				       AMDGPU_MES_SCHED_PIPE, true, xcc_id);
1835e220edf2SJack Xiao 			if (r) {
1836e220edf2SJack Xiao 				DRM_ERROR("failed to MES fw, r=%d\n", r);
1837e220edf2SJack Xiao 				return r;
1838e220edf2SJack Xiao 			}
1839e220edf2SJack Xiao 
1840e220edf2SJack Xiao 			mes_v12_1_set_ucode_start_addr(adev, xcc_id);
1841e220edf2SJack Xiao 
1842e220edf2SJack Xiao 		} else if (adev->firmware.load_type ==
1843e220edf2SJack Xiao 			   AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1844e220edf2SJack Xiao 
1845e220edf2SJack Xiao 			mes_v12_1_set_ucode_start_addr(adev, xcc_id);
1846e220edf2SJack Xiao 		}
1847e220edf2SJack Xiao 
1848e220edf2SJack Xiao 		mes_v12_1_enable(adev, true, xcc_id);
1849e220edf2SJack Xiao 	}
1850e220edf2SJack Xiao 
1851e220edf2SJack Xiao 	/* Enable the MES to handle doorbell ring on unmapped queue */
1852e220edf2SJack Xiao 	mes_v12_1_enable_unmapped_doorbell_handling(&adev->mes, true, xcc_id);
1853e220edf2SJack Xiao 
1854e220edf2SJack Xiao 	r = mes_v12_1_queue_init(adev, AMDGPU_MES_SCHED_PIPE, xcc_id);
1855e220edf2SJack Xiao 	if (r)
1856e220edf2SJack Xiao 		goto failure;
1857e220edf2SJack Xiao 
1858e220edf2SJack Xiao 	r = mes_v12_1_set_hw_resources(&adev->mes,
1859e220edf2SJack Xiao 					 AMDGPU_MES_SCHED_PIPE, xcc_id);
1860e220edf2SJack Xiao 	if (r)
1861e220edf2SJack Xiao 		goto failure;
1862e220edf2SJack Xiao 
1863aa0f09f9SMukul Joshi 	if (adev->enable_uni_mes) {
1864aa0f09f9SMukul Joshi 		r = mes_v12_1_setup_coop_mode(adev, xcc_id);
1865aa0f09f9SMukul Joshi 		if (r)
1866aa0f09f9SMukul Joshi 			goto failure;
1867e220edf2SJack Xiao 		mes_v12_1_set_hw_resources_1(&adev->mes,
1868e220edf2SJack Xiao 					       AMDGPU_MES_SCHED_PIPE, xcc_id);
1869aa0f09f9SMukul Joshi 	}
1870e220edf2SJack Xiao 	mes_v12_1_init_aggregated_doorbell(&adev->mes, xcc_id);
1871e220edf2SJack Xiao 
1872e220edf2SJack Xiao 	r = mes_v12_1_query_sched_status(&adev->mes,
1873e220edf2SJack Xiao 					   AMDGPU_MES_SCHED_PIPE, xcc_id);
1874e220edf2SJack Xiao 	if (r) {
1875e220edf2SJack Xiao 		DRM_ERROR("MES is busy\n");
1876e220edf2SJack Xiao 		goto failure;
1877e220edf2SJack Xiao 	}
1878e220edf2SJack Xiao 
1879e220edf2SJack Xiao out:
1880e220edf2SJack Xiao 	/*
1881e220edf2SJack Xiao 	 * Disable KIQ ring usage from the driver once MES is enabled.
1882e220edf2SJack Xiao 	 * MES uses KIQ ring exclusively so driver cannot access KIQ ring
1883e220edf2SJack Xiao 	 * with MES enabled.
1884e220edf2SJack Xiao 	 */
1885e220edf2SJack Xiao 	adev->gfx.kiq[xcc_id].ring.sched.ready = false;
1886e220edf2SJack Xiao 	adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = true;
1887e220edf2SJack Xiao 
1888e220edf2SJack Xiao 	return 0;
1889e220edf2SJack Xiao 
1890e220edf2SJack Xiao failure:
1891e220edf2SJack Xiao 	mes_v12_1_hw_fini(ip_block);
1892e220edf2SJack Xiao 	return r;
1893e220edf2SJack Xiao }
1894e220edf2SJack Xiao 
1895e220edf2SJack Xiao static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block)
1896e220edf2SJack Xiao {
1897e220edf2SJack Xiao 	struct amdgpu_device *adev = ip_block->adev;
1898e220edf2SJack Xiao 	int r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1899e220edf2SJack Xiao 
1900e220edf2SJack Xiao 	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
1901e220edf2SJack Xiao 		r = mes_v12_1_xcc_hw_init(ip_block, xcc_id);
1902e220edf2SJack Xiao 		if (r)
1903e220edf2SJack Xiao 			return r;
1904e220edf2SJack Xiao 	}
1905e220edf2SJack Xiao 
1906e220edf2SJack Xiao 	return 0;
1907e220edf2SJack Xiao }
1908e220edf2SJack Xiao 
/*
 * IP-block hw_fini callback.
 *
 * Performs no hardware teardown: @ip_block is not used and the function
 * unconditionally reports success (0).
 */
static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block)
{
	return 0;
}
1913e220edf2SJack Xiao 
1914e220edf2SJack Xiao static int mes_v12_1_suspend(struct amdgpu_ip_block *ip_block)
1915e220edf2SJack Xiao {
1916e220edf2SJack Xiao 	int r;
1917e220edf2SJack Xiao 
1918e220edf2SJack Xiao 	r = amdgpu_mes_suspend(ip_block->adev);
1919e220edf2SJack Xiao 	if (r)
1920e220edf2SJack Xiao 		return r;
1921e220edf2SJack Xiao 
1922e220edf2SJack Xiao 	return mes_v12_1_hw_fini(ip_block);
1923e220edf2SJack Xiao }
1924e220edf2SJack Xiao 
1925e220edf2SJack Xiao static int mes_v12_1_resume(struct amdgpu_ip_block *ip_block)
1926e220edf2SJack Xiao {
1927e220edf2SJack Xiao 	int r;
1928e220edf2SJack Xiao 
1929e220edf2SJack Xiao 	r = mes_v12_1_hw_init(ip_block);
1930e220edf2SJack Xiao 	if (r)
1931e220edf2SJack Xiao 		return r;
1932e220edf2SJack Xiao 
1933e220edf2SJack Xiao 	return amdgpu_mes_resume(ip_block->adev);
1934e220edf2SJack Xiao }
1935e220edf2SJack Xiao 
1936e220edf2SJack Xiao static int mes_v12_1_early_init(struct amdgpu_ip_block *ip_block)
1937e220edf2SJack Xiao {
1938e220edf2SJack Xiao 	struct amdgpu_device *adev = ip_block->adev;
1939e220edf2SJack Xiao 	int pipe, r;
1940e220edf2SJack Xiao 
1941e220edf2SJack Xiao 	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
1942e220edf2SJack Xiao 		r = amdgpu_mes_init_microcode(adev, pipe);
1943e220edf2SJack Xiao 		if (r)
1944e220edf2SJack Xiao 			return r;
1945e220edf2SJack Xiao 	}
1946e220edf2SJack Xiao 
1947e220edf2SJack Xiao 	return 0;
1948e220edf2SJack Xiao }
1949e220edf2SJack Xiao 
1950e220edf2SJack Xiao static const struct amd_ip_funcs mes_v12_1_ip_funcs = {
1951e220edf2SJack Xiao 	.name = "mes_v12_1",
1952e220edf2SJack Xiao 	.early_init = mes_v12_1_early_init,
1953e220edf2SJack Xiao 	.late_init = NULL,
1954e220edf2SJack Xiao 	.sw_init = mes_v12_1_sw_init,
1955e220edf2SJack Xiao 	.sw_fini = mes_v12_1_sw_fini,
1956e220edf2SJack Xiao 	.hw_init = mes_v12_1_hw_init,
1957e220edf2SJack Xiao 	.hw_fini = mes_v12_1_hw_fini,
1958e220edf2SJack Xiao 	.suspend = mes_v12_1_suspend,
1959e220edf2SJack Xiao 	.resume = mes_v12_1_resume,
1960e220edf2SJack Xiao };
1961e220edf2SJack Xiao 
1962e220edf2SJack Xiao const struct amdgpu_ip_block_version mes_v12_1_ip_block = {
1963e220edf2SJack Xiao 	.type = AMD_IP_BLOCK_TYPE_MES,
1964e220edf2SJack Xiao 	.major = 12,
1965e220edf2SJack Xiao 	.minor = 1,
1966e220edf2SJack Xiao 	.rev = 0,
1967e220edf2SJack Xiao 	.funcs = &mes_v12_1_ip_funcs,
1968e220edf2SJack Xiao };
1969