1 /*
2 * Copyright 2025 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include "amdgpu.h"
27 #include "soc15_common.h"
28 #include "soc_v1_0.h"
29 #include "gc/gc_12_1_0_offset.h"
30 #include "gc/gc_12_1_0_sh_mask.h"
31 #include "gc/gc_11_0_0_default.h"
32 #include "v12_structs.h"
33 #include "mes_v12_api_def.h"
34 #include "gfx_v12_1_pkt.h"
35 #include "sdma_v7_1_0_pkt_open.h"
36
37 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin");
38 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin");
39 MODULE_FIRMWARE("amdgpu/gc_12_1_0_uni_mes.bin");
40
41 static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block);
42 static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id);
43 static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block);
44 static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id);
45 static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id);
46 static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id);
47
48 #define MES_EOP_SIZE 2048
49
50 #define regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT 0x100000
51 #define XCC_MID_MASK 0x41000000
52
mes_v12_1_ring_set_wptr(struct amdgpu_ring * ring)53 static void mes_v12_1_ring_set_wptr(struct amdgpu_ring *ring)
54 {
55 struct amdgpu_device *adev = ring->adev;
56
57 if (ring->use_doorbell) {
58 atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
59 ring->wptr);
60 WDOORBELL64(ring->doorbell_index, ring->wptr);
61 } else {
62 BUG();
63 }
64 }
65
mes_v12_1_ring_get_rptr(struct amdgpu_ring * ring)66 static u64 mes_v12_1_ring_get_rptr(struct amdgpu_ring *ring)
67 {
68 return *ring->rptr_cpu_addr;
69 }
70
mes_v12_1_ring_get_wptr(struct amdgpu_ring * ring)71 static u64 mes_v12_1_ring_get_wptr(struct amdgpu_ring *ring)
72 {
73 u64 wptr;
74
75 if (ring->use_doorbell)
76 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
77 else
78 BUG();
79 return wptr;
80 }
81
/* Ring callbacks for the MES control ring.  rptr/wptr are exchanged via
 * CPU-visible mirrors and the doorbell (see the accessors above). */
static const struct amdgpu_ring_funcs mes_v12_1_ring_funcs = {
	.type = AMDGPU_RING_TYPE_MES,
	.align_mask = 1,	/* mask of 1 -> 2-dword packet alignment */
	.nop = 0,
	.support_64bit_ptrs = true,
	.get_rptr = mes_v12_1_ring_get_rptr,
	.get_wptr = mes_v12_1_ring_get_wptr,
	.set_wptr = mes_v12_1_ring_set_wptr,
	.insert_nop = amdgpu_ring_insert_nop,
};
92
/*
 * Human-readable names for the MES scheduler API opcodes, indexed by the
 * MES_SCH_API_* opcode value; used only for debug/error messages.
 */
static const char *mes_v12_1_opcodes[] = {
	"SET_HW_RSRC",
	"SET_SCHEDULING_CONFIG",
	"ADD_QUEUE",
	"REMOVE_QUEUE",
	"PERFORM_YIELD",
	"SET_GANG_PRIORITY_LEVEL",
	"SUSPEND",
	"RESUME",
	"RESET",
	"SET_LOG_BUFFER",
	"CHANGE_GANG_PRIORITY",	/* fixed misspelling: was "CHANGE_GANG_PRORITY" */
	"QUERY_SCHEDULER_STATUS",
	"unused",
	"SET_DEBUG_VMID",
	"MISC",
	"UPDATE_ROOT_PAGE_TABLE",
	"AMD_LOG",
	"SET_SE_MODE",
	"SET_GANG_SUBMIT",
	"SET_HW_RSRC_1",
	"INVALIDATE_TLBS",
};
116
/*
 * Names for the sub-opcodes of the MISC scheduler API, indexed by the
 * MESAPI_MISC__* value carried in the packet's secondary opcode field.
 */
static const char *mes_v12_1_misc_opcodes[] = {
	"WRITE_REG",
	"INV_GART",
	"QUERY_STATUS",
	"READ_REG",
	"WAIT_REG_MEM",
	"SET_SHADER_DEBUGGER",
	"NOTIFY_WORK_ON_UNMAPPED_QUEUE",
	"NOTIFY_TO_UNMAP_PROCESSES",
};
127
mes_v12_1_get_op_string(union MESAPI__MISC * x_pkt)128 static const char *mes_v12_1_get_op_string(union MESAPI__MISC *x_pkt)
129 {
130 const char *op_str = NULL;
131
132 if (x_pkt->header.opcode < ARRAY_SIZE(mes_v12_1_opcodes))
133 op_str = mes_v12_1_opcodes[x_pkt->header.opcode];
134
135 return op_str;
136 }
137
mes_v12_1_get_misc_op_string(union MESAPI__MISC * x_pkt)138 static const char *mes_v12_1_get_misc_op_string(union MESAPI__MISC *x_pkt)
139 {
140 const char *op_str = NULL;
141
142 if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
143 (x_pkt->opcode < ARRAY_SIZE(mes_v12_1_misc_opcodes)))
144 op_str = mes_v12_1_misc_opcodes[x_pkt->opcode];
145
146 return op_str;
147 }
148
/*
 * Submit one MES API packet on the (xcc_id, pipe) ring and busy-poll until
 * the scheduler firmware acknowledges it.
 *
 * Completion is tracked two ways:
 *  - the caller packet's MES_API_STATUS fence is pointed at a freshly
 *    allocated writeback slot which the firmware sets to 1 on completion;
 *  - a trailing QUERY_SCHEDULER_STATUS packet carries the ring's own fence
 *    sequence number, which is what we actually poll on.
 *
 * Returns 0 on success, -EINVAL for a bad opcode, -ETIMEDOUT when the
 * firmware does not respond, or a negative error from ring/writeback setup.
 */
static int mes_v12_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
						    int xcc_id, int pipe, void *pkt,
						    int size, int api_status_off)
{
	union MESAPI__QUERY_MES_STATUS mes_status_pkt;
	signed long timeout = 2100000; /* 2100 ms */
	struct amdgpu_device *adev = mes->adev;
	struct amdgpu_ring *ring = &mes->ring[MES_PIPE_INST(xcc_id, pipe)];
	spinlock_t *ring_lock = &mes->ring_lock[MES_PIPE_INST(xcc_id, pipe)];
	struct MES_API_STATUS *api_status;
	union MESAPI__MISC *x_pkt = pkt;
	const char *op_str, *misc_op_str;
	unsigned long flags;
	u64 status_gpu_addr;
	u32 seq, status_offset;
	u64 *status_ptr;
	signed long r;
	int ret;

	if (x_pkt->header.opcode >= MES_SCH_API_MAX)
		return -EINVAL;

	if (amdgpu_emu_mode) {
		/* Emulation runs orders of magnitude slower; scale up. */
		timeout *= 1000;
	} else if (amdgpu_sriov_vf(adev)) {
		/* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
		timeout = 15 * 600 * 1000;
	}

	/* Writeback slot the firmware fences API completion into. */
	ret = amdgpu_device_wb_get(adev, &status_offset);
	if (ret)
		return ret;

	status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
	status_ptr = (u64 *)&adev->wb.wb[status_offset];
	*status_ptr = 0;

	spin_lock_irqsave(ring_lock, flags);
	r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
	if (r)
		goto error_unlock_free;

	seq = ++ring->fence_drv.sync_seq;
	/* Make sure older submissions have drained before reusing ring space. */
	r = amdgpu_fence_wait_polling(ring,
				      seq - ring->fence_drv.num_fences_mask,
				      timeout);
	if (r < 1)
		goto error_undo;

	/* Point the caller packet's completion fence at our writeback slot. */
	api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
	api_status->api_completion_fence_addr = status_gpu_addr;
	api_status->api_completion_fence_value = 1;

	amdgpu_ring_write_multiple(ring, pkt, size / 4);

	/* Piggy-back a status query whose fence value is the ring sequence
	 * number polled on below. */
	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
	mes_status_pkt.api_status.api_completion_fence_addr =
		ring->fence_drv.gpu_addr;
	mes_status_pkt.api_status.api_completion_fence_value = seq;

	amdgpu_ring_write_multiple(ring, &mes_status_pkt,
				   sizeof(mes_status_pkt) / 4);

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(ring_lock, flags);

	op_str = mes_v12_1_get_op_string(x_pkt);
	misc_op_str = mes_v12_1_get_misc_op_string(x_pkt);

	if (misc_op_str)
		dev_dbg(adev->dev, "MES(%d, %d) msg=%s (%s) was emitted\n",
			xcc_id, pipe, op_str, misc_op_str);
	else if (op_str)
		dev_dbg(adev->dev, "MES(%d, %d) msg=%s was emitted\n",
			xcc_id, pipe, op_str);
	else
		dev_dbg(adev->dev, "MES(%d, %d) msg=%d was emitted\n",
			xcc_id, pipe, x_pkt->header.opcode);

	/* Wait on the trailing query's fence; also require the firmware to
	 * have written our completion slot. */
	r = amdgpu_fence_wait_polling(ring, seq, timeout);
	if (r < 1 || !*status_ptr) {
		if (misc_op_str)
			dev_err(adev->dev,
				"MES(%d, %d) failed to respond to msg=%s (%s)\n",
				xcc_id, pipe, op_str, misc_op_str);
		else if (op_str)
			dev_err(adev->dev,
				"MES(%d, %d) failed to respond to msg=%s\n",
				xcc_id, pipe, op_str);
		else
			dev_err(adev->dev,
				"MES(%d, %d) failed to respond to msg=%d\n",
				xcc_id, pipe, x_pkt->header.opcode);

		/* Debug aid: spin here so the hang state can be inspected. */
		while (halt_if_hws_hang)
			schedule();

		r = -ETIMEDOUT;
		goto error_wb_free;
	}

	amdgpu_device_wb_free(adev, status_offset);
	return 0;

error_undo:
	dev_err(adev->dev, "MES(%d, %d) ring buffer is full.\n", xcc_id, pipe);
	amdgpu_ring_undo(ring);

error_unlock_free:
	spin_unlock_irqrestore(ring_lock, flags);

error_wb_free:
	amdgpu_device_wb_free(adev, status_offset);
	return r;
}
267
convert_to_mes_queue_type(int queue_type)268 static int convert_to_mes_queue_type(int queue_type)
269 {
270 if (queue_type == AMDGPU_RING_TYPE_GFX)
271 return MES_QUEUE_TYPE_GFX;
272 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
273 return MES_QUEUE_TYPE_COMPUTE;
274 else if (queue_type == AMDGPU_RING_TYPE_SDMA)
275 return MES_QUEUE_TYPE_SDMA;
276 else if (queue_type == AMDGPU_RING_TYPE_MES)
277 return MES_QUEUE_TYPE_SCHQ;
278 else
279 BUG();
280 return -1;
281 }
282
mes_v12_1_add_hw_queue(struct amdgpu_mes * mes,struct mes_add_queue_input * input)283 static int mes_v12_1_add_hw_queue(struct amdgpu_mes *mes,
284 struct mes_add_queue_input *input)
285 {
286 union MESAPI__ADD_QUEUE mes_add_queue_pkt;
287 int xcc_id = input->xcc_id;
288 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);
289
290 if (mes->enable_coop_mode)
291 xcc_id = mes->master_xcc_ids[inst];
292
293 memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
294
295 mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
296 mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
297 mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
298
299 mes_add_queue_pkt.process_id = input->process_id;
300 mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
301 mes_add_queue_pkt.process_va_start = input->process_va_start;
302 mes_add_queue_pkt.process_va_end = input->process_va_end;
303 mes_add_queue_pkt.process_quantum = input->process_quantum;
304 mes_add_queue_pkt.process_context_addr = input->process_context_addr;
305 mes_add_queue_pkt.gang_quantum = input->gang_quantum;
306 mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
307 mes_add_queue_pkt.inprocess_gang_priority =
308 input->inprocess_gang_priority;
309 mes_add_queue_pkt.gang_global_priority_level =
310 input->gang_global_priority_level;
311 mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
312 mes_add_queue_pkt.mqd_addr = input->mqd_addr;
313
314 mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr;
315
316 mes_add_queue_pkt.queue_type =
317 convert_to_mes_queue_type(input->queue_type);
318 mes_add_queue_pkt.paging = input->paging;
319 mes_add_queue_pkt.vm_context_cntl = input->vm_cntx_cntl;
320 mes_add_queue_pkt.gws_base = input->gws_base;
321 mes_add_queue_pkt.gws_size = input->gws_size;
322 mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
323 mes_add_queue_pkt.tma_addr = input->tma_addr;
324 mes_add_queue_pkt.trap_en = input->trap_en;
325 mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
326 mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
327
328 /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
329 mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
330 mes_add_queue_pkt.gds_size = input->queue_size;
331
332 /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
333 mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
334 mes_add_queue_pkt.gds_size = input->queue_size;
335
336 mes_add_queue_pkt.full_sh_mem_config_data = input->sh_mem_config_data;
337
338 return mes_v12_1_submit_pkt_and_poll_completion(mes,
339 xcc_id, AMDGPU_MES_SCHED_PIPE,
340 &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
341 offsetof(union MESAPI__ADD_QUEUE, api_status));
342 }
343
mes_v12_1_remove_hw_queue(struct amdgpu_mes * mes,struct mes_remove_queue_input * input)344 static int mes_v12_1_remove_hw_queue(struct amdgpu_mes *mes,
345 struct mes_remove_queue_input *input)
346 {
347 union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
348 int xcc_id = input->xcc_id;
349 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);
350
351 if (mes->enable_coop_mode)
352 xcc_id = mes->master_xcc_ids[inst];
353
354 memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
355
356 mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
357 mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
358 mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
359
360 mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
361 mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
362
363 return mes_v12_1_submit_pkt_and_poll_completion(mes,
364 xcc_id, AMDGPU_MES_SCHED_PIPE,
365 &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
366 offsetof(union MESAPI__REMOVE_QUEUE, api_status));
367 }
368
mes_v12_1_reset_hw_queue(struct amdgpu_mes * mes,struct mes_reset_queue_input * input)369 static int mes_v12_1_reset_hw_queue(struct amdgpu_mes *mes,
370 struct mes_reset_queue_input *input)
371 {
372 union MESAPI__RESET mes_reset_queue_pkt;
373 int pipe;
374
375 memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
376
377 mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
378 mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
379 mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
380
381 mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
382 /* mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; */
383 /*mes_reset_queue_pkt.reset_queue_only = 1;*/
384
385 if (mes->adev->enable_uni_mes)
386 pipe = AMDGPU_MES_KIQ_PIPE;
387 else
388 pipe = AMDGPU_MES_SCHED_PIPE;
389
390 return mes_v12_1_submit_pkt_and_poll_completion(mes,
391 input->xcc_id, pipe,
392 &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
393 offsetof(union MESAPI__REMOVE_QUEUE, api_status));
394 }
395
mes_v12_1_map_legacy_queue(struct amdgpu_mes * mes,struct mes_map_legacy_queue_input * input)396 static int mes_v12_1_map_legacy_queue(struct amdgpu_mes *mes,
397 struct mes_map_legacy_queue_input *input)
398 {
399 union MESAPI__ADD_QUEUE mes_add_queue_pkt;
400 int pipe;
401
402 memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
403
404 mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
405 mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
406 mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
407
408 mes_add_queue_pkt.pipe_id = input->pipe_id;
409 mes_add_queue_pkt.queue_id = input->queue_id;
410 mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
411 mes_add_queue_pkt.mqd_addr = input->mqd_addr;
412 mes_add_queue_pkt.wptr_addr = input->wptr_addr;
413 mes_add_queue_pkt.queue_type =
414 convert_to_mes_queue_type(input->queue_type);
415 mes_add_queue_pkt.map_legacy_kq = 1;
416
417 if (mes->adev->enable_uni_mes)
418 pipe = AMDGPU_MES_KIQ_PIPE;
419 else
420 pipe = AMDGPU_MES_SCHED_PIPE;
421
422 return mes_v12_1_submit_pkt_and_poll_completion(mes,
423 input->xcc_id, pipe,
424 &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
425 offsetof(union MESAPI__ADD_QUEUE, api_status));
426 }
427
mes_v12_1_unmap_legacy_queue(struct amdgpu_mes * mes,struct mes_unmap_legacy_queue_input * input)428 static int mes_v12_1_unmap_legacy_queue(struct amdgpu_mes *mes,
429 struct mes_unmap_legacy_queue_input *input)
430 {
431 union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
432 int pipe;
433
434 memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
435
436 mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
437 mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
438 mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
439
440 mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
441 mes_remove_queue_pkt.gang_context_addr = 0;
442
443 mes_remove_queue_pkt.pipe_id = input->pipe_id;
444 mes_remove_queue_pkt.queue_id = input->queue_id;
445
446 if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
447 mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
448 mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
449 mes_remove_queue_pkt.tf_data =
450 lower_32_bits(input->trail_fence_data);
451 } else {
452 mes_remove_queue_pkt.unmap_legacy_queue = 1;
453 mes_remove_queue_pkt.queue_type =
454 convert_to_mes_queue_type(input->queue_type);
455 }
456
457 if (mes->adev->enable_uni_mes)
458 pipe = AMDGPU_MES_KIQ_PIPE;
459 else
460 pipe = AMDGPU_MES_SCHED_PIPE;
461
462 return mes_v12_1_submit_pkt_and_poll_completion(mes,
463 input->xcc_id, pipe,
464 &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
465 offsetof(union MESAPI__REMOVE_QUEUE, api_status));
466 }
467
/* Gang suspend is a no-op on MES v12.1; report success so generic MES
 * code can proceed. */
static int mes_v12_1_suspend_gang(struct amdgpu_mes *mes,
				  struct mes_suspend_gang_input *input)
{
	return 0;
}
473
/* Gang resume is a no-op on MES v12.1; report success so generic MES
 * code can proceed. */
static int mes_v12_1_resume_gang(struct amdgpu_mes *mes,
				 struct mes_resume_gang_input *input)
{
	return 0;
}
479
mes_v12_1_query_sched_status(struct amdgpu_mes * mes,int pipe,int xcc_id)480 static int mes_v12_1_query_sched_status(struct amdgpu_mes *mes,
481 int pipe, int xcc_id)
482 {
483 union MESAPI__QUERY_MES_STATUS mes_status_pkt;
484
485 memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
486
487 mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
488 mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
489 mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
490
491 return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe,
492 &mes_status_pkt, sizeof(mes_status_pkt),
493 offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
494 }
/* Extract the XCC index encoded in bits [18:16] of a register offset. */
static uint32_t mes_v12_1_get_xcc_from_reg(uint32_t reg_offset)
{
	uint32_t xcc = reg_offset >> 16;

	return xcc & 0x7;
}
499
/*
 * Fill in the RRMT routing option for a register access issued by MES
 * firmware, and return the normalized register offset through @out_reg.
 *
 * Visible behavior: if the (XCC-normalized) offset falls in the per-XCC
 * register range, the access targets the XCD die encoded in the offset --
 * local when it matches @xcc_id, remote otherwise.  Anything else is
 * routed as a remote-MID access, on die 1 when the offset is in the MID1
 * range (die 0 implied by the zeroed packet otherwise).
 *
 * NOTE(review): exact semantics of the soc_v1_0_* range helpers are not
 * visible in this file -- confirm against soc_v1_0.h before relying on
 * the range descriptions above.
 */
static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id,
			       struct RRMT_OPTION *rrmt_opt,
			       uint32_t *out_reg)
{
	uint32_t normalized_reg = soc_v1_0_normalize_xcc_reg_offset(reg);

	if (soc_v1_0_normalize_xcc_reg_range(normalized_reg)) {
		/* Per-XCC register: route to the die encoded in the offset. */
		rrmt_opt->xcd_die_id = mes_v12_1_get_xcc_from_reg(reg);
		rrmt_opt->mode = (xcc_id == rrmt_opt->xcd_die_id) ?
			MES_RRMT_MODE_LOCAL_XCD : MES_RRMT_MODE_REMOTE_XCD;
	} else {
		rrmt_opt->mode = MES_RRMT_MODE_REMOTE_MID;
		if (soc_v1_0_mid1_reg_range(reg))
			rrmt_opt->mid_die_id = 1;
	}

	*out_reg = soc_v1_0_normalize_reg_offset(reg);
}
518
/*
 * Dispatch a miscellaneous MES operation (register read/write, wait,
 * shader-debugger setup, config change) as a MISC API packet.
 *
 * Register-access ops translate their offsets through mes_v12_1_get_rrmt()
 * so the firmware can route them to the right die.  Returns 0 on success,
 * -EINVAL for an unknown op, or the packet submission error.
 */
static int mes_v12_1_misc_op(struct amdgpu_mes *mes,
			     struct mes_misc_op_input *input)
{
	struct amdgpu_device *adev = mes->adev;
	union MESAPI__MISC misc_pkt;
	int pipe;

	/* Default routing: KIQ pipe for uni-MES, scheduler pipe otherwise. */
	if (mes->adev->enable_uni_mes)
		pipe = AMDGPU_MES_KIQ_PIPE;
	else
		pipe = AMDGPU_MES_SCHED_PIPE;

	memset(&misc_pkt, 0, sizeof(misc_pkt));

	misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
	misc_pkt.header.opcode = MES_SCH_API_MISC;
	misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	switch (input->op) {
	case MES_MISC_OP_READ_REG:
		misc_pkt.opcode = MESAPI_MISC__READ_REG;
		misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
		mes_v12_1_get_rrmt(input->read_reg.reg_offset,
				   GET_INST(GC, input->xcc_id),
				   &misc_pkt.read_reg.rrmt_opt,
				   &misc_pkt.read_reg.reg_offset);
		break;
	case MES_MISC_OP_WRITE_REG:
		misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
		misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
		mes_v12_1_get_rrmt(input->write_reg.reg_offset,
				   GET_INST(GC, input->xcc_id),
				   &misc_pkt.write_reg.rrmt_opt,
				   &misc_pkt.write_reg.reg_offset);
		break;
	case MES_MISC_OP_WRM_REG_WAIT:
		/* Wait until (reg & mask) matches the reference value. */
		misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
		misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
		misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
		misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
		misc_pkt.wait_reg_mem.reg_offset2 = 0;
		mes_v12_1_get_rrmt(input->wrm_reg.reg0,
				   GET_INST(GC, input->xcc_id),
				   &misc_pkt.wait_reg_mem.rrmt_opt1,
				   &misc_pkt.wait_reg_mem.reg_offset1);
		break;
	case MES_MISC_OP_WRM_REG_WR_WAIT:
		/* Write reg0, then wait, then write reg1 (two offsets). */
		misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
		misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
		misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
		misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
		mes_v12_1_get_rrmt(input->wrm_reg.reg0,
				   GET_INST(GC, input->xcc_id),
				   &misc_pkt.wait_reg_mem.rrmt_opt1,
				   &misc_pkt.wait_reg_mem.reg_offset1);
		mes_v12_1_get_rrmt(input->wrm_reg.reg1,
				   GET_INST(GC, input->xcc_id),
				   &misc_pkt.wait_reg_mem.rrmt_opt2,
				   &misc_pkt.wait_reg_mem.reg_offset2);
		break;
	case MES_MISC_OP_SET_SHADER_DEBUGGER:
		/* This op always goes to the scheduler pipe, overriding the
		 * uni-MES default chosen above. */
		pipe = AMDGPU_MES_SCHED_PIPE;
		misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
		misc_pkt.set_shader_debugger.process_context_addr =
			input->set_shader_debugger.process_context_addr;
		misc_pkt.set_shader_debugger.flags.u32all =
			input->set_shader_debugger.flags.u32all;
		misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl =
			input->set_shader_debugger.spi_gdbg_per_vmid_cntl;
		memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
		       input->set_shader_debugger.tcp_watch_cntl,
		       sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
		misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en;
		break;
	case MES_MISC_OP_CHANGE_CONFIG:
		misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
		misc_pkt.change_config.opcode =
			MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS;
		misc_pkt.change_config.option.bits.limit_single_process =
			input->change_config.option.limit_single_process;
		break;
	default:
		DRM_ERROR("unsupported misc op (%d) \n", input->op);
		return -EINVAL;
	}

	return mes_v12_1_submit_pkt_and_poll_completion(mes,
			input->xcc_id, pipe,
			&misc_pkt, sizeof(misc_pkt),
			offsetof(union MESAPI__MISC, api_status));
}
610
mes_v12_1_set_hw_resources_1(struct amdgpu_mes * mes,int pipe,int xcc_id)611 static int mes_v12_1_set_hw_resources_1(struct amdgpu_mes *mes,
612 int pipe, int xcc_id)
613 {
614 union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt;
615 int master_xcc_id, inst = MES_PIPE_INST(xcc_id, pipe);
616
617 memset(&mes_set_hw_res_1_pkt, 0, sizeof(mes_set_hw_res_1_pkt));
618
619 mes_set_hw_res_1_pkt.header.type = MES_API_TYPE_SCHEDULER;
620 mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
621 mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
622 mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100;
623
624 if (mes->enable_coop_mode && pipe == AMDGPU_MES_SCHED_PIPE) {
625 master_xcc_id = mes->master_xcc_ids[inst];
626 mes_set_hw_res_1_pkt.mes_coop_mode = 1;
627 mes_set_hw_res_1_pkt.coop_sch_shared_mc_addr =
628 mes->shared_cmd_buf_gpu_addr[master_xcc_id];
629 }
630
631 return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe,
632 &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt),
633 offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
634 }
635
/* Tell the firmware which GFX HQDs it may schedule on. */
static void mes_v12_1_set_gfx_hqd_mask(union MESAPI_SET_HW_RESOURCES *pkt)
{
	/*
	 * GFX V12 has only one GFX pipe, but 8 queues in it.
	 * GFX pipe 0 queue 0 is being used by Kernel queue.
	 * Set GFX pipe 0 queue 1-7 for MES scheduling
	 * mask = 1111 1110b
	 */
	pkt->gfx_hqd_mask[0] = 0xFE;
}
646
/*
 * Send the primary SET_HW_RSRC packet describing the hardware resources
 * the MES firmware on (xcc_id, pipe) may manage: VMID masks, HQD masks,
 * aggregated doorbells, context buffers, register apertures, and feature
 * knobs.  Queue/doorbell resources are only handed to the scheduler pipe.
 */
static int mes_v12_1_set_hw_resources(struct amdgpu_mes *mes,
				      int pipe, int xcc_id)
{
	int i;
	struct amdgpu_device *adev = mes->adev;
	union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;

	memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));

	mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
	mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	/* Scheduling resources only make sense on the scheduler pipe. */
	if (pipe == AMDGPU_MES_SCHED_PIPE) {
		mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
		mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
		mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
		mes_set_hw_res_pkt.paging_vmid = 0;

		for (i = 0; i < MAX_COMPUTE_PIPES; i++)
			mes_set_hw_res_pkt.compute_hqd_mask[i] =
				mes->compute_hqd_mask[i];

		mes_v12_1_set_gfx_hqd_mask(&mes_set_hw_res_pkt);

		for (i = 0; i < MAX_SDMA_PIPES; i++)
			mes_set_hw_res_pkt.sdma_hqd_mask[i] =
				mes->sdma_hqd_mask[i];

		for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
			mes_set_hw_res_pkt.aggregated_doorbells[i] =
				mes->aggregated_doorbells[i];
	}

	/* Per-pipe scheduler context and status fence buffers. */
	mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr =
		mes->sch_ctx_gpu_addr[pipe];
	mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
		mes->query_status_fence_gpu_addr[pipe];

	/* Register aperture bases (5 segments per IP, from the IP discovery
	 * offset tables). */
	for (i = 0; i < 5; i++) {
		mes_set_hw_res_pkt.gc_base[i] =
			adev->reg_offset[GC_HWIP][0][i];
		mes_set_hw_res_pkt.mmhub_base[i] =
			adev->reg_offset[MMHUB_HWIP][0][i];
		mes_set_hw_res_pkt.osssys_base[i] =
			adev->reg_offset[OSSSYS_HWIP][0][i];
	}

	mes_set_hw_res_pkt.disable_reset = 1;
	mes_set_hw_res_pkt.disable_mes_log = 1;
	mes_set_hw_res_pkt.use_different_vmid_compute = 1;
	mes_set_hw_res_pkt.enable_reg_active_poll = 1;
	mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;

	/*
	 * Keep oversubscribe timer for sdma . When we have unmapped doorbell
	 * handling support, other queue will not use the oversubscribe timer.
	 * handling mode - 0: disabled; 1: basic version; 2: basic+ version
	 */
	mes_set_hw_res_pkt.oversubscription_timer = 50;
	mes_set_hw_res_pkt.unmapped_doorbell_handling = 1;

	if (amdgpu_mes_log_enable) {
		/* Each pipe instance gets its own slice of the event log. */
		mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
		mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr =
			mes->event_log_gpu_addr + MES_PIPE_INST(xcc_id, pipe) * AMDGPU_MES_LOG_BUFFER_SIZE;
	}

	if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
		mes_set_hw_res_pkt.limit_single_process = 1;

	return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe,
			&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
			offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
}
722
/*
 * Program the five CP_MES_DOORBELL_CONTROLn registers, one per priority
 * level (LOW..REALTIME), with the aggregated doorbell offset for that
 * level and the enable bit, then enable doorbell-updated messaging on the
 * GFX HQD.  Each register is read-modify-written, clearing only the
 * OFFSET/EN/HIT fields.
 */
static void mes_v12_1_init_aggregated_doorbell(struct amdgpu_mes *mes,
					       int xcc_id)
{
	struct amdgpu_device *adev = mes->adev;
	uint32_t data;

	/* CONTROL1 <- LOW priority aggregated doorbell */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1);
	data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
		CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1, data);

	/* CONTROL2 <- NORMAL priority aggregated doorbell */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2);
	data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
		CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2, data);

	/* CONTROL3 <- MEDIUM priority aggregated doorbell */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3);
	data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
		CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3, data);

	/* CONTROL4 <- HIGH priority aggregated doorbell */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4);
	data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
		CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4, data);

	/* CONTROL5 <- REALTIME priority aggregated doorbell */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5);
	data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
		CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5, data);

	/* Let the GFX HQD raise a message when its doorbell is rung. */
	data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_GFX_CONTROL, data);
}
777
778
/* Enable/configure firmware handling of doorbell rings on queues that are
 * not currently mapped to hardware. */
static void mes_v12_1_enable_unmapped_doorbell_handling(
		struct amdgpu_mes *mes, bool enable, int xcc_id)
{
	struct amdgpu_device *adev = mes->adev;
	uint32_t data;

	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL);

	/*
	 * The default PROC_LSB setting is 0xc, meaning doorbell addr[16:12]
	 * gives the doorbell page number.  KFD uses 2 doorbell pages per
	 * process, so bump the setting to 0xd.
	 */
	data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK;
	data |= 0xd << CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT;

	if (enable)
		data |= 1 << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT;

	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL, data);
}
798
#if 0
/*
 * Currently compiled out: legacy queue reset through the MES RESET API.
 * Kept for reference / future enablement.  GFX legacy queues carry the
 * full pipe/queue/MQD description (the *_lp fields); everything else is
 * reset by doorbell offset only.
 */
static int mes_v12_1_reset_legacy_queue(struct amdgpu_mes *mes,
					struct mes_reset_legacy_queue_input *input)
{
	union MESAPI__RESET mes_reset_queue_pkt;
	int pipe;

	memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));

	mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
	mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_reset_queue_pkt.queue_type =
		convert_to_mes_queue_type(input->queue_type);

	if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) {
		mes_reset_queue_pkt.reset_legacy_gfx = 1;
		mes_reset_queue_pkt.pipe_id_lp = input->pipe_id;
		mes_reset_queue_pkt.queue_id_lp = input->queue_id;
		mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr;
		mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset;
		mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr;
		mes_reset_queue_pkt.vmid_id_lp = input->vmid;
	} else {
		mes_reset_queue_pkt.reset_queue_only = 1;
		mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
	}

	if (mes->adev->enable_uni_mes)
		pipe = AMDGPU_MES_KIQ_PIPE;
	else
		pipe = AMDGPU_MES_SCHED_PIPE;

	return mes_v12_1_submit_pkt_and_poll_completion(mes,
			input->xcc_id, pipe,
			&mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
			offsetof(union MESAPI__RESET, api_status));
}
#endif
839
/*
 * Translate an amdgpu hub id to the hub id the MES firmware expects,
 * or -EINVAL for an unknown hub.
 *
 * MES doesn't support invalidate gc_hub on slave xcc individually;
 * master xcc will invalidate all gc_hub for the partition.
 */
static int mes_v12_inv_tlb_convert_hub_id(uint8_t id)
{
	if (AMDGPU_IS_GFXHUB(id))
		return 0;
	if (AMDGPU_IS_MMHUB0(id))
		return 1;
	if (AMDGPU_IS_MMHUB1(id))
		return 2;

	return -EINVAL;
}
855
/*
 * mes_v12_1_inv_tlbs_pasid - ask MES to invalidate TLB entries for a PASID.
 *
 * Builds a MES_SCH_API_INV_TLBS packet and submits it on the KIQ pipe.
 * In coop mode the request is redirected to the partition's master XCC,
 * since slave XCCs cannot invalidate the gc_hub individually (see
 * mes_v12_inv_tlb_convert_hub_id()).
 *
 * Returns 0 on success or a negative error code.
 */
static int mes_v12_1_inv_tlbs_pasid(struct amdgpu_mes *mes,
				    struct mes_inv_tlbs_pasid_input *input)
{
	union MESAPI__INV_TLBS mes_inv_tlbs;
	int xcc_id = input->xcc_id;
	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);
	int ret;

	/* Slave XCCs defer to their partition master in coop mode. */
	if (mes->enable_coop_mode)
		xcc_id = mes->master_xcc_ids[inst];

	memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs));

	mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER;
	mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS;
	mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	/* inv_sel 0: presumably "select by PASID" (inv_sel_id carries the
	 * pasid) - confirm against the MES API definition.
	 */
	mes_inv_tlbs.invalidate_tlbs.inv_sel = 0;
	mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type;
	mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid;

	/* convert amdgpu_mes_hub_id to mes expected hub_id */
	ret = mes_v12_inv_tlb_convert_hub_id(input->hub_id);
	if (ret < 0)
		return -EINVAL;
	mes_inv_tlbs.invalidate_tlbs.hub_id = ret;
	return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, AMDGPU_MES_KIQ_PIPE,
			&mes_inv_tlbs, sizeof(mes_inv_tlbs),
			offsetof(union MESAPI__INV_TLBS, api_status));

}
887
/* MES v12.1 callbacks wired into the generic amdgpu_mes layer. */
static const struct amdgpu_mes_funcs mes_v12_1_funcs = {
	.add_hw_queue = mes_v12_1_add_hw_queue,
	.remove_hw_queue = mes_v12_1_remove_hw_queue,
	.map_legacy_queue = mes_v12_1_map_legacy_queue,
	.unmap_legacy_queue = mes_v12_1_unmap_legacy_queue,
	.suspend_gang = mes_v12_1_suspend_gang,
	.resume_gang = mes_v12_1_resume_gang,
	.misc_op = mes_v12_1_misc_op,
	.reset_hw_queue = mes_v12_1_reset_hw_queue,
	.invalidate_tlbs_pasid = mes_v12_1_inv_tlbs_pasid,
};
899
/*
 * mes_v12_1_allocate_ucode_buffer - copy the MES instruction ucode to VRAM.
 *
 * Creates a page-aligned, CPU-mapped VRAM BO sized from the firmware
 * header and copies the instruction segment of the @pipe firmware into
 * it.  The BO is unmapped and unreserved before returning; the GPU
 * address is kept in adev->mes.ucode_fw_gpu_addr[inst] for later
 * programming of CP_MES_IC_BASE.
 *
 * Returns 0 on success or the amdgpu_bo_create_reserved() error code.
 */
static int mes_v12_1_allocate_ucode_buffer(struct amdgpu_device *adev,
					   enum amdgpu_mes_pipe pipe,
					   int xcc_id)
{
	int r, inst = MES_PIPE_INST(xcc_id, pipe);
	const struct mes_firmware_header_v1_0 *mes_hdr;
	const __le32 *fw_data;
	unsigned fw_size;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw[pipe]->data;

	/* Instruction segment location/size come from the firmware header. */
	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
		   le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
	fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_size,
				      PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->mes.ucode_fw_obj[inst],
				      &adev->mes.ucode_fw_gpu_addr[inst],
				      (void **)&adev->mes.ucode_fw_ptr[inst]);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
		return r;
	}

	memcpy(adev->mes.ucode_fw_ptr[inst], fw_data, fw_size);

	amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[inst]);
	amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[inst]);

	return 0;
}
934
/*
 * mes_v12_1_allocate_ucode_data_buffer - copy the MES data ucode to VRAM.
 *
 * Same as mes_v12_1_allocate_ucode_buffer() but for the firmware's data
 * segment, which is placed in a 64KB-aligned VRAM BO whose GPU address
 * later feeds CP_MES_MDBASE.
 *
 * Returns 0 on success or the amdgpu_bo_create_reserved() error code.
 */
static int mes_v12_1_allocate_ucode_data_buffer(struct amdgpu_device *adev,
						enum amdgpu_mes_pipe pipe,
						int xcc_id)
{
	int r, inst = MES_PIPE_INST(xcc_id, pipe);
	const struct mes_firmware_header_v1_0 *mes_hdr;
	const __le32 *fw_data;
	unsigned fw_size;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw[pipe]->data;

	/* Data segment location/size come from the firmware header. */
	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
		   le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
	fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);

	/* 64KB alignment for the data base address. */
	r = amdgpu_bo_create_reserved(adev, fw_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->mes.data_fw_obj[inst],
				      &adev->mes.data_fw_gpu_addr[inst],
				      (void **)&adev->mes.data_fw_ptr[inst]);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
		return r;
	}

	memcpy(adev->mes.data_fw_ptr[inst], fw_data, fw_size);

	amdgpu_bo_kunmap(adev->mes.data_fw_obj[inst]);
	amdgpu_bo_unreserve(adev->mes.data_fw_obj[inst]);

	return 0;
}
969
/*
 * mes_v12_1_free_ucode_buffers - release the instruction and data ucode
 * BOs allocated by mes_v12_1_allocate_ucode_buffer() and
 * mes_v12_1_allocate_ucode_data_buffer() for @pipe on @xcc_id.
 */
static void mes_v12_1_free_ucode_buffers(struct amdgpu_device *adev,
					 enum amdgpu_mes_pipe pipe,
					 int xcc_id)
{
	int inst = MES_PIPE_INST(xcc_id, pipe);

	amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[inst],
			      &adev->mes.data_fw_gpu_addr[inst],
			      (void **)&adev->mes.data_fw_ptr[inst]);

	amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[inst],
			      &adev->mes.ucode_fw_gpu_addr[inst],
			      (void **)&adev->mes.ucode_fw_ptr[inst]);
}
984
/*
 * mes_v12_1_enable - bring the two MES pipes of @xcc_id up or down.
 *
 * @enable true: pulse the pipe resets, program each pipe's firmware
 * entry point (CP_MES_PRGRM_CNTR_START/_HI) under the srbm mutex, then
 * clear the resets and set both pipes active; a short delay lets the
 * firmware start (longer on emulation / unified MES).
 *
 * @enable false: deactivate both pipes, invalidate the instruction
 * cache, assert the pipe resets and halt MES.
 */
static void mes_v12_1_enable(struct amdgpu_device *adev,
			     bool enable, int xcc_id)
{
	uint64_t ucode_addr;
	uint32_t pipe, data = 0;

	if (enable) {
		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);

		mutex_lock(&adev->srbm_mutex);
		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
			/* me=3, queue=0: select the MES pipe */
			soc_v1_0_grbm_select(adev, 3, pipe, 0, 0,
				   GET_INST(GC, xcc_id));

			/* start address registers take a dword address */
			ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
			WREG32_SOC15(GC, GET_INST(GC, xcc_id),
				     regCP_MES_PRGRM_CNTR_START,
				     lower_32_bits(ucode_addr));
			WREG32_SOC15(GC, GET_INST(GC, xcc_id),
				     regCP_MES_PRGRM_CNTR_START_HI,
				     upper_32_bits(ucode_addr));
		}
		soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
		mutex_unlock(&adev->srbm_mutex);

		/* unhalt MES and activate both pipes */
		data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);

		/* give the firmware time to come up */
		if (amdgpu_emu_mode)
			msleep(500);
		else if (adev->enable_uni_mes)
			udelay(500);
		else
			udelay(50);
	} else {
		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL,
				     MES_INVALIDATE_ICACHE, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);
	}
}
1036
/*
 * mes_v12_1_set_ucode_start_addr - program the firmware entry point
 * (CP_MES_PRGRM_CNTR_START/_HI) for both MES pipes of @xcc_id.
 *
 * MES is halted first via mes_v12_1_enable(adev, false, ...); the
 * caller re-enables it afterwards.
 */
static void mes_v12_1_set_ucode_start_addr(struct amdgpu_device *adev,
				int xcc_id)
{
	uint64_t ucode_addr;
	int pipe;

	mes_v12_1_enable(adev, false, xcc_id);

	mutex_lock(&adev->srbm_mutex);
	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		/* me=3, queue=0 */
		soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));

		/* set ucode start address (registers take a dword address) */
		ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START,
			     lower_32_bits(ucode_addr));
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START_HI,
			     upper_32_bits(ucode_addr));

		soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
	}
	mutex_unlock(&adev->srbm_mutex);
}
1061
/*
 * mes_v12_1_load_microcode - backdoor (direct) MES firmware load.
 *
 * Used when the firmware load type is AMDGPU_FW_LOAD_DIRECT: copies the
 * instruction and data segments into VRAM BOs, points the MES
 * instruction/data caches at them and sets the cache boundaries, then
 * optionally invalidates and primes the instruction cache.
 *
 * On any failure after allocation the ucode BOs are freed again.
 * Returns 0 on success or a negative error code.
 */
static int mes_v12_1_load_microcode(struct amdgpu_device *adev,
				    enum amdgpu_mes_pipe pipe,
				    bool prime_icache, int xcc_id)
{
	int r, inst = MES_PIPE_INST(xcc_id, pipe);
	uint32_t data;

	/* halt MES before reprogramming its caches */
	mes_v12_1_enable(adev, false, xcc_id);

	if (!adev->mes.fw[pipe])
		return -EINVAL;

	r = mes_v12_1_allocate_ucode_buffer(adev, pipe, xcc_id);
	if (r)
		return r;

	r = mes_v12_1_allocate_ucode_data_buffer(adev, pipe, xcc_id);
	if (r) {
		mes_v12_1_free_ucode_buffers(adev, pipe, xcc_id);
		return r;
	}

	mutex_lock(&adev->srbm_mutex);
	/* me=3, pipe=0, queue=0 */
	soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));

	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_CNTL, 0);

	/* set ucode firmware address */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_LO,
		     lower_32_bits(adev->mes.ucode_fw_gpu_addr[inst]));
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_HI,
		     upper_32_bits(adev->mes.ucode_fw_gpu_addr[inst]));

	/* set ucode instruction cache boundary to 2M-1 */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MIBOUND_LO, 0x1FFFFF);

	/* set ucode data firmware address */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_LO,
		     lower_32_bits(adev->mes.data_fw_gpu_addr[inst]));
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_HI,
		     upper_32_bits(adev->mes.data_fw_gpu_addr[inst]));

	/* Set data cache boundary CP_MES_MDBOUND_LO */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBOUND_LO, 0x7FFFF);

	if (prime_icache) {
		/* invalidate ICACHE */
		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data);

		/* prime the ICACHE. */
		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data);
	}

	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
	mutex_unlock(&adev->srbm_mutex);

	return 0;
}
1127
/*
 * mes_v12_1_allocate_eop_buf - allocate and zero the EOP buffer used by
 * the MES hardware queue of @pipe on @xcc_id.
 *
 * The buffer (MES_EOP_SIZE bytes, page aligned, in GTT) is zeroed over
 * its full BO size, then unmapped and unreserved; the GPU address is
 * kept in adev->mes.eop_gpu_addr[inst].
 *
 * Returns 0 on success or the amdgpu_bo_create_reserved() error code.
 */
static int mes_v12_1_allocate_eop_buf(struct amdgpu_device *adev,
				      enum amdgpu_mes_pipe pipe,
				      int xcc_id)
{
	int r, inst = MES_PIPE_INST(xcc_id, pipe);
	u32 *eop;

	r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
			      AMDGPU_GEM_DOMAIN_GTT,
			      &adev->mes.eop_gpu_obj[inst],
			      &adev->mes.eop_gpu_addr[inst],
			      (void **)&eop);
	if (r) {
		dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
		return r;
	}

	memset(eop, 0,
	       adev->mes.eop_gpu_obj[inst]->tbo.base.size);

	amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[inst]);
	amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[inst]);

	return 0;
}
1153
mes_v12_1_allocate_shared_cmd_buf(struct amdgpu_device * adev,enum amdgpu_mes_pipe pipe,int xcc_id)1154 static int mes_v12_1_allocate_shared_cmd_buf(struct amdgpu_device *adev,
1155 enum amdgpu_mes_pipe pipe,
1156 int xcc_id)
1157 {
1158 int r, inst = MES_PIPE_INST(xcc_id, pipe);
1159
1160 if (pipe == AMDGPU_MES_KIQ_PIPE)
1161 return 0;
1162
1163 r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
1164 AMDGPU_GEM_DOMAIN_VRAM,
1165 &adev->mes.shared_cmd_buf_obj[inst],
1166 &adev->mes.shared_cmd_buf_gpu_addr[inst],
1167 NULL);
1168 if (r) {
1169 dev_err(adev->dev,
1170 "(%d) failed to create shared cmd buf bo\n", r);
1171 return r;
1172 }
1173
1174 return 0;
1175 }
1176
/*
 * mes_v12_1_mqd_init - populate the MQD (memory queue descriptor) for a
 * MES ring.
 *
 * Fills in ring->mqd_ptr from the ring's buffers: EOP buffer, ring
 * buffer base/size, rptr report and wptr poll addresses, doorbell
 * control, and marks the queue active.  The MQD is later either pushed
 * to the HQD registers directly (mes_v12_1_queue_init_register()) or
 * consumed by the KIQ map-queue path.
 *
 * Always returns 0.
 */
static int mes_v12_1_mqd_init(struct amdgpu_ring *ring)
{
	struct v12_1_mes_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* enable all SEs for static thread management */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000007;

	/* EOP base is a 256-byte-aligned address (>> 8) */
	eop_base_addr = ring->eop_gpu_addr >> 8;

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(MES_EOP_SIZE / 4) - 1));

	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
	mqd->cp_hqd_eop_control = tmp;

	/* disable the queue if it's active */
	ring->wptr = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = regCP_MQD_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = ring->rptr_gpu_addr;
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = ring->wptr_gpu_addr;
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* enable doorbell */
	tmp = 0;
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}
	mqd->cp_hqd_pq_doorbell_control = tmp;

	mqd->cp_hqd_vmid = 0;
	/* activate the queue */
	mqd->cp_hqd_active = 1;

	/* PRELOAD_SIZE = 0x63 */
	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE,
			    PRELOAD_SIZE, 0x63);
	mqd->cp_hqd_persistent_state = tmp;

	mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT;
	mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
	mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;

	/*
	 * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped
	 * doorbell handling. This is a reserved CP internal register can
	 * not be accesss by others
	 */
	mqd->cp_hqd_gfx_control = BIT(15);

	return 0;
}
1285
mes_v12_1_queue_init_register(struct amdgpu_ring * ring,int xcc_id)1286 static void mes_v12_1_queue_init_register(struct amdgpu_ring *ring,
1287 int xcc_id)
1288 {
1289 struct v12_1_mes_mqd *mqd = ring->mqd_ptr;
1290 struct amdgpu_device *adev = ring->adev;
1291 uint32_t data = 0;
1292
1293 mutex_lock(&adev->srbm_mutex);
1294 soc_v1_0_grbm_select(adev, 3, ring->pipe, 0, 0, GET_INST(GC, xcc_id));
1295
1296 /* set CP_HQD_VMID.VMID = 0. */
1297 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID);
1298 data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
1299 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, data);
1300
1301 /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
1302 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL);
1303 data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
1304 DOORBELL_EN, 0);
1305 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data);
1306
1307 /* set CP_MQD_BASE_ADDR/HI with the MQD base address */
1308 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
1309 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
1310
1311 /* set CP_MQD_CONTROL.VMID=0 */
1312 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL);
1313 data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
1314 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL, 0);
1315
1316 /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
1317 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
1318 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
1319
1320 /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
1321 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR,
1322 mqd->cp_hqd_pq_rptr_report_addr_lo);
1323 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
1324 mqd->cp_hqd_pq_rptr_report_addr_hi);
1325
1326 /* set CP_HQD_PQ_CONTROL */
1327 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
1328
1329 /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
1330 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR,
1331 mqd->cp_hqd_pq_wptr_poll_addr_lo);
1332 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
1333 mqd->cp_hqd_pq_wptr_poll_addr_hi);
1334
1335 /* set CP_HQD_PQ_DOORBELL_CONTROL */
1336 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
1337 mqd->cp_hqd_pq_doorbell_control);
1338
1339 /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
1340 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
1341
1342 /* set CP_HQD_ACTIVE.ACTIVE=1 */
1343 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, mqd->cp_hqd_active);
1344
1345 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
1346 mutex_unlock(&adev->srbm_mutex);
1347 }
1348
/*
 * mes_v12_1_kiq_enable_queue - map the MES scheduler ring via the KIQ.
 *
 * Emits a kiq_map_queues packet on the KIQ ring for the scheduler-pipe
 * MES ring of @xcc_id and verifies the KIQ ring still responds.  On a
 * failed ring test the KIQ ring is marked not ready.
 *
 * Returns 0 on success or a negative error code.
 */
static int mes_v12_1_kiq_enable_queue(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[xcc_id].ring;
	int r, inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);

	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
		return -EINVAL;

	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}

	kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[inst]);

	r = amdgpu_ring_test_ring(kiq_ring);
	if (r) {
		DRM_ERROR("kfq enable failed\n");
		kiq_ring->sched.ready = false;
	}
	return r;
}
1373
/*
 * mes_v12_1_queue_init - initialize and map one MES queue on @xcc_id.
 *
 * Picks the right ring (the gfx KIQ ring for the KIQ pipe when unified
 * MES is off, the MES ring otherwise), re-zeroes its pointers across
 * reset/suspend, initializes the MQD, then maps the queue: scheduler
 * pipes go through the MES/KIQ map path, other pipes are programmed
 * directly via mes_v12_1_queue_init_register().  Finally the firmware
 * version is read back from CP_MES_GP3_LO.
 *
 * Returns 0 on success or a negative error code.
 */
static int mes_v12_1_queue_init(struct amdgpu_device *adev,
				enum amdgpu_mes_pipe pipe,
				int xcc_id)
{
	struct amdgpu_ring *ring;
	int r;

	if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
		ring = &adev->gfx.kiq[xcc_id].ring;
	else
		ring = &adev->mes.ring[MES_PIPE_INST(xcc_id, pipe)];

	/* wptr/rptr/ring contents are stale after reset or suspend */
	if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) &&
	    (amdgpu_in_reset(adev) || adev->in_suspend)) {
		*(ring->wptr_cpu_addr) = 0;
		*(ring->rptr_cpu_addr) = 0;
		amdgpu_ring_clear_ring(ring);
	}

	r = mes_v12_1_mqd_init(ring);
	if (r)
		return r;

	if (pipe == AMDGPU_MES_SCHED_PIPE) {
		if (adev->enable_uni_mes)
			r = amdgpu_mes_map_legacy_queue(adev, ring, xcc_id);
		else
			r = mes_v12_1_kiq_enable_queue(adev, xcc_id);
		if (r)
			return r;
	} else {
		mes_v12_1_queue_init_register(ring, xcc_id);
	}

	/* get MES scheduler/KIQ versions */
	mutex_lock(&adev->srbm_mutex);
	soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));

	if (pipe == AMDGPU_MES_SCHED_PIPE)
		adev->mes.sched_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO);
	else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
		adev->mes.kiq_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO);

	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
	mutex_unlock(&adev->srbm_mutex);

	return 0;
}
1422
mes_v12_1_ring_init(struct amdgpu_device * adev,int xcc_id,int pipe)1423 static int mes_v12_1_ring_init(struct amdgpu_device *adev,
1424 int xcc_id, int pipe)
1425 {
1426 struct amdgpu_ring *ring;
1427 int inst = MES_PIPE_INST(xcc_id, pipe);
1428
1429 ring = &adev->mes.ring[inst];
1430
1431 ring->funcs = &mes_v12_1_ring_funcs;
1432
1433 ring->me = 3;
1434 ring->pipe = pipe;
1435 ring->queue = 0;
1436 ring->xcc_id = xcc_id;
1437 ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
1438
1439 ring->ring_obj = NULL;
1440 ring->use_doorbell = true;
1441 ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst];
1442 ring->no_scheduler = true;
1443 snprintf(ring->name, sizeof(ring->name), "mes_%hhu.%hhu.%hhu.%hhu",
1444 (unsigned char)xcc_id, (unsigned char)ring->me,
1445 (unsigned char)ring->pipe, (unsigned char)ring->queue);
1446
1447 if (pipe == AMDGPU_MES_SCHED_PIPE)
1448 ring->doorbell_index =
1449 (adev->doorbell_index.mes_ring0 +
1450 xcc_id * adev->doorbell_index.xcc_doorbell_range)
1451 << 1;
1452 else
1453 ring->doorbell_index =
1454 (adev->doorbell_index.mes_ring1 +
1455 xcc_id * adev->doorbell_index.xcc_doorbell_range)
1456 << 1;
1457
1458 return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
1459 AMDGPU_RING_PRIO_DEFAULT, NULL);
1460 }
1461
/*
 * mes_v12_1_kiq_ring_init - set up and register the gfx KIQ ring of
 * @xcc_id (used when unified MES is disabled).
 *
 * The KIQ ring sits on ME 3, pipe 1, queue 0 and reuses the MES
 * ring1 doorbell and the KIQ-pipe EOP buffer.
 *
 * Returns the amdgpu_ring_init() result.
 */
static int mes_v12_1_kiq_ring_init(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_ring *ring;
	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE);

	spin_lock_init(&adev->gfx.kiq[xcc_id].ring_lock);

	ring = &adev->gfx.kiq[xcc_id].ring;

	ring->me = 3;
	ring->pipe = 1;
	ring->queue = 0;
	ring->xcc_id = xcc_id;
	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);

	/* adev is left NULL here; presumably filled in by
	 * amdgpu_ring_init() below - confirm.
	 */
	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst];
	ring->no_scheduler = true;
	ring->doorbell_index =
		(adev->doorbell_index.mes_ring1 +
		 xcc_id * adev->doorbell_index.xcc_doorbell_range)
		 << 1;

	snprintf(ring->name, sizeof(ring->name), "mes_kiq_%hhu.%hhu.%hhu.%hhu",
		 (unsigned char)xcc_id, (unsigned char)ring->me,
		 (unsigned char)ring->pipe, (unsigned char)ring->queue);

	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
				AMDGPU_RING_PRIO_DEFAULT, NULL);
}
1494
mes_v12_1_mqd_sw_init(struct amdgpu_device * adev,enum amdgpu_mes_pipe pipe,int xcc_id)1495 static int mes_v12_1_mqd_sw_init(struct amdgpu_device *adev,
1496 enum amdgpu_mes_pipe pipe,
1497 int xcc_id)
1498 {
1499 int r, mqd_size = sizeof(struct v12_1_mes_mqd);
1500 struct amdgpu_ring *ring;
1501 int inst = MES_PIPE_INST(xcc_id, pipe);
1502
1503 if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
1504 ring = &adev->gfx.kiq[xcc_id].ring;
1505 else
1506 ring = &adev->mes.ring[inst];
1507
1508 if (ring->mqd_obj)
1509 return 0;
1510
1511 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
1512 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
1513 &ring->mqd_gpu_addr, &ring->mqd_ptr);
1514 if (r) {
1515 dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
1516 return r;
1517 }
1518
1519 memset(ring->mqd_ptr, 0, mqd_size);
1520
1521 /* prepare MQD backup */
1522 adev->mes.mqd_backup[inst] = kmalloc(mqd_size, GFP_KERNEL);
1523 if (!adev->mes.mqd_backup[inst])
1524 dev_warn(adev->dev,
1525 "no memory to create MQD backup for ring %s\n",
1526 ring->name);
1527
1528 return 0;
1529 }
1530
/*
 * mes_v12_1_sw_init - software-side MES setup for all XCCs.
 *
 * Installs the MES callbacks, sizes the event log (per-pipe, per-XCC
 * when unified MES is on), runs the generic amdgpu_mes_init(), and then
 * per XCC/pipe allocates the EOP buffer and MQD and initializes the
 * ring (gfx KIQ ring for the KIQ pipe when unified MES is off).  With
 * unified MES on multi-XCC parts a shared command buffer is allocated
 * as well.
 *
 * Returns 0 on success or the first error encountered; mes_v12_1_sw_fini
 * is expected to clean up after a partial failure.
 */
static int mes_v12_1_sw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int pipe, r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);

	adev->mes.funcs = &mes_v12_1_funcs;
	adev->mes.kiq_hw_init = &mes_v12_1_kiq_hw_init;
	adev->mes.kiq_hw_fini = &mes_v12_1_kiq_hw_fini;
	adev->mes.enable_legacy_queue_map = true;

	adev->mes.event_log_size =
		adev->enable_uni_mes ? (AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE * num_xcc) : AMDGPU_MES_LOG_BUFFER_SIZE;

	r = amdgpu_mes_init(adev);
	if (r)
		return r;

	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
			r = mes_v12_1_allocate_eop_buf(adev, pipe, xcc_id);
			if (r)
				return r;

			r = mes_v12_1_mqd_sw_init(adev, pipe, xcc_id);
			if (r)
				return r;

			if (!adev->enable_uni_mes && pipe ==
					AMDGPU_MES_KIQ_PIPE)
				r = mes_v12_1_kiq_ring_init(adev, xcc_id);
			else
				r = mes_v12_1_ring_init(adev, xcc_id, pipe);
			if (r)
				return r;

			if (adev->enable_uni_mes && num_xcc > 1) {
				r = mes_v12_1_allocate_shared_cmd_buf(adev,
							pipe, xcc_id);
				if (r)
					return r;
			}
		}
	}

	return 0;
}
1577
/*
 * mes_v12_1_sw_fini - tear down everything mes_v12_1_sw_init() created.
 *
 * Frees per-pipe shared command buffers, MQD backups, EOP buffers and
 * rings, releases the firmware handles, the gfx KIQ rings (when unified
 * MES is off) and, for direct firmware loads, the backdoor ucode BOs,
 * then runs the generic amdgpu_mes_fini().
 */
static int mes_v12_1_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int pipe, inst, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);

	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
			inst = MES_PIPE_INST(xcc_id, pipe);

			amdgpu_bo_free_kernel(&adev->mes.shared_cmd_buf_obj[inst],
					      &adev->mes.shared_cmd_buf_gpu_addr[inst],
					      NULL);

			kfree(adev->mes.mqd_backup[inst]);

			amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[inst],
					      &adev->mes.eop_gpu_addr[inst],
					      NULL);

			/* KIQ-pipe MES rings only exist with unified MES */
			if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) {
				amdgpu_bo_free_kernel(&adev->mes.ring[inst].mqd_obj,
						      &adev->mes.ring[inst].mqd_gpu_addr,
						      &adev->mes.ring[inst].mqd_ptr);
				amdgpu_ring_fini(&adev->mes.ring[inst]);
			}
		}
	}

	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++)
		amdgpu_ucode_release(&adev->mes.fw[pipe]);

	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
		if (!adev->enable_uni_mes) {
			amdgpu_bo_free_kernel(&adev->gfx.kiq[xcc_id].ring.mqd_obj,
					      &adev->gfx.kiq[xcc_id].ring.mqd_gpu_addr,
					      &adev->gfx.kiq[xcc_id].ring.mqd_ptr);
			amdgpu_ring_fini(&adev->gfx.kiq[xcc_id].ring);
		}

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
			mes_v12_1_free_ucode_buffers(adev,
					AMDGPU_MES_KIQ_PIPE, xcc_id);
			mes_v12_1_free_ucode_buffers(adev,
					AMDGPU_MES_SCHED_PIPE, xcc_id);
		}
	}

	amdgpu_mes_fini(adev);
	return 0;
}
1628
mes_v12_1_kiq_dequeue_sched(struct amdgpu_device * adev,int xcc_id)1629 static void mes_v12_1_kiq_dequeue_sched(struct amdgpu_device *adev,
1630 int xcc_id)
1631 {
1632 uint32_t data;
1633 int i;
1634
1635 mutex_lock(&adev->srbm_mutex);
1636 soc_v1_0_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0,
1637 GET_INST(GC, xcc_id));
1638
1639 /* disable the queue if it's active */
1640 if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) {
1641 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1);
1642 for (i = 0; i < adev->usec_timeout; i++) {
1643 if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
1644 break;
1645 udelay(1);
1646 }
1647 }
1648 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL);
1649 data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
1650 DOORBELL_EN, 0);
1651 data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
1652 DOORBELL_HIT, 1);
1653 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data);
1654
1655 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0);
1656
1657 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0);
1658 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0);
1659 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0);
1660
1661 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
1662 mutex_unlock(&adev->srbm_mutex);
1663
1664 adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = false;
1665 }
1666
/*
 * mes_v12_1_kiq_setting - tell the RLC which queue acts as the KIQ.
 *
 * Writes the queue identity into the low byte of RLC_CP_SCHEDULERS,
 * then sets bit 7 in a second write.
 * NOTE(review): the (me << 5 | pipe << 3 | queue) layout and the
 * meaning of bit 0x80 are taken from the code itself - confirm against
 * the RLC register spec.
 */
static void mes_v12_1_kiq_setting(struct amdgpu_ring *ring, int xcc_id)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
}
1680
/*
 * mes_v12_1_kiq_hw_init - bring up the MES/KIQ pipes on one XCC.
 *
 * Registers the KIQ queue with the RLC, loads (or just points at) the
 * firmware depending on the load type, enables MES, initializes the KIQ
 * queue and - for unified MES - sends the set_hw_resources packets.
 * When legacy queue mapping is enabled, continues into the full
 * per-XCC hw init.  Any failure after enable tears the block down via
 * mes_v12_1_hw_fini().
 *
 * Returns 0 on success or a negative error code.
 */
static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id)
{
	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE);
	int r = 0;
	struct amdgpu_ip_block *ip_block;

	/* unified MES hosts the KIQ on its own ring */
	if (adev->enable_uni_mes)
		mes_v12_1_kiq_setting(&adev->mes.ring[inst], xcc_id);
	else
		mes_v12_1_kiq_setting(&adev->gfx.kiq[xcc_id].ring, xcc_id);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {

		/* backdoor-load both pipes; prime the icache once */
		r = mes_v12_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE,
					     false, xcc_id);
		if (r) {
			DRM_ERROR("failed to load MES fw, r=%d\n", r);
			return r;
		}

		r = mes_v12_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE,
					     true, xcc_id);
		if (r) {
			DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
			return r;
		}

		mes_v12_1_set_ucode_start_addr(adev, xcc_id);

	} else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
		mes_v12_1_set_ucode_start_addr(adev, xcc_id);

	mes_v12_1_enable(adev, true, xcc_id);

	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES);
	if (unlikely(!ip_block)) {
		dev_err(adev->dev, "Failed to get MES handle\n");
		return -EINVAL;
	}

	r = mes_v12_1_queue_init(adev, AMDGPU_MES_KIQ_PIPE, xcc_id);
	if (r)
		goto failure;

	if (adev->enable_uni_mes) {
		r = mes_v12_1_set_hw_resources(&adev->mes,
				AMDGPU_MES_KIQ_PIPE, xcc_id);
		if (r)
			goto failure;

		mes_v12_1_set_hw_resources_1(&adev->mes,
				AMDGPU_MES_KIQ_PIPE, xcc_id);
	}

	if (adev->mes.enable_legacy_queue_map) {
		r = mes_v12_1_xcc_hw_init(ip_block, xcc_id);
		if (r)
			goto failure;
	}

	return r;

failure:
	mes_v12_1_hw_fini(ip_block);
	return r;
}
1747
/*
 * mes_v12_1_kiq_hw_fini - stop the MES scheduler queue and halt MES on
 * one XCC.
 *
 * If the scheduler ring is still live it is unmapped (unified MES) or
 * force-dequeued through the HQD registers, then MES is halted.
 *
 * Always returns 0.
 */
static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id)
{
	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);
	struct amdgpu_ring *sched_ring = &adev->mes.ring[inst];

	if (sched_ring->sched.ready) {
		if (adev->enable_uni_mes)
			amdgpu_mes_unmap_legacy_queue(adev, sched_ring,
						      RESET_QUEUES, 0, 0,
						      xcc_id);
		else
			mes_v12_1_kiq_dequeue_sched(adev, xcc_id);

		sched_ring->sched.ready = false;
	}

	mes_v12_1_enable(adev, false, xcc_id);

	return 0;
}
1767
mes_v12_1_setup_coop_mode(struct amdgpu_device * adev,int xcc_id)1768 static int mes_v12_1_setup_coop_mode(struct amdgpu_device *adev, int xcc_id)
1769 {
1770 u32 num_xcc_per_xcp, num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1771 int r = 0;
1772
1773 if (num_xcc == 1)
1774 return r;
1775
1776 if (adev->gfx.funcs &&
1777 adev->gfx.funcs->get_xccs_per_xcp)
1778 num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev);
1779 else
1780 return -EINVAL;
1781
1782 switch (adev->xcp_mgr->mode) {
1783 case AMDGPU_SPX_PARTITION_MODE:
1784 adev->mes.enable_coop_mode = 1;
1785 adev->mes.master_xcc_ids[xcc_id] = 0;
1786 break;
1787 case AMDGPU_DPX_PARTITION_MODE:
1788 adev->mes.enable_coop_mode = 1;
1789 adev->mes.master_xcc_ids[xcc_id] =
1790 (xcc_id/num_xcc_per_xcp) * (num_xcc / 2);
1791 break;
1792 case AMDGPU_QPX_PARTITION_MODE:
1793 adev->mes.enable_coop_mode = 1;
1794 adev->mes.master_xcc_ids[xcc_id] =
1795 (xcc_id/num_xcc_per_xcp) * (num_xcc / 4);
1796 break;
1797 case AMDGPU_CPX_PARTITION_MODE:
1798 adev->mes.enable_coop_mode = 0;
1799 break;
1800 default:
1801 r = -EINVAL;
1802 break;
1803 }
1804 return r;
1805 }
1806
mes_v12_1_xcc_hw_init(struct amdgpu_ip_block * ip_block,int xcc_id)1807 static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id)
1808 {
1809 int r;
1810 struct amdgpu_device *adev = ip_block->adev;
1811
1812 if (adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready)
1813 goto out;
1814
1815 if (!adev->enable_mes_kiq) {
1816 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1817 r = mes_v12_1_load_microcode(adev,
1818 AMDGPU_MES_SCHED_PIPE, true, xcc_id);
1819 if (r) {
1820 DRM_ERROR("failed to MES fw, r=%d\n", r);
1821 return r;
1822 }
1823
1824 mes_v12_1_set_ucode_start_addr(adev, xcc_id);
1825
1826 } else if (adev->firmware.load_type ==
1827 AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1828
1829 mes_v12_1_set_ucode_start_addr(adev, xcc_id);
1830 }
1831
1832 mes_v12_1_enable(adev, true, xcc_id);
1833 }
1834
1835 /* Enable the MES to handle doorbell ring on unmapped queue */
1836 mes_v12_1_enable_unmapped_doorbell_handling(&adev->mes, true, xcc_id);
1837
1838 r = mes_v12_1_queue_init(adev, AMDGPU_MES_SCHED_PIPE, xcc_id);
1839 if (r)
1840 goto failure;
1841
1842 r = mes_v12_1_set_hw_resources(&adev->mes,
1843 AMDGPU_MES_SCHED_PIPE, xcc_id);
1844 if (r)
1845 goto failure;
1846
1847 if (adev->enable_uni_mes) {
1848 r = mes_v12_1_setup_coop_mode(adev, xcc_id);
1849 if (r)
1850 goto failure;
1851 mes_v12_1_set_hw_resources_1(&adev->mes,
1852 AMDGPU_MES_SCHED_PIPE, xcc_id);
1853 }
1854 mes_v12_1_init_aggregated_doorbell(&adev->mes, xcc_id);
1855
1856 r = mes_v12_1_query_sched_status(&adev->mes,
1857 AMDGPU_MES_SCHED_PIPE, xcc_id);
1858 if (r) {
1859 DRM_ERROR("MES is busy\n");
1860 goto failure;
1861 }
1862
1863 out:
1864 /*
1865 * Disable KIQ ring usage from the driver once MES is enabled.
1866 * MES uses KIQ ring exclusively so driver cannot access KIQ ring
1867 * with MES enabled.
1868 */
1869 adev->gfx.kiq[xcc_id].ring.sched.ready = false;
1870 adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = true;
1871
1872 return 0;
1873
1874 failure:
1875 mes_v12_1_hw_fini(ip_block);
1876 return r;
1877 }
1878
mes_v12_1_hw_init(struct amdgpu_ip_block * ip_block)1879 static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block)
1880 {
1881 struct amdgpu_device *adev = ip_block->adev;
1882 int r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1883
1884 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
1885 r = mes_v12_1_xcc_hw_init(ip_block, xcc_id);
1886 if (r)
1887 return r;
1888 }
1889
1890 return 0;
1891 }
1892
/* IP-block hw_fini hook: intentionally empty.
 * NOTE(review): actual per-XCC shutdown appears to happen through
 * mes_v12_1_kiq_hw_fini() — confirm nothing else needs teardown here.
 */
static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block)
{
	return 0;
}
1897
mes_v12_1_suspend(struct amdgpu_ip_block * ip_block)1898 static int mes_v12_1_suspend(struct amdgpu_ip_block *ip_block)
1899 {
1900 int r;
1901
1902 r = amdgpu_mes_suspend(ip_block->adev);
1903 if (r)
1904 return r;
1905
1906 return mes_v12_1_hw_fini(ip_block);
1907 }
1908
mes_v12_1_resume(struct amdgpu_ip_block * ip_block)1909 static int mes_v12_1_resume(struct amdgpu_ip_block *ip_block)
1910 {
1911 int r;
1912
1913 r = mes_v12_1_hw_init(ip_block);
1914 if (r)
1915 return r;
1916
1917 return amdgpu_mes_resume(ip_block->adev);
1918 }
1919
mes_v12_1_early_init(struct amdgpu_ip_block * ip_block)1920 static int mes_v12_1_early_init(struct amdgpu_ip_block *ip_block)
1921 {
1922 struct amdgpu_device *adev = ip_block->adev;
1923 int pipe, r;
1924
1925 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
1926 r = amdgpu_mes_init_microcode(adev, pipe);
1927 if (r)
1928 return r;
1929 }
1930
1931 return 0;
1932 }
1933
/* IP-block late_init hook: run the MES self test on each XCC.  Self-test
 * failures are intentionally not propagated (best-effort diagnostics).
 */
static int mes_v12_1_late_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);

	/* TODO: remove it if issue fixed.
	 * Self test is skipped entirely in COOP mode for now; note this
	 * early return makes the coop-mode master filter inside the loop
	 * below currently unreachable.
	 */
	if (adev->mes.enable_coop_mode)
		return 0;

	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
		/* for COOP mode, only test master xcc. */
		if (adev->mes.enable_coop_mode &&
		    adev->mes.master_xcc_ids[xcc_id] != xcc_id)
			continue;

		mes_v12_1_self_test(adev, xcc_id);
	}

	return 0;
}
1954
/* amd_ip_funcs callbacks for the MES v12.1 IP block.
 * NOTE(review): is_idle/wait_for_idle/soft_reset hooks are absent —
 * confirm the amdgpu core treats missing hooks as no-ops for MES.
 */
static const struct amd_ip_funcs mes_v12_1_ip_funcs = {
	.name = "mes_v12_1",
	.early_init = mes_v12_1_early_init,
	.late_init = mes_v12_1_late_init,
	.sw_init = mes_v12_1_sw_init,
	.sw_fini = mes_v12_1_sw_fini,
	.hw_init = mes_v12_1_hw_init,
	.hw_fini = mes_v12_1_hw_fini,
	.suspend = mes_v12_1_suspend,
	.resume = mes_v12_1_resume,
};
1966
/* IP block version descriptor registered with the amdgpu core (MES 12.1.0). */
const struct amdgpu_ip_block_version mes_v12_1_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_MES,
	.major = 12,
	.minor = 1,
	.rev = 0,
	.funcs = &mes_v12_1_ip_funcs,
};
1974
/* Allocate a zeroed, CPU-mapped GTT buffer for the self test.
 *
 * @bo/@addr/@ptr: receive the BO handle, GPU address and CPU mapping.
 * Returns 0 on success or a negative errno.
 *
 * Fix: the return value of amdgpu_bo_create_kernel() was ignored and
 * failure was inferred from !*bo, collapsing every error (pin/map
 * failures included) into -ENOMEM; propagate the real error instead.
 */
static int mes_v12_1_alloc_test_buf(struct amdgpu_device *adev,
				    struct amdgpu_bo **bo, uint64_t *addr,
				    void **ptr, int size)
{
	int r;

	r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, bo, addr, ptr);
	if (r) {
		dev_err(adev->dev, "failed to allocate test buffer bo\n");
		return r;
	}

	memset(*ptr, 0, size);
	return 0;
}
1988
/* Map the self-test context BO at a fixed VA inside @vm and wait for the
 * page-table updates to land before returning.
 *
 * Fix: the function unconditionally returned 0, so failures from
 * amdgpu_vm_bo_update()/amdgpu_vm_update_pdes() reached via the error
 * label were silently swallowed; propagate r instead.
 */
static int mes_v12_1_map_test_bo(struct amdgpu_device *adev,
				 struct amdgpu_bo *bo, struct amdgpu_vm *vm,
				 struct amdgpu_bo_va **bo_va, u64 va, int size)
{
	struct amdgpu_sync sync;
	int r;

	r = amdgpu_map_static_csa(adev, vm, bo, bo_va, va, size);
	if (r)
		return r;

	amdgpu_sync_create(&sync);

	r = amdgpu_vm_bo_update(adev, *bo_va, false);
	if (r) {
		dev_err(adev->dev, "failed to do vm_bo_update on meta data\n");
		goto error;
	}
	amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update, GFP_KERNEL);

	r = amdgpu_vm_update_pdes(adev, vm, false);
	if (r) {
		dev_err(adev->dev, "failed to update pdes on meta data\n");
		goto error;
	}
	amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL);

	/* Block until both the BO mapping and the PDE updates are visible. */
	amdgpu_sync_wait(&sync, false);

error:
	amdgpu_sync_free(&sync);
	return r;
}
2021
/* Submit a trivial packet on a self-test queue and poll for completion.
 *
 * Compute queues: a SET_UCONFIG_REG packet writes 0xDEADBEEF to
 * SCRATCH_REG0 (on this XCC, or on every XCC whose COOP master is
 * @xcc_id), after seeding the register with 0xCAFEDEAD.  SDMA queues:
 * an SDMA write packet writes 0xDEADBEEF to @fence_gpu_addr, seeded via
 * the CPU mapping.  The ring is NOP-padded, the wptr published through
 * @wptr_cpu_addr, the doorbell rung, then the marker is polled until
 * adev->usec_timeout is exhausted.
 *
 * Returns 0 on success, -ETIMEDOUT if the marker never appears.
 *
 * Fix: tmp was uninitialized; in the COOP compute poll path it is only
 * assigned when a master-XCC entry matches @xcc_id, so the subsequent
 * tmp == 0xDEADBEEF comparison could read garbage (UB).
 */
static int mes_v12_1_test_ring(struct amdgpu_device *adev, int xcc_id,
			       u32 *queue_ptr, u64 fence_gpu_addr,
			       void *fence_cpu_ptr, void *wptr_cpu_addr,
			       u64 doorbell_idx, int queue_type)
{
	volatile uint32_t *cpu_ptr = fence_cpu_ptr;
	int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	int sdma_ring_align = 0x10, compute_ring_align = 0x100;
	uint32_t tmp = 0, xcc_offset;
	int r = 0, i, j, wptr = 0;

	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
		/* Seed scratch with a non-marker value on each target XCC. */
		if (!adev->mes.enable_coop_mode) {
			WREG32_SOC15(GC, GET_INST(GC, xcc_id),
				     regSCRATCH_REG0, 0xCAFEDEAD);
		} else {
			for (i = 0; i < num_xcc; i++) {
				if (adev->mes.master_xcc_ids[i] == xcc_id)
					WREG32_SOC15(GC, GET_INST(GC, i),
						     regSCRATCH_REG0, 0xCAFEDEAD);
			}
		}

		xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
		queue_ptr[wptr++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
		queue_ptr[wptr++] = xcc_offset - PACKET3_SET_UCONFIG_REG_START;
		queue_ptr[wptr++] = 0xDEADBEEF;

		/* Pad to the compute ring alignment with NOPs. */
		for (i = wptr; i < compute_ring_align; i++)
			queue_ptr[wptr++] = PACKET3(PACKET3_NOP, 0x3FFF);

	} else if (queue_type == AMDGPU_RING_TYPE_SDMA) {
		*cpu_ptr = 0xCAFEDEAD;

		queue_ptr[wptr++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
			SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
		queue_ptr[wptr++] = lower_32_bits(fence_gpu_addr);
		queue_ptr[wptr++] = upper_32_bits(fence_gpu_addr);
		queue_ptr[wptr++] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
		queue_ptr[wptr++] = 0xDEADBEEF;

		for (i = wptr; i < sdma_ring_align; i++)
			queue_ptr[wptr++] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);

		/* Convert dword count to bytes — presumably the SDMA wptr
		 * is byte-based; verify against the SDMA doorbell spec.
		 */
		wptr <<= 2;
	}

	atomic64_set((atomic64_t *)wptr_cpu_addr, wptr);
	WDOORBELL64(doorbell_idx, wptr);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (queue_type == AMDGPU_RING_TYPE_SDMA) {
			tmp = le32_to_cpu(*cpu_ptr);
		} else {
			if (!adev->mes.enable_coop_mode) {
				tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
						   regSCRATCH_REG0);
			} else {
				/* COOP: all mastered XCCs must show the
				 * marker; stop early on the first miss.
				 */
				for (j = 0; j < num_xcc; j++) {
					if (xcc_id != adev->mes.master_xcc_ids[j])
						continue;

					tmp = RREG32_SOC15(GC, GET_INST(GC, j),
							   regSCRATCH_REG0);
					if (tmp != 0xDEADBEEF)
						break;
				}
			}
		}

		if (tmp == 0xDEADBEEF)
			break;

		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout) {
		dev_err(adev->dev, "xcc%d: mes self test (%s) failed\n", xcc_id,
			queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute");

		/* Debug aid: park here so HW state can be inspected. */
		while (halt_if_hws_hang)
			schedule();

		r = -ETIMEDOUT;
	} else {
		dev_info(adev->dev, "xcc%d: mes self test (%s) pass\n", xcc_id,
			 queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute");
	}

	return r;
}
2116
/* Layout of the two-page self-test user context mapped at USER_CTX_VA:
 * the ring lives in page 0, the EOP buffer in page 1, and three u64
 * slots (fence, rptr, wptr — in that order, growing toward the end) are
 * packed at the very end of the buffer.  The same offsets are applied to
 * both the GPU VA and the CPU mapping.
 */
#define USER_CTX_SIZE (PAGE_SIZE * 2)
#define USER_CTX_VA AMDGPU_VA_RESERVED_BOTTOM
#define RING_OFFSET(addr) ((addr))
#define EOP_OFFSET(addr) ((addr) + PAGE_SIZE)
#define WPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64))
#define RPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 2)
#define FENCE_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 3)
2124
/* Add one real HW queue through MES, run the ring self test on it, then
 * remove the queue again.  @queue_type selects compute or SDMA.
 *
 * Fixes:
 *  - mqd_size was multiplied by mqd_count AND the allocation multiplied
 *    again, allocating mqd_count^2 MQDs and using the total size as the
 *    per-MQD stride/offset; mqd_size now stays per-MQD.
 *  - the return value of mes_v12_1_test_ring() was discarded, so a
 *    failed self test reported success unless queue removal also failed.
 */
static int mes_v12_1_test_queue(struct amdgpu_device *adev, int xcc_id,
				int pasid, struct amdgpu_vm *vm, u64 meta_gpu_addr,
				u64 queue_gpu_addr, void *ctx_ptr, int queue_type)
{
	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
	struct amdgpu_mqd *mqd_mgr = &adev->mqds[queue_type];
	struct amdgpu_mqd_prop mqd_prop = {0};
	struct mes_add_queue_input add_queue = {0};
	struct mes_remove_queue_input remove_queue = {0};
	struct amdgpu_bo *mqd_bo = NULL;
	int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	int i, r, ret, off, mqd_size, mqd_count = 1;
	void *mqd_ptr = NULL;
	u64 mqd_gpu_addr, doorbell_idx;

	/* extra one page size padding for mes fw */
	mqd_size = mqd_mgr->mqd_size + PAGE_SIZE;

	if (queue_type == AMDGPU_RING_TYPE_SDMA) {
		doorbell_idx = adev->mes.db_start_dw_offset +
			       adev->doorbell_index.sdma_engine[0];
	} else {
		doorbell_idx = adev->mes.db_start_dw_offset +
			       adev->doorbell_index.userqueue_start;
	}

	/* COOP mode needs one MQD per XCC mastered by @xcc_id; mqd_size
	 * deliberately stays per-MQD (the total is computed at allocation).
	 */
	if (adev->mes.enable_coop_mode &&
	    queue_type == AMDGPU_RING_TYPE_COMPUTE) {
		for (i = 0, mqd_count = 0; i < num_xcc; i++) {
			if (adev->mes.master_xcc_ids[i] == xcc_id)
				mqd_count++;
		}
	}

	r = mes_v12_1_alloc_test_buf(adev, &mqd_bo, &mqd_gpu_addr,
				     &mqd_ptr, mqd_size * mqd_count);
	if (r < 0)
		return r;

	mqd_prop.mqd_gpu_addr = mqd_gpu_addr;
	mqd_prop.hqd_base_gpu_addr = RING_OFFSET(USER_CTX_VA);
	mqd_prop.eop_gpu_addr = EOP_OFFSET(USER_CTX_VA);
	mqd_prop.wptr_gpu_addr = WPTR_OFFSET(USER_CTX_VA);
	mqd_prop.rptr_gpu_addr = RPTR_OFFSET(USER_CTX_VA);
	mqd_prop.doorbell_index = doorbell_idx;
	mqd_prop.queue_size = PAGE_SIZE;
	mqd_prop.mqd_stride_size = mqd_size;	/* per-MQD stride */
	mqd_prop.use_doorbell = true;
	mqd_prop.hqd_active = false;

	mqd_mgr->init_mqd(adev, mqd_ptr, &mqd_prop);
	/* Initialize any additional COOP MQDs at their per-MQD offsets. */
	for (i = 1; i < mqd_count; i++) {
		off = mqd_size * i;
		mqd_prop.mqd_gpu_addr = mqd_gpu_addr + off;
		mqd_mgr->init_mqd(adev, (char *)mqd_ptr + off, &mqd_prop);
	}

	add_queue.xcc_id = xcc_id;
	add_queue.process_id = pasid;
	add_queue.page_table_base_addr = adev->vm_manager.vram_base_offset +
		amdgpu_bo_gpu_offset(vm->root.bo) - adev->gmc.vram_start;
	add_queue.process_va_start = 0;
	add_queue.process_va_end = adev->vm_manager.max_pfn - 1;
	add_queue.process_context_addr = meta_gpu_addr;
	add_queue.gang_context_addr = meta_gpu_addr + AMDGPU_MES_PROC_CTX_SIZE;
	add_queue.doorbell_offset = doorbell_idx;
	add_queue.mqd_addr = mqd_gpu_addr;
	add_queue.wptr_addr = mqd_prop.wptr_gpu_addr;
	add_queue.wptr_mc_addr = WPTR_OFFSET(queue_gpu_addr);
	add_queue.queue_type = queue_type;
	add_queue.vm_cntx_cntl = hub->vm_cntx_cntl;

	r = mes_v12_1_add_hw_queue(&adev->mes, &add_queue);
	if (r)
		goto error;

	/* Capture the test result; the queue is removed regardless. */
	r = mes_v12_1_test_ring(adev, xcc_id, (u32 *)RING_OFFSET((char *)ctx_ptr),
				FENCE_OFFSET(USER_CTX_VA),
				FENCE_OFFSET((char *)ctx_ptr),
				WPTR_OFFSET((char *)ctx_ptr),
				doorbell_idx, queue_type);

	remove_queue.xcc_id = xcc_id;
	remove_queue.doorbell_offset = doorbell_idx;
	remove_queue.gang_context_addr = add_queue.gang_context_addr;
	ret = mes_v12_1_remove_hw_queue(&adev->mes, &remove_queue);
	if (!r)
		r = ret;

error:
	amdgpu_bo_free_kernel(&mqd_bo, &mqd_gpu_addr, &mqd_ptr);
	return r;
}
2220
/* End-to-end MES self test for one XCC: allocate process/gang metadata
 * and a two-page user context, create a throwaway VM, map the context at
 * USER_CTX_VA, then add/run/remove a test queue for each entry in
 * queue_types.  Everything is torn down on exit via the goto ladder.
 */
static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id)
{
	int queue_types[] = { AMDGPU_RING_TYPE_COMPUTE,
			      /* AMDGPU_RING_TYPE_SDMA */ };
	struct amdgpu_bo_va *bo_va = NULL;
	struct amdgpu_vm *vm = NULL;
	struct amdgpu_bo *meta_bo = NULL, *ctx_bo = NULL;
	void *meta_ptr = NULL, *ctx_ptr = NULL;
	u64 meta_gpu_addr, ctx_gpu_addr;
	int size, i, r, pasid;

	/* Best-effort PASID; fall back to 0 when allocation fails.
	 * NOTE(review): amdgpu_pasid_free(0) then runs on exit — confirm
	 * freeing pasid 0 is a harmless no-op.
	 */
	pasid = amdgpu_pasid_alloc(16);
	if (pasid < 0)
		pasid = 0;

	/* Process + gang context consumed by the MES firmware. */
	size = AMDGPU_MES_PROC_CTX_SIZE + AMDGPU_MES_GANG_CTX_SIZE;
	r = mes_v12_1_alloc_test_buf(adev, &meta_bo, &meta_gpu_addr,
				     &meta_ptr, size);
	if (r < 0)
		goto err2;

	/* Ring/EOP/wptr/rptr/fence backing store, GPU-mapped below. */
	r = mes_v12_1_alloc_test_buf(adev, &ctx_bo, &ctx_gpu_addr,
				     &ctx_ptr, USER_CTX_SIZE);
	if (r < 0)
		goto err2;

	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
	if (!vm) {
		r = -ENOMEM;
		goto err2;
	}

	r = amdgpu_vm_init(adev, vm, -1, pasid);
	if (r)
		goto err1;

	r = mes_v12_1_map_test_bo(adev, ctx_bo, vm, &bo_va,
				  USER_CTX_VA, USER_CTX_SIZE);
	if (r)
		goto err0;

	for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
		/* Fresh context per queue type so stale markers cannot
		 * satisfy the poll in mes_v12_1_test_ring().
		 */
		memset(ctx_ptr, 0, USER_CTX_SIZE);

		r = mes_v12_1_test_queue(adev, xcc_id, pasid, vm, meta_gpu_addr,
					 ctx_gpu_addr, ctx_ptr, queue_types[i]);
		if (r)
			break;
	}

	amdgpu_unmap_static_csa(adev, vm, ctx_bo, bo_va, USER_CTX_VA);
err0:
	amdgpu_vm_fini(adev, vm);
err1:
	kfree(vm);
err2:
	/* NOTE(review): early-exit paths reach here with NULL bos —
	 * presumably amdgpu_bo_free_kernel() tolerates that; verify.
	 */
	amdgpu_bo_free_kernel(&meta_bo, &meta_gpu_addr, &meta_ptr);
	amdgpu_bo_free_kernel(&ctx_bo, &ctx_gpu_addr, &ctx_ptr);
	amdgpu_pasid_free(pasid);
	return r;
}
2282
2283