/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include "amdgpu.h"
#include "soc15_common.h"
#include "soc21.h"
#include "gfx_v11_0.h"
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "gc/gc_11_0_0_default.h"
#include "v11_structs.h"
#include "mes_v11_api_def.h"

MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes_2.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes_2.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes_2.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes_2.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes_2.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes_2.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes_2.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes_2.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_mes_2.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_mes1.bin");

static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block);
static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block);
static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev);
static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev);

#define MES_EOP_SIZE 2048
#define GFX_MES_DRAM_SIZE 0x80000
#define MES11_HW_RESOURCE_1_SIZE (128 * AMDGPU_GPU_PAGE_SIZE)

#define MES11_HUNG_DB_OFFSET_ARRAY_SIZE 4

static void mes_v11_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
			     ring->wptr);
		WDOORBELL64(ring->doorbell_index, ring->wptr);
	} else {
		BUG();
	}
}

static u64 mes_v11_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return *ring->rptr_cpu_addr;
}

static u64 mes_v11_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	u64 wptr;

	if (ring->use_doorbell)
		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
	else
		BUG();
	return wptr;
}

static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
	.type = AMDGPU_RING_TYPE_MES,
	.align_mask = 1,
	.nop = 0,
	.support_64bit_ptrs = true,
	.get_rptr = mes_v11_0_ring_get_rptr,
	.get_wptr = mes_v11_0_ring_get_wptr,
	.set_wptr = mes_v11_0_ring_set_wptr,
	.insert_nop = amdgpu_ring_insert_nop,
};

static const char *mes_v11_0_opcodes[] = {
	"SET_HW_RSRC",
	"SET_SCHEDULING_CONFIG",
	"ADD_QUEUE",
	"REMOVE_QUEUE",
	"PERFORM_YIELD",
	"SET_GANG_PRIORITY_LEVEL",
	"SUSPEND",
	"RESUME",
	"RESET",
	"SET_LOG_BUFFER",
	"CHANGE_GANG_PRIORITY",
	"QUERY_SCHEDULER_STATUS",
	"PROGRAM_GDS",
	"SET_DEBUG_VMID",
	"MISC",
	"UPDATE_ROOT_PAGE_TABLE",
	"AMD_LOG",
	"unused",
	"unused",
	"SET_HW_RSRC_1",
};

static const char *mes_v11_0_misc_opcodes[] = {
	"WRITE_REG",
	"INV_GART",
	"QUERY_STATUS",
	"READ_REG",
	"WAIT_REG_MEM",
	"SET_SHADER_DEBUGGER",
};

static const char *mes_v11_0_get_op_string(union MESAPI__MISC *x_pkt)
{
	const char *op_str = NULL;

	if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
		op_str = mes_v11_0_opcodes[x_pkt->header.opcode];

	return op_str;
}

static const char *mes_v11_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
{
	const char *op_str = NULL;

	if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
	    (x_pkt->opcode < ARRAY_SIZE(mes_v11_0_misc_opcodes)))
		op_str = mes_v11_0_misc_opcodes[x_pkt->opcode];

	return op_str;
}

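/*
 * Submit a MES API packet and poll for its completion.
 *
 * A device writeback slot is reserved for the packet's API completion
 * fence. The caller's packet is written to the MES scheduler ring,
 * immediately followed by a QUERY_SCHEDULER_STATUS packet whose
 * completion fence is the ring's own sequence number. Once that
 * sequence number signals, the writeback slot is checked to confirm
 * that MES actually processed the caller's request.
 */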
static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
						    void *pkt, int size,
						    int api_status_off)
{
	union MESAPI__QUERY_MES_STATUS mes_status_pkt;
	signed long timeout = 2100000; /* 2100 ms */
	struct amdgpu_device *adev = mes->adev;
	struct amdgpu_ring *ring = &mes->ring[0];
	struct MES_API_STATUS *api_status;
	union MESAPI__MISC *x_pkt = pkt;
	const char *op_str, *misc_op_str;
	unsigned long flags;
	u64 status_gpu_addr;
	u32 seq, status_offset;
	u64 *status_ptr;
	signed long r;
	int ret;

	if (x_pkt->header.opcode >= MES_SCH_API_MAX)
		return -EINVAL;

	if (amdgpu_emu_mode) {
		timeout *= 100;
	} else if (amdgpu_sriov_vf(adev)) {
		/* Worst case in SR-IOV, where all of the other 15 VFs time out; each VF needs about 600ms */
		timeout = 15 * 600 * 1000;
	}

	ret = amdgpu_device_wb_get(adev, &status_offset);
	if (ret)
		return ret;

	status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
	status_ptr = (u64 *)&adev->wb.wb[status_offset];
	*status_ptr = 0;

	spin_lock_irqsave(&mes->ring_lock[0], flags);
	r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
	if (r)
		goto error_unlock_free;

	seq = ++ring->fence_drv.sync_seq;
	r = amdgpu_fence_wait_polling(ring,
				      seq - ring->fence_drv.num_fences_mask,
				      timeout);
	if (r < 1)
		goto error_undo;

	api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
	api_status->api_completion_fence_addr = status_gpu_addr;
	api_status->api_completion_fence_value = 1;

	amdgpu_ring_write_multiple(ring, pkt, size / 4);

	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
	mes_status_pkt.api_status.api_completion_fence_addr =
		ring->fence_drv.gpu_addr;
	mes_status_pkt.api_status.api_completion_fence_value = seq;

	amdgpu_ring_write_multiple(ring, &mes_status_pkt,
				   sizeof(mes_status_pkt) / 4);

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&mes->ring_lock[0], flags);

	op_str = mes_v11_0_get_op_string(x_pkt);
	misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);

	if (misc_op_str)
		dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str,
			misc_op_str);
	else if (op_str)
		dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
	else
		dev_dbg(adev->dev, "MES msg=%d was emitted\n",
			x_pkt->header.opcode);

	r = amdgpu_fence_wait_polling(ring, seq, timeout);
	if (r < 1 || !*status_ptr) {
		if (misc_op_str)
			dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
				op_str, misc_op_str);
		else if (op_str)
			dev_err(adev->dev, "MES failed to respond to msg=%s\n",
				op_str);
		else
			dev_err(adev->dev, "MES failed to respond to msg=%d\n",
				x_pkt->header.opcode);

		while (halt_if_hws_hang)
			schedule();

		r = -ETIMEDOUT;
		goto error_wb_free;
	}

	amdgpu_device_wb_free(adev, status_offset);
	return 0;

error_undo:
	dev_err(adev->dev, "MES ring buffer is full.\n");
	amdgpu_ring_undo(ring);

error_unlock_free:
	spin_unlock_irqrestore(&mes->ring_lock[0], flags);

error_wb_free:
	amdgpu_device_wb_free(adev, status_offset);
	return r;
}

static int convert_to_mes_queue_type(int queue_type)
{
	if (queue_type == AMDGPU_RING_TYPE_GFX)
		return MES_QUEUE_TYPE_GFX;
	else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
		return MES_QUEUE_TYPE_COMPUTE;
	else if (queue_type == AMDGPU_RING_TYPE_SDMA)
		return MES_QUEUE_TYPE_SDMA;
	else
		BUG();
	return -1;
}

static int convert_to_mes_priority_level(int priority_level)
{
	switch (priority_level) {
	case AMDGPU_MES_PRIORITY_LEVEL_LOW:
		return AMD_PRIORITY_LEVEL_LOW;
	case AMDGPU_MES_PRIORITY_LEVEL_NORMAL:
	default:
		return AMD_PRIORITY_LEVEL_NORMAL;
	case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM:
		return AMD_PRIORITY_LEVEL_MEDIUM;
	case AMDGPU_MES_PRIORITY_LEVEL_HIGH:
		return AMD_PRIORITY_LEVEL_HIGH;
	case AMDGPU_MES_PRIORITY_LEVEL_REALTIME:
		return AMD_PRIORITY_LEVEL_REALTIME;
	}
}

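/*
 * Build an ADD_QUEUE packet from the caller's queue description
 * (process/gang context, quanta, MQD and doorbell) and submit it to the
 * scheduler. Note that MES API v2 and later take the MC address of the
 * wptr instead of its GPU VA.
 */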
static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
				  struct mes_add_queue_input *input)
{
	struct amdgpu_device *adev = mes->adev;
	union MESAPI__ADD_QUEUE mes_add_queue_pkt;
	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
	uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;

	memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));

	mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
	mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_add_queue_pkt.process_id = input->process_id;
	mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
	mes_add_queue_pkt.process_va_start = input->process_va_start;
	mes_add_queue_pkt.process_va_end = input->process_va_end;
	mes_add_queue_pkt.process_quantum = input->process_quantum;
	mes_add_queue_pkt.process_context_addr = input->process_context_addr;
	mes_add_queue_pkt.gang_quantum = input->gang_quantum;
	mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
	mes_add_queue_pkt.inprocess_gang_priority =
		convert_to_mes_priority_level(input->inprocess_gang_priority);
	mes_add_queue_pkt.gang_global_priority_level =
		convert_to_mes_priority_level(input->gang_global_priority_level);
	mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
	mes_add_queue_pkt.mqd_addr = input->mqd_addr;

	if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
	     AMDGPU_MES_API_VERSION_SHIFT) >= 2)
		mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr;
	else
		mes_add_queue_pkt.wptr_addr = input->wptr_addr;

	mes_add_queue_pkt.queue_type =
		convert_to_mes_queue_type(input->queue_type);
	mes_add_queue_pkt.paging = input->paging;
	mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
	mes_add_queue_pkt.gws_base = input->gws_base;
	mes_add_queue_pkt.gws_size = input->gws_size;
	mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
	mes_add_queue_pkt.tma_addr = input->tma_addr;
	mes_add_queue_pkt.trap_en = input->trap_en;
	mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
	mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;

	/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
	mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
	mes_add_queue_pkt.gds_size = input->queue_size;

	mes_add_queue_pkt.exclusively_scheduled = input->exclusively_scheduled;

	return mes_v11_0_submit_pkt_and_poll_completion(mes,
			&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
			offsetof(union MESAPI__ADD_QUEUE, api_status));
}

static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes,
				     struct mes_remove_queue_input *input)
{
	union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;

	memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));

	mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
	mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
	mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;

	return mes_v11_0_submit_pkt_and_poll_completion(mes,
			&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}

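/*
 * Reset a hung queue through direct register access, bypassing the MES
 * scheduler: gfx queues via CP_VMID_RESET, compute queues via an HQD
 * dequeue request plus an SPI reset, and SDMA queues via the per-engine
 * SDMAx_QUEUE_RESET_REQ registers.
 */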
static int mes_v11_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_type,
				      uint32_t me_id, uint32_t pipe_id,
				      uint32_t queue_id, uint32_t vmid)
{
	struct amdgpu_device *adev = mes->adev;
	uint32_t value, reg;
	int i, r = 0;

	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);

	if (queue_type == AMDGPU_RING_TYPE_GFX) {
		dev_info(adev->dev, "reset gfx queue (%d:%d:%d: vmid:%d)\n",
			 me_id, pipe_id, queue_id, vmid);

		mutex_lock(&adev->gfx.reset_sem_mutex);
		gfx_v11_0_request_gfx_index_mutex(adev, true);
		/* allow writes to all SEs */
		WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX,
			     (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
		value = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
		if (pipe_id == 0)
			value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
		else
			value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
		WREG32_SOC15(GC, 0, regCP_VMID_RESET, value);
		gfx_v11_0_request_gfx_index_mutex(adev, false);
		mutex_unlock(&adev->gfx.reset_sem_mutex);

		mutex_lock(&adev->srbm_mutex);
		soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
		/* wait till the dequeue takes effect */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		if (i >= adev->usec_timeout) {
			dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
			r = -ETIMEDOUT;
		}

		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
		dev_info(adev->dev, "reset compute queue (%d:%d:%d)\n",
			 me_id, pipe_id, queue_id);
		mutex_lock(&adev->srbm_mutex);
		soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
		WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);

		/* wait till the dequeue takes effect */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		if (i >= adev->usec_timeout) {
			dev_err(adev->dev, "failed to wait on hqd deactivate\n");
			r = -ETIMEDOUT;
		}
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else if (queue_type == AMDGPU_RING_TYPE_SDMA) {
		dev_info(adev->dev, "reset sdma queue (%d:%d:%d)\n",
			 me_id, pipe_id, queue_id);
		switch (me_id) {
		case 1:
			reg = SOC15_REG_OFFSET(GC, 0, regSDMA1_QUEUE_RESET_REQ);
			break;
		case 0:
		default:
			reg = SOC15_REG_OFFSET(GC, 0, regSDMA0_QUEUE_RESET_REQ);
			break;
		}

		value = 1 << queue_id;
		WREG32(reg, value);
		/* wait for the queue reset to complete */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(reg) & value))
				break;
			udelay(1);
		}
		if (i >= adev->usec_timeout) {
			dev_err(adev->dev, "failed to wait on sdma queue reset done\n");
			r = -ETIMEDOUT;
		}
	}

	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
	return r;
}

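/*
 * Kernel ("legacy") queues are mapped through the same ADD_QUEUE API as
 * user queues, with the map_legacy_kq flag set to mark them as
 * driver-owned kernel queues.
 */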
static int mes_v11_0_map_legacy_queue(struct amdgpu_mes *mes,
				      struct mes_map_legacy_queue_input *input)
{
	union MESAPI__ADD_QUEUE mes_add_queue_pkt;

	memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));

	mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
	mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_add_queue_pkt.pipe_id = input->pipe_id;
	mes_add_queue_pkt.queue_id = input->queue_id;
	mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
	mes_add_queue_pkt.mqd_addr = input->mqd_addr;
	mes_add_queue_pkt.wptr_addr = input->wptr_addr;
	mes_add_queue_pkt.queue_type =
		convert_to_mes_queue_type(input->queue_type);
	mes_add_queue_pkt.map_legacy_kq = 1;

	return mes_v11_0_submit_pkt_and_poll_completion(mes,
			&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
			offsetof(union MESAPI__ADD_QUEUE, api_status));
}

static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes,
					struct mes_unmap_legacy_queue_input *input)
{
	union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;

	memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));

	mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
	mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
	mes_remove_queue_pkt.gang_context_addr = 0;

	mes_remove_queue_pkt.pipe_id = input->pipe_id;
	mes_remove_queue_pkt.queue_id = input->queue_id;

	if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
		mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
		mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
		mes_remove_queue_pkt.tf_data =
			lower_32_bits(input->trail_fence_data);
	} else {
		mes_remove_queue_pkt.unmap_legacy_queue = 1;
		mes_remove_queue_pkt.queue_type =
			convert_to_mes_queue_type(input->queue_type);
	}

	return mes_v11_0_submit_pkt_and_poll_completion(mes,
			&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}

static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes,
				  struct mes_suspend_gang_input *input)
{
	union MESAPI__SUSPEND mes_suspend_gang_pkt;

	memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt));

	mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND;
	mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_suspend_gang_pkt.suspend_all_gangs = input->suspend_all_gangs;
	mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr;
	mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr;
	mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value;

	return mes_v11_0_submit_pkt_and_poll_completion(mes,
			&mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt),
			offsetof(union MESAPI__SUSPEND, api_status));
}

static int mes_v11_0_resume_gang(struct amdgpu_mes *mes,
				 struct mes_resume_gang_input *input)
{
	union MESAPI__RESUME mes_resume_gang_pkt;

	memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt));

	mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME;
	mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs;
	mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr;

	return mes_v11_0_submit_pkt_and_poll_completion(mes,
			&mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt),
			offsetof(union MESAPI__RESUME, api_status));
}

static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes)
{
	union MESAPI__QUERY_MES_STATUS mes_status_pkt;

	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));

	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	return mes_v11_0_submit_pkt_and_poll_completion(mes,
			&mes_status_pkt, sizeof(mes_status_pkt),
			offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
}

static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
			     struct mes_misc_op_input *input)
{
	union MESAPI__MISC misc_pkt;

	memset(&misc_pkt, 0, sizeof(misc_pkt));

	misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
	misc_pkt.header.opcode = MES_SCH_API_MISC;
	misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	switch (input->op) {
	case MES_MISC_OP_READ_REG:
		misc_pkt.opcode = MESAPI_MISC__READ_REG;
		misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset;
		misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
		break;
	case MES_MISC_OP_WRITE_REG:
		misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
		misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset;
		misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
		break;
	case MES_MISC_OP_WRM_REG_WAIT:
		misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
		misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
		misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
		misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
		misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
		misc_pkt.wait_reg_mem.reg_offset2 = 0;
		break;
	case MES_MISC_OP_WRM_REG_WR_WAIT:
		misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
		misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
		misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
		misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
		misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
		misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
		break;
	case MES_MISC_OP_SET_SHADER_DEBUGGER:
		misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
		misc_pkt.set_shader_debugger.process_context_addr =
			input->set_shader_debugger.process_context_addr;
		misc_pkt.set_shader_debugger.flags.u32all =
			input->set_shader_debugger.flags.u32all;
		misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl =
			input->set_shader_debugger.spi_gdbg_per_vmid_cntl;
		memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
		       input->set_shader_debugger.tcp_watch_cntl,
		       sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
		misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en;
		break;
	case MES_MISC_OP_CHANGE_CONFIG:
		if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) {
			dev_warn_once(mes->adev->dev,
				      "MES FW version must be >= 0x63 to support the limit_single_process feature.\n");
			return 0;
		}
		misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
		misc_pkt.change_config.opcode =
			MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS;
		misc_pkt.change_config.option.bits.limit_single_process =
			input->change_config.option.limit_single_process;
		break;
	default:
		DRM_ERROR("unsupported misc op (%d)\n", input->op);
		return -EINVAL;
	}

	return mes_v11_0_submit_pkt_and_poll_completion(mes,
			&misc_pkt, sizeof(misc_pkt),
			offsetof(union MESAPI__MISC, api_status));
}

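/*
 * Hand the scheduler its hardware resources via a SET_HW_RSRC packet:
 * VMID masks, per-pipe compute/gfx/SDMA HQD masks, aggregated
 * doorbells, register apertures, and assorted feature flags.
 */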
static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
{
	int i;
	struct amdgpu_device *adev = mes->adev;
	union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;

	memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));

	mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
	mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
	mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
	mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
	mes_set_hw_res_pkt.paging_vmid = 0;
	mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0];
	mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
		mes->query_status_fence_gpu_addr[0];

	for (i = 0; i < MAX_COMPUTE_PIPES; i++)
		mes_set_hw_res_pkt.compute_hqd_mask[i] =
			mes->compute_hqd_mask[i];

	for (i = 0; i < MAX_GFX_PIPES; i++)
		mes_set_hw_res_pkt.gfx_hqd_mask[i] =
			mes->gfx_hqd_mask[i];

	for (i = 0; i < MAX_SDMA_PIPES; i++)
		mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];

	for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
		mes_set_hw_res_pkt.aggregated_doorbells[i] =
			mes->aggregated_doorbells[i];

	for (i = 0; i < 5; i++) {
		mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
		mes_set_hw_res_pkt.mmhub_base[i] =
			adev->reg_offset[MMHUB_HWIP][0][i];
		mes_set_hw_res_pkt.osssys_base[i] =
			adev->reg_offset[OSSSYS_HWIP][0][i];
	}

	mes_set_hw_res_pkt.disable_reset = 1;
	mes_set_hw_res_pkt.disable_mes_log = 1;
	mes_set_hw_res_pkt.use_different_vmid_compute = 1;
	mes_set_hw_res_pkt.enable_reg_active_poll = 1;
	mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
	mes_set_hw_res_pkt.oversubscription_timer = 50;
	if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x7f)
		mes_set_hw_res_pkt.enable_lr_compute_wa = 1;
	else
		dev_info_once(mes->adev->dev,
			      "MES FW version must be >= 0x7f to enable LR compute workaround.\n");

	if (amdgpu_mes_log_enable) {
		mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
		mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr =
			mes->event_log_gpu_addr;
	}

	if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
		mes_set_hw_res_pkt.limit_single_process = 1;

	return mes_v11_0_submit_pkt_and_poll_completion(mes,
			&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
			offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
}

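/*
 * SET_HW_RSRC_1 supplies the cleaner-shader fence address and, when MES
 * info is enabled under SR-IOV, an additional MES info context buffer
 * carved out of the same resource_1 allocation.
 */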
static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes)
{
	union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_pkt;

	memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));

	mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
	mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
	mes_set_hw_res_pkt.enable_mes_info_ctx = 1;

	mes_set_hw_res_pkt.cleaner_shader_fence_mc_addr = mes->resource_1_gpu_addr[0];
	if (amdgpu_sriov_is_mes_info_enable(mes->adev)) {
		mes_set_hw_res_pkt.mes_info_ctx_mc_addr =
			mes->resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE;
		mes_set_hw_res_pkt.mes_info_ctx_size = MES11_HW_RESOURCE_1_SIZE;
	}

	return mes_v11_0_submit_pkt_and_poll_completion(mes,
			&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
			offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
}

static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes,
				    struct mes_reset_queue_input *input)
{
	union MESAPI__RESET mes_reset_queue_pkt;

	if (input->use_mmio)
		return mes_v11_0_reset_queue_mmio(mes, input->queue_type,
						  input->me_id, input->pipe_id,
						  input->queue_id, input->vmid);

	memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));

	mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
	mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_reset_queue_pkt.queue_type =
		convert_to_mes_queue_type(input->queue_type);

	if (input->legacy_gfx) {
		mes_reset_queue_pkt.reset_legacy_gfx = 1;
		mes_reset_queue_pkt.pipe_id_lp = input->pipe_id;
		mes_reset_queue_pkt.queue_id_lp = input->queue_id;
		mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr;
		mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset;
		mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr;
		mes_reset_queue_pkt.vmid_id_lp = input->vmid;
	} else {
		mes_reset_queue_pkt.reset_queue_only = 1;
		mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
	}

	return mes_v11_0_submit_pkt_and_poll_completion(mes,
			&mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
			offsetof(union MESAPI__RESET, api_status));
}

static int mes_v11_0_detect_and_reset_hung_queues(struct amdgpu_mes *mes,
						  struct mes_detect_and_reset_queue_input *input)
{
	union MESAPI__RESET mes_reset_queue_pkt;

	memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));

	mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
	mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_reset_queue_pkt.queue_type =
		convert_to_mes_queue_type(input->queue_type);
	mes_reset_queue_pkt.doorbell_offset_addr =
		mes->hung_queue_db_array_gpu_addr;

	if (input->detect_only)
		mes_reset_queue_pkt.hang_detect_only = 1;
	else
		mes_reset_queue_pkt.hang_detect_then_reset = 1;

	return mes_v11_0_submit_pkt_and_poll_completion(mes,
			&mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
			offsetof(union MESAPI__RESET, api_status));
}

static const struct amdgpu_mes_funcs mes_v11_0_funcs = {
	.add_hw_queue = mes_v11_0_add_hw_queue,
	.remove_hw_queue = mes_v11_0_remove_hw_queue,
	.map_legacy_queue = mes_v11_0_map_legacy_queue,
	.unmap_legacy_queue = mes_v11_0_unmap_legacy_queue,
	.suspend_gang = mes_v11_0_suspend_gang,
	.resume_gang = mes_v11_0_resume_gang,
	.misc_op = mes_v11_0_misc_op,
	.reset_hw_queue = mes_v11_0_reset_hw_queue,
	.detect_and_reset_hung_queues = mes_v11_0_detect_and_reset_hung_queues,
};

static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev,
					   enum amdgpu_mes_pipe pipe)
{
	int r;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	const __le32 *fw_data;
	unsigned fw_size;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw[pipe]->data;

	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
		   le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
	fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_size,
				      PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->mes.ucode_fw_obj[pipe],
				      &adev->mes.ucode_fw_gpu_addr[pipe],
				      (void **)&adev->mes.ucode_fw_ptr[pipe]);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
		return r;
	}

	memcpy(adev->mes.ucode_fw_ptr[pipe], fw_data, fw_size);

	amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[pipe]);
	amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[pipe]);

	return 0;
}

static int mes_v11_0_allocate_ucode_data_buffer(struct amdgpu_device *adev,
						enum amdgpu_mes_pipe pipe)
{
	int r;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	const __le32 *fw_data;
	unsigned fw_size;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw[pipe]->data;

	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
		   le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
	fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);

	if (fw_size > GFX_MES_DRAM_SIZE) {
		dev_err(adev->dev, "PIPE%d ucode data fw size (%d) is greater than dram size (%d)\n",
			pipe, fw_size, GFX_MES_DRAM_SIZE);
		return -EINVAL;
	}

	r = amdgpu_bo_create_reserved(adev, GFX_MES_DRAM_SIZE,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->mes.data_fw_obj[pipe],
				      &adev->mes.data_fw_gpu_addr[pipe],
				      (void **)&adev->mes.data_fw_ptr[pipe]);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
		return r;
	}

	memcpy(adev->mes.data_fw_ptr[pipe], fw_data, fw_size);

	amdgpu_bo_kunmap(adev->mes.data_fw_obj[pipe]);
	amdgpu_bo_unreserve(adev->mes.data_fw_obj[pipe]);

	return 0;
}

static void mes_v11_0_free_ucode_buffers(struct amdgpu_device *adev,
					 enum amdgpu_mes_pipe pipe)
{
	amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe],
			      &adev->mes.data_fw_gpu_addr[pipe],
			      (void **)&adev->mes.data_fw_ptr[pipe]);

	amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[pipe],
			      &adev->mes.ucode_fw_gpu_addr[pipe],
			      (void **)&adev->mes.ucode_fw_ptr[pipe]);
}

static void mes_v11_0_get_fw_version(struct amdgpu_device *adev)
{
	int pipe;

	/* return early if we have already fetched these */
	if (adev->mes.sched_version && adev->mes.kiq_version)
		return;

	/* get MES scheduler/KIQ versions */
	mutex_lock(&adev->srbm_mutex);

	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		soc21_grbm_select(adev, 3, pipe, 0, 0);

		if (pipe == AMDGPU_MES_SCHED_PIPE)
			adev->mes.sched_version =
				RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
		else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
			adev->mes.kiq_version =
				RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
	}

	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

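/*
 * Take the MES pipes out of reset and activate them (enable), or halt
 * and reset them (disable). On enable, the ucode start address is
 * programmed for each pipe; pipe1 is only touched when the MES KIQ is
 * in use.
 */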
static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable)
{
	uint64_t ucode_addr;
	uint32_t pipe, data = 0;

	if (enable) {
		if (amdgpu_mes_log_enable) {
			WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO,
				     lower_32_bits(adev->mes.event_log_gpu_addr + AMDGPU_MES_LOG_BUFFER_SIZE));
			WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI,
				     upper_32_bits(adev->mes.event_log_gpu_addr + AMDGPU_MES_LOG_BUFFER_SIZE));
			dev_info(adev->dev, "Setup CP MES MSCRATCH address: hi 0x%x, lo 0x%x\n",
				 RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI),
				 RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO));
		}

		data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL,
				     MES_PIPE1_RESET, adev->enable_mes_kiq ? 1 : 0);
		WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);

		mutex_lock(&adev->srbm_mutex);
		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
			if (!adev->enable_mes_kiq &&
			    pipe == AMDGPU_MES_KIQ_PIPE)
				continue;

			soc21_grbm_select(adev, 3, pipe, 0, 0);

			ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
			WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
				     lower_32_bits(ucode_addr));
			WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
				     upper_32_bits(ucode_addr));
		}
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* unhalt MES and activate pipe0 */
		data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE,
				     adev->enable_mes_kiq ? 1 : 0);
		WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);

		if (amdgpu_emu_mode)
			msleep(100);
		else
			udelay(500);
	} else {
		data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL,
				     MES_INVALIDATE_ICACHE, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET,
				     adev->enable_mes_kiq ? 1 : 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
		WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
	}
}

/* This function is for the backdoor loading of MES firmware */
static int mes_v11_0_load_microcode(struct amdgpu_device *adev,
				    enum amdgpu_mes_pipe pipe, bool prime_icache)
{
	int r;
	uint32_t data;
	uint64_t ucode_addr;

	mes_v11_0_enable(adev, false);

	if (!adev->mes.fw[pipe])
		return -EINVAL;

	r = mes_v11_0_allocate_ucode_buffer(adev, pipe);
	if (r)
		return r;

	r = mes_v11_0_allocate_ucode_data_buffer(adev, pipe);
	if (r) {
		mes_v11_0_free_ucode_buffers(adev, pipe);
		return r;
	}

	mutex_lock(&adev->srbm_mutex);
	/* me=3, queue=0 */
	soc21_grbm_select(adev, 3, pipe, 0, 0);

	WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_CNTL, 0);

	/* set ucode start address */
	ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
	WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
		     lower_32_bits(ucode_addr));
	WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
		     upper_32_bits(ucode_addr));

	/* set ucode firmware address */
	WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_LO,
		     lower_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
	WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_HI,
		     upper_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));

	/* set ucode instruction cache boundary to 2M-1 */
	WREG32_SOC15(GC, 0, regCP_MES_MIBOUND_LO, 0x1FFFFF);

	/* set ucode data firmware address */
	WREG32_SOC15(GC, 0, regCP_MES_MDBASE_LO,
		     lower_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
	WREG32_SOC15(GC, 0, regCP_MES_MDBASE_HI,
		     upper_32_bits(adev->mes.data_fw_gpu_addr[pipe]));

	/* set ucode data boundary to 512K-1 (0x7FFFF) in CP_MES_MDBOUND_LO */
	WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x7FFFF);

	if (prime_icache) {
		/* invalidate ICACHE */
		data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
		WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);

		/* prime the ICACHE. */
		data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
		WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
	}

	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	return 0;
}

static int mes_v11_0_allocate_eop_buf(struct amdgpu_device *adev,
				      enum amdgpu_mes_pipe pipe)
{
	int r;
	u32 *eop;

	r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->mes.eop_gpu_obj[pipe],
				      &adev->mes.eop_gpu_addr[pipe],
				      (void **)&eop);
	if (r) {
		dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
		return r;
	}

	memset(eop, 0,
	       adev->mes.eop_gpu_obj[pipe]->tbo.base.size);

	amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[pipe]);
	amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[pipe]);

	return 0;
}

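/*
 * Initialize the ring's MQD (memory queue descriptor). The fields
 * mirror the CP_HQD_ and CP_MQD_ registers, which are later programmed
 * either directly (KIQ pipe) or by the KIQ map-queues packet (scheduler
 * pipe).
 */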
static int mes_v11_0_mqd_init(struct amdgpu_ring *ring)
{
	struct v11_compute_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	memset(mqd, 0, sizeof(*mqd));

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000007;

	eop_base_addr = ring->eop_gpu_addr >> 8;

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			    (order_base_2(MES_EOP_SIZE / 4) - 1));

	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
	mqd->cp_hqd_eop_control = tmp;

	/* disable the queue if it's active */
	ring->wptr = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = regCP_MQD_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = ring->rptr_gpu_addr;
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = ring->wptr_gpu_addr;
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* enable doorbell */
	tmp = 0;
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}
	mqd->cp_hqd_pq_doorbell_control = tmp;

	mqd->cp_hqd_vmid = 0;
	/* activate the queue */
	mqd->cp_hqd_active = 1;

	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE,
			    PRELOAD_SIZE, 0x55);
	mqd->cp_hqd_persistent_state = tmp;

	mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_DEFAULT;
	mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
	mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;

	amdgpu_device_flush_hdp(ring->adev, NULL);
	return 0;
}

static void mes_v11_0_queue_init_register(struct amdgpu_ring *ring)
{
	struct v11_compute_mqd *mqd = ring->mqd_ptr;
	struct amdgpu_device *adev = ring->adev;
	uint32_t data = 0;

	mutex_lock(&adev->srbm_mutex);
	soc21_grbm_select(adev, 3, ring->pipe, 0, 0);

	/* set CP_HQD_VMID.VMID = 0. */
	data = RREG32_SOC15(GC, 0, regCP_HQD_VMID);
	data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
	WREG32_SOC15(GC, 0, regCP_HQD_VMID, data);

	/* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
	data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
			     DOORBELL_EN, 0);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);

	/* set CP_MQD_BASE_ADDR/HI with the MQD base address */
	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set CP_MQD_CONTROL.VMID=0 */
	data = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
	data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
	WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, data);

	/* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
		     mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		     mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* set CP_HQD_PQ_CONTROL */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

	/* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
		     mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		     mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* set CP_HQD_PQ_DOORBELL_CONTROL */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
		     mqd->cp_hqd_pq_doorbell_control);

	/* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x55 */
	WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);

	/* set CP_HQD_ACTIVE.ACTIVE=1 */
	WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, mqd->cp_hqd_active);

	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

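/*
 * Ask the KIQ to map the MES scheduler ring: the KIQ map-queues packet
 * programs the HQD registers from the MQD prepared above.
 */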
static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
	int r;

	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
		return -EINVAL;

	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}

	kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]);

	return amdgpu_ring_test_helper(kiq_ring);
}

static int mes_v11_0_queue_init(struct amdgpu_device *adev,
				enum amdgpu_mes_pipe pipe)
{
	struct amdgpu_ring *ring;
	int r;

	if (pipe == AMDGPU_MES_KIQ_PIPE)
		ring = &adev->gfx.kiq[0].ring;
	else if (pipe == AMDGPU_MES_SCHED_PIPE)
		ring = &adev->mes.ring[0];
	else
		BUG();

	if ((pipe == AMDGPU_MES_SCHED_PIPE) &&
	    (amdgpu_in_reset(adev) || adev->in_suspend)) {
		*(ring->wptr_cpu_addr) = 0;
		*(ring->rptr_cpu_addr) = 0;
		amdgpu_ring_clear_ring(ring);
	}

	r = mes_v11_0_mqd_init(ring);
	if (r)
		return r;

	if (pipe == AMDGPU_MES_SCHED_PIPE) {
		r = mes_v11_0_kiq_enable_queue(adev);
		if (r)
			return r;
	} else {
		mes_v11_0_queue_init_register(ring);
	}

	return 0;
}

static int mes_v11_0_ring_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;

	ring = &adev->mes.ring[0];

	ring->funcs = &mes_v11_0_ring_funcs;

	ring->me = 3;
	ring->pipe = 0;
	ring->queue = 0;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1;
	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE];
	ring->no_scheduler = true;
	sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
				AMDGPU_RING_PRIO_DEFAULT, NULL);
}

static int mes_v11_0_kiq_ring_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;

	spin_lock_init(&adev->gfx.kiq[0].ring_lock);

	ring = &adev->gfx.kiq[0].ring;

	ring->me = 3;
	ring->pipe = 1;
	ring->queue = 0;

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_KIQ_PIPE];
	ring->no_scheduler = true;
	sprintf(ring->name, "mes_kiq_%d.%d.%d",
		ring->me, ring->pipe, ring->queue);

	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
				AMDGPU_RING_PRIO_DEFAULT, NULL);
}

static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev,
				 enum amdgpu_mes_pipe pipe)
{
	int r, mqd_size = sizeof(struct v11_compute_mqd);
	struct amdgpu_ring *ring;

	if (pipe == AMDGPU_MES_KIQ_PIPE)
		ring = &adev->gfx.kiq[0].ring;
	else if (pipe == AMDGPU_MES_SCHED_PIPE)
		ring = &adev->mes.ring[0];
	else
		BUG();

	if (ring->mqd_obj)
		return 0;

	r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM |
				    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
				    &ring->mqd_gpu_addr, &ring->mqd_ptr);
	if (r) {
		dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
		return r;
	}

	memset(ring->mqd_ptr, 0, mqd_size);

	/* prepare MQD backup */
	adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL);
	if (!adev->mes.mqd_backup[pipe]) {
		dev_warn(adev->dev,
			 "no memory to create MQD backup for ring %s\n",
			 ring->name);
		return -ENOMEM;
	}

	return 0;
}

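/*
 * sw_init: allocate the per-pipe EOP buffers and MQDs, create the KIQ
 * and scheduler rings, and reserve the resource_1 buffer that is later
 * handed to the firmware via SET_HW_RSRC_1.
 */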
static int mes_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int pipe, r, bo_size;

	adev->mes.funcs = &mes_v11_0_funcs;
	adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init;
	adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini;

	adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE;

	r = amdgpu_mes_init(adev);
	if (r)
		return r;

	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
			continue;

		r = mes_v11_0_allocate_eop_buf(adev, pipe);
		if (r)
			return r;

		r = mes_v11_0_mqd_sw_init(adev, pipe);
		if (r)
			return r;
	}

	if (adev->enable_mes_kiq) {
		r = mes_v11_0_kiq_ring_init(adev);
		if (r)
			return r;
	}

	r = mes_v11_0_ring_init(adev);
	if (r)
		return r;

	bo_size = AMDGPU_GPU_PAGE_SIZE;
	if (amdgpu_sriov_is_mes_info_enable(adev))
		bo_size += MES11_HW_RESOURCE_1_SIZE;

	/* Only needed for AMDGPU_MES_SCHED_PIPE on MES 11 */
	r = amdgpu_bo_create_kernel(adev,
				    bo_size,
				    PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM,
				    &adev->mes.resource_1[0],
				    &adev->mes.resource_1_gpu_addr[0],
				    &adev->mes.resource_1_addr[0]);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", r);
		return r;
	}

	return 0;
}

static int mes_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int pipe;

	amdgpu_bo_free_kernel(&adev->mes.resource_1[0], &adev->mes.resource_1_gpu_addr[0],
			      &adev->mes.resource_1_addr[0]);

	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		kfree(adev->mes.mqd_backup[pipe]);

		amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
				      &adev->mes.eop_gpu_addr[pipe],
				      NULL);
		amdgpu_ucode_release(&adev->mes.fw[pipe]);
	}

	amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj,
			      &adev->gfx.kiq[0].ring.mqd_gpu_addr,
			      &adev->gfx.kiq[0].ring.mqd_ptr);

	amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj,
			      &adev->mes.ring[0].mqd_gpu_addr,
			      &adev->mes.ring[0].mqd_ptr);

	amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
	amdgpu_ring_fini(&adev->mes.ring[0]);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
		mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_SCHED_PIPE);
	}

	amdgpu_mes_fini(adev);
	return 0;
}

static void mes_v11_0_kiq_dequeue(struct amdgpu_ring *ring)
{
	uint32_t data;
	int i;
	struct amdgpu_device *adev = ring->adev;

	mutex_lock(&adev->srbm_mutex);
	soc21_grbm_select(adev, 3, ring->pipe, 0, 0);

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
	}
	data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
			     DOORBELL_EN, 0);
	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
			     DOORBELL_HIT, 1);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);

	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0);

	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 0);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 0);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 0);

	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which queue is the KIQ */
	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
}

static void mes_v11_0_kiq_clear(struct amdgpu_device *adev)
{
	uint32_t tmp;

	/* clear the KIQ setting in RLC_CP_SCHEDULERS */
	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
	tmp &= ~RLC_CP_SCHEDULERS__scheduler0_MASK;
	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
}

static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev)
{
	int r = 0;
	struct amdgpu_ip_block *ip_block;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		r = mes_v11_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, false);
		if (r) {
			DRM_ERROR("failed to load MES fw, r=%d\n", r);
			return r;
		}

		r = mes_v11_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, true);
		if (r) {
			DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
			return r;
		}
	}

	mes_v11_0_enable(adev, true);

	mes_v11_0_get_fw_version(adev);

	mes_v11_0_kiq_setting(&adev->gfx.kiq[0].ring);

	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES);
	if (unlikely(!ip_block)) {
		dev_err(adev->dev, "Failed to get MES handle\n");
		return -EINVAL;
	}

	r = mes_v11_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE);
	if (r)
		goto failure;

	if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x47)
		adev->mes.enable_legacy_queue_map = true;
	else
		adev->mes.enable_legacy_queue_map = false;

	if (adev->mes.enable_legacy_queue_map) {
		r = mes_v11_0_hw_init(ip_block);
		if (r)
			goto failure;
	}

	return r;

failure:
	mes_v11_0_hw_fini(ip_block);
	return r;
}

static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev)
{
	if (adev->mes.ring[0].sched.ready) {
		mes_v11_0_kiq_dequeue(&adev->mes.ring[0]);
		adev->mes.ring[0].sched.ready = false;
	}

	if (amdgpu_sriov_vf(adev)) {
		mes_v11_0_kiq_dequeue(&adev->gfx.kiq[0].ring);
		mes_v11_0_kiq_clear(adev);
	}

	mes_v11_0_enable(adev, false);

	return 0;
}

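/*
 * hw_init brings up the scheduler pipe: load the firmware when doing a
 * direct (backdoor) load without the MES KIQ, initialize the scheduler
 * queue, configure MES through SET_HW_RSRC (and SET_HW_RSRC_1 on newer
 * firmware), and verify the scheduler responds to a status query. Once
 * MES owns the KIQ ring, the driver stops using it directly.
 */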
static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
{
	int r;
	struct amdgpu_device *adev = ip_block->adev;

	if (adev->mes.ring[0].sched.ready)
		goto out;

	if (!adev->enable_mes_kiq) {
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
			r = mes_v11_0_load_microcode(adev,
						     AMDGPU_MES_SCHED_PIPE, true);
			if (r) {
				DRM_ERROR("failed to load MES fw, r=%d\n", r);
				return r;
			}
		}

		mes_v11_0_enable(adev, true);
	}

	r = mes_v11_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE);
	if (r)
		goto failure;

	r = mes_v11_0_set_hw_resources(&adev->mes);
	if (r)
		goto failure;

	if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x50) {
		r = mes_v11_0_set_hw_resources_1(&adev->mes);
		if (r) {
			DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r);
			goto failure;
		}
	}

	r = mes_v11_0_query_sched_status(&adev->mes);
	if (r) {
		DRM_ERROR("MES is busy\n");
		goto failure;
	}

	r = amdgpu_mes_update_enforce_isolation(adev);
	if (r)
		goto failure;

out:
	/*
	 * Disable KIQ ring usage from the driver once MES is enabled.
	 * MES uses the KIQ ring exclusively, so the driver cannot access
	 * the KIQ ring with MES enabled.
	 */
	adev->gfx.kiq[0].ring.sched.ready = false;
	adev->mes.ring[0].sched.ready = true;

	return 0;

failure:
	mes_v11_0_hw_fini(ip_block);
	return r;
}

static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
	return 0;
}

static int mes_v11_0_suspend(struct amdgpu_ip_block *ip_block)
{
	return mes_v11_0_hw_fini(ip_block);
}

static int mes_v11_0_resume(struct amdgpu_ip_block *ip_block)
{
	return mes_v11_0_hw_init(ip_block);
}

static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int pipe, r;

	adev->mes.hung_queue_db_array_size =
		MES11_HUNG_DB_OFFSET_ARRAY_SIZE;
	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
			continue;
		r = amdgpu_mes_init_microcode(adev, pipe);
		if (r)
			return r;
	}

	return 0;
}

static const struct amd_ip_funcs mes_v11_0_ip_funcs = {
	.name = "mes_v11_0",
	.early_init = mes_v11_0_early_init,
	.late_init = NULL,
	.sw_init = mes_v11_0_sw_init,
	.sw_fini = mes_v11_0_sw_fini,
	.hw_init = mes_v11_0_hw_init,
	.hw_fini = mes_v11_0_hw_fini,
	.suspend = mes_v11_0_suspend,
	.resume = mes_v11_0_resume,
};

const struct amdgpu_ip_block_version mes_v11_0_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_MES,
	.major = 11,
	.minor = 0,
	.rev = 0,
	.funcs = &mes_v11_0_ip_funcs,
};
