1 /* 2 * Copyright 2025 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22 */ 23 24 #include <linux/firmware.h> 25 #include <linux/module.h> 26 #include "amdgpu.h" 27 #include "soc15_common.h" 28 #include "soc_v1_0.h" 29 #include "gc/gc_12_1_0_offset.h" 30 #include "gc/gc_12_1_0_sh_mask.h" 31 #include "gc/gc_11_0_0_default.h" 32 #include "v12_structs.h" 33 #include "mes_v12_api_def.h" 34 35 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin"); 36 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin"); 37 MODULE_FIRMWARE("amdgpu/gc_12_1_0_uni_mes.bin"); 38 39 static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block); 40 static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id); 41 static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block); 42 static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id); 43 static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id); 44 45 #define MES_EOP_SIZE 2048 46 47 #define regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT 0x100000 48 #define XCC_REG_RANGE_0_LOW 0x1260 /* XCC gfxdec0 lower Bound */ 49 #define XCC_REG_RANGE_0_HIGH 0x3C00 /* XCC gfxdec0 upper Bound */ 50 #define XCC_REG_RANGE_1_LOW 0xA000 /* XCC gfxdec1 lower Bound */ 51 #define XCC_REG_RANGE_1_HIGH 0x10000 /* XCC gfxdec1 upper Bound */ 52 #define XCC_MID_MASK 0x41000000 53 54 #define NORMALIZE_XCC_REG_OFFSET(offset) \ 55 (offset & 0x3FFFF) 56 57 static void mes_v12_1_ring_set_wptr(struct amdgpu_ring *ring) 58 { 59 struct amdgpu_device *adev = ring->adev; 60 61 if (ring->use_doorbell) { 62 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 63 ring->wptr); 64 WDOORBELL64(ring->doorbell_index, ring->wptr); 65 } else { 66 BUG(); 67 } 68 } 69 70 static u64 mes_v12_1_ring_get_rptr(struct amdgpu_ring *ring) 71 { 72 return *ring->rptr_cpu_addr; 73 } 74 75 static u64 mes_v12_1_ring_get_wptr(struct amdgpu_ring *ring) 76 { 77 u64 wptr; 78 79 if (ring->use_doorbell) 80 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 81 else 82 BUG(); 83 return wptr; 84 } 85 86 static const struct amdgpu_ring_funcs 
mes_v12_1_ring_funcs = { 87 .type = AMDGPU_RING_TYPE_MES, 88 .align_mask = 1, 89 .nop = 0, 90 .support_64bit_ptrs = true, 91 .get_rptr = mes_v12_1_ring_get_rptr, 92 .get_wptr = mes_v12_1_ring_get_wptr, 93 .set_wptr = mes_v12_1_ring_set_wptr, 94 .insert_nop = amdgpu_ring_insert_nop, 95 }; 96 97 static const char *mes_v12_1_opcodes[] = { 98 "SET_HW_RSRC", 99 "SET_SCHEDULING_CONFIG", 100 "ADD_QUEUE", 101 "REMOVE_QUEUE", 102 "PERFORM_YIELD", 103 "SET_GANG_PRIORITY_LEVEL", 104 "SUSPEND", 105 "RESUME", 106 "RESET", 107 "SET_LOG_BUFFER", 108 "CHANGE_GANG_PRORITY", 109 "QUERY_SCHEDULER_STATUS", 110 "unused", 111 "SET_DEBUG_VMID", 112 "MISC", 113 "UPDATE_ROOT_PAGE_TABLE", 114 "AMD_LOG", 115 "SET_SE_MODE", 116 "SET_GANG_SUBMIT", 117 "SET_HW_RSRC_1", 118 }; 119 120 static const char *mes_v12_1_misc_opcodes[] = { 121 "WRITE_REG", 122 "INV_GART", 123 "QUERY_STATUS", 124 "READ_REG", 125 "WAIT_REG_MEM", 126 "SET_SHADER_DEBUGGER", 127 "NOTIFY_WORK_ON_UNMAPPED_QUEUE", 128 "NOTIFY_TO_UNMAP_PROCESSES", 129 }; 130 131 static const char *mes_v12_1_get_op_string(union MESAPI__MISC *x_pkt) 132 { 133 const char *op_str = NULL; 134 135 if (x_pkt->header.opcode < ARRAY_SIZE(mes_v12_1_opcodes)) 136 op_str = mes_v12_1_opcodes[x_pkt->header.opcode]; 137 138 return op_str; 139 } 140 141 static const char *mes_v12_1_get_misc_op_string(union MESAPI__MISC *x_pkt) 142 { 143 const char *op_str = NULL; 144 145 if ((x_pkt->header.opcode == MES_SCH_API_MISC) && 146 (x_pkt->opcode < ARRAY_SIZE(mes_v12_1_misc_opcodes))) 147 op_str = mes_v12_1_misc_opcodes[x_pkt->opcode]; 148 149 return op_str; 150 } 151 152 static int mes_v12_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, 153 int xcc_id, int pipe, void *pkt, 154 int size, int api_status_off) 155 { 156 union MESAPI__QUERY_MES_STATUS mes_status_pkt; 157 signed long timeout = 2100000; /* 2100 ms */ 158 struct amdgpu_device *adev = mes->adev; 159 struct amdgpu_ring *ring = &mes->ring[MES_PIPE_INST(xcc_id, pipe)]; 160 spinlock_t *ring_lock = 
&mes->ring_lock[MES_PIPE_INST(xcc_id, pipe)]; 161 struct MES_API_STATUS *api_status; 162 union MESAPI__MISC *x_pkt = pkt; 163 const char *op_str, *misc_op_str; 164 unsigned long flags; 165 u64 status_gpu_addr; 166 u32 seq, status_offset; 167 u64 *status_ptr; 168 signed long r; 169 int ret; 170 171 if (x_pkt->header.opcode >= MES_SCH_API_MAX) 172 return -EINVAL; 173 174 if (amdgpu_emu_mode) { 175 timeout *= 1000; 176 } else if (amdgpu_sriov_vf(adev)) { 177 /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */ 178 timeout = 15 * 600 * 1000; 179 } 180 181 ret = amdgpu_device_wb_get(adev, &status_offset); 182 if (ret) 183 return ret; 184 185 status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4); 186 status_ptr = (u64 *)&adev->wb.wb[status_offset]; 187 *status_ptr = 0; 188 189 spin_lock_irqsave(ring_lock, flags); 190 r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); 191 if (r) 192 goto error_unlock_free; 193 194 seq = ++ring->fence_drv.sync_seq; 195 r = amdgpu_fence_wait_polling(ring, 196 seq - ring->fence_drv.num_fences_mask, 197 timeout); 198 if (r < 1) 199 goto error_undo; 200 201 api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); 202 api_status->api_completion_fence_addr = status_gpu_addr; 203 api_status->api_completion_fence_value = 1; 204 205 amdgpu_ring_write_multiple(ring, pkt, size / 4); 206 207 memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); 208 mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; 209 mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; 210 mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 211 mes_status_pkt.api_status.api_completion_fence_addr = 212 ring->fence_drv.gpu_addr; 213 mes_status_pkt.api_status.api_completion_fence_value = seq; 214 215 amdgpu_ring_write_multiple(ring, &mes_status_pkt, 216 sizeof(mes_status_pkt) / 4); 217 218 amdgpu_ring_commit(ring); 219 spin_unlock_irqrestore(ring_lock, flags); 220 221 op_str = 
mes_v12_1_get_op_string(x_pkt); 222 misc_op_str = mes_v12_1_get_misc_op_string(x_pkt); 223 224 if (misc_op_str) 225 dev_dbg(adev->dev, "MES(%d, %d) msg=%s (%s) was emitted\n", 226 xcc_id, pipe, op_str, misc_op_str); 227 else if (op_str) 228 dev_dbg(adev->dev, "MES(%d, %d) msg=%s was emitted\n", 229 xcc_id, pipe, op_str); 230 else 231 dev_dbg(adev->dev, "MES(%d, %d) msg=%d was emitted\n", 232 xcc_id, pipe, x_pkt->header.opcode); 233 234 r = amdgpu_fence_wait_polling(ring, seq, timeout); 235 if (r < 1 || !*status_ptr) { 236 if (misc_op_str) 237 dev_err(adev->dev, 238 "MES(%d, %d) failed to respond to msg=%s (%s)\n", 239 xcc_id, pipe, op_str, misc_op_str); 240 else if (op_str) 241 dev_err(adev->dev, 242 "MES(%d, %d) failed to respond to msg=%s\n", 243 xcc_id, pipe, op_str); 244 else 245 dev_err(adev->dev, 246 "MES(%d, %d) failed to respond to msg=%d\n", 247 xcc_id, pipe, x_pkt->header.opcode); 248 249 while (halt_if_hws_hang) 250 schedule(); 251 252 r = -ETIMEDOUT; 253 goto error_wb_free; 254 } 255 256 amdgpu_device_wb_free(adev, status_offset); 257 return 0; 258 259 error_undo: 260 dev_err(adev->dev, "MES(%d, %d) ring buffer is full.\n", xcc_id, pipe); 261 amdgpu_ring_undo(ring); 262 263 error_unlock_free: 264 spin_unlock_irqrestore(ring_lock, flags); 265 266 error_wb_free: 267 amdgpu_device_wb_free(adev, status_offset); 268 return r; 269 } 270 271 static int convert_to_mes_queue_type(int queue_type) 272 { 273 if (queue_type == AMDGPU_RING_TYPE_GFX) 274 return MES_QUEUE_TYPE_GFX; 275 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) 276 return MES_QUEUE_TYPE_COMPUTE; 277 else if (queue_type == AMDGPU_RING_TYPE_SDMA) 278 return MES_QUEUE_TYPE_SDMA; 279 else if (queue_type == AMDGPU_RING_TYPE_MES) 280 return MES_QUEUE_TYPE_SCHQ; 281 else 282 BUG(); 283 return -1; 284 } 285 286 static int mes_v12_1_add_hw_queue(struct amdgpu_mes *mes, 287 struct mes_add_queue_input *input) 288 { 289 struct amdgpu_device *adev = mes->adev; 290 union MESAPI__ADD_QUEUE mes_add_queue_pkt; 
291 struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; 292 uint32_t vm_cntx_cntl = hub->vm_cntx_cntl; 293 int xcc_id = input->xcc_id; 294 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 295 296 if (mes->enable_coop_mode) 297 xcc_id = mes->master_xcc_ids[inst]; 298 299 memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); 300 301 mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 302 mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; 303 mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 304 305 mes_add_queue_pkt.process_id = input->process_id; 306 mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr; 307 mes_add_queue_pkt.process_va_start = input->process_va_start; 308 mes_add_queue_pkt.process_va_end = input->process_va_end; 309 mes_add_queue_pkt.process_quantum = input->process_quantum; 310 mes_add_queue_pkt.process_context_addr = input->process_context_addr; 311 mes_add_queue_pkt.gang_quantum = input->gang_quantum; 312 mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; 313 mes_add_queue_pkt.inprocess_gang_priority = 314 input->inprocess_gang_priority; 315 mes_add_queue_pkt.gang_global_priority_level = 316 input->gang_global_priority_level; 317 mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; 318 mes_add_queue_pkt.mqd_addr = input->mqd_addr; 319 320 mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr; 321 322 mes_add_queue_pkt.queue_type = 323 convert_to_mes_queue_type(input->queue_type); 324 mes_add_queue_pkt.paging = input->paging; 325 mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl; 326 mes_add_queue_pkt.gws_base = input->gws_base; 327 mes_add_queue_pkt.gws_size = input->gws_size; 328 mes_add_queue_pkt.trap_handler_addr = input->tba_addr; 329 mes_add_queue_pkt.tma_addr = input->tma_addr; 330 mes_add_queue_pkt.trap_en = input->trap_en; 331 mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear; 332 mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; 333 334 
/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ 335 mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; 336 mes_add_queue_pkt.gds_size = input->queue_size; 337 338 /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ 339 mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; 340 mes_add_queue_pkt.gds_size = input->queue_size; 341 342 return mes_v12_1_submit_pkt_and_poll_completion(mes, 343 xcc_id, AMDGPU_MES_SCHED_PIPE, 344 &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), 345 offsetof(union MESAPI__ADD_QUEUE, api_status)); 346 } 347 348 static int mes_v12_1_remove_hw_queue(struct amdgpu_mes *mes, 349 struct mes_remove_queue_input *input) 350 { 351 union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; 352 int xcc_id = input->xcc_id; 353 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 354 355 if (mes->enable_coop_mode) 356 xcc_id = mes->master_xcc_ids[inst]; 357 358 memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); 359 360 mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 361 mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; 362 mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 363 364 mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; 365 mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; 366 367 return mes_v12_1_submit_pkt_and_poll_completion(mes, 368 xcc_id, AMDGPU_MES_SCHED_PIPE, 369 &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), 370 offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 371 } 372 373 static int mes_v12_1_reset_hw_queue(struct amdgpu_mes *mes, 374 struct mes_reset_queue_input *input) 375 { 376 union MESAPI__RESET mes_reset_queue_pkt; 377 int pipe; 378 379 memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); 380 381 mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 382 mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; 383 mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 
384 385 mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; 386 /* mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; */ 387 /*mes_reset_queue_pkt.reset_queue_only = 1;*/ 388 389 if (mes->adev->enable_uni_mes) 390 pipe = AMDGPU_MES_KIQ_PIPE; 391 else 392 pipe = AMDGPU_MES_SCHED_PIPE; 393 394 return mes_v12_1_submit_pkt_and_poll_completion(mes, 395 input->xcc_id, pipe, 396 &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), 397 offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 398 } 399 400 static int mes_v12_1_map_legacy_queue(struct amdgpu_mes *mes, 401 struct mes_map_legacy_queue_input *input) 402 { 403 union MESAPI__ADD_QUEUE mes_add_queue_pkt; 404 int pipe; 405 406 memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); 407 408 mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 409 mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; 410 mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 411 412 mes_add_queue_pkt.pipe_id = input->pipe_id; 413 mes_add_queue_pkt.queue_id = input->queue_id; 414 mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; 415 mes_add_queue_pkt.mqd_addr = input->mqd_addr; 416 mes_add_queue_pkt.wptr_addr = input->wptr_addr; 417 mes_add_queue_pkt.queue_type = 418 convert_to_mes_queue_type(input->queue_type); 419 mes_add_queue_pkt.map_legacy_kq = 1; 420 421 if (mes->adev->enable_uni_mes) 422 pipe = AMDGPU_MES_KIQ_PIPE; 423 else 424 pipe = AMDGPU_MES_SCHED_PIPE; 425 426 return mes_v12_1_submit_pkt_and_poll_completion(mes, 427 input->xcc_id, pipe, 428 &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), 429 offsetof(union MESAPI__ADD_QUEUE, api_status)); 430 } 431 432 static int mes_v12_1_unmap_legacy_queue(struct amdgpu_mes *mes, 433 struct mes_unmap_legacy_queue_input *input) 434 { 435 union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; 436 int pipe; 437 438 memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); 439 440 mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 441 
mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; 442 mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 443 444 mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; 445 mes_remove_queue_pkt.gang_context_addr = 0; 446 447 mes_remove_queue_pkt.pipe_id = input->pipe_id; 448 mes_remove_queue_pkt.queue_id = input->queue_id; 449 450 if (input->action == PREEMPT_QUEUES_NO_UNMAP) { 451 mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1; 452 mes_remove_queue_pkt.tf_addr = input->trail_fence_addr; 453 mes_remove_queue_pkt.tf_data = 454 lower_32_bits(input->trail_fence_data); 455 } else { 456 mes_remove_queue_pkt.unmap_legacy_queue = 1; 457 mes_remove_queue_pkt.queue_type = 458 convert_to_mes_queue_type(input->queue_type); 459 } 460 461 if (mes->adev->enable_uni_mes) 462 pipe = AMDGPU_MES_KIQ_PIPE; 463 else 464 pipe = AMDGPU_MES_SCHED_PIPE; 465 466 return mes_v12_1_submit_pkt_and_poll_completion(mes, 467 input->xcc_id, pipe, 468 &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), 469 offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 470 } 471 472 static int mes_v12_1_suspend_gang(struct amdgpu_mes *mes, 473 struct mes_suspend_gang_input *input) 474 { 475 return 0; 476 } 477 478 static int mes_v12_1_resume_gang(struct amdgpu_mes *mes, 479 struct mes_resume_gang_input *input) 480 { 481 return 0; 482 } 483 484 static int mes_v12_1_query_sched_status(struct amdgpu_mes *mes, 485 int pipe, int xcc_id) 486 { 487 union MESAPI__QUERY_MES_STATUS mes_status_pkt; 488 489 memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); 490 491 mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; 492 mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; 493 mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 494 495 return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, 496 &mes_status_pkt, sizeof(mes_status_pkt), 497 offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); 498 } 499 static uint32_t mes_v12_1_get_xcc_from_reg(uint32_t 
reg_offset) 500 { 501 /* Check xcc reg offset range */ 502 uint32_t xcc = (reg_offset & XCC_MID_MASK) ? 4 : 0; 503 /* Each XCC has two register ranges. 504 * These are represented in reg_offset[17:16] 505 */ 506 return ((reg_offset >> 16) & 0x3) + xcc; 507 } 508 509 static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id, 510 struct RRMT_OPTION *rrmt_opt) 511 { 512 uint32_t normalized_reg = NORMALIZE_XCC_REG_OFFSET(reg); 513 514 if (((normalized_reg >= XCC_REG_RANGE_0_LOW) && (normalized_reg < XCC_REG_RANGE_0_HIGH)) || 515 ((normalized_reg >= XCC_REG_RANGE_1_LOW) && (normalized_reg < XCC_REG_RANGE_1_HIGH))) { 516 rrmt_opt->xcd_die_id = mes_v12_1_get_xcc_from_reg(reg); 517 rrmt_opt->mode = (xcc_id == rrmt_opt->xcd_die_id) ? 518 MES_RRMT_MODE_LOCAL_XCD : MES_RRMT_MODE_REMOTE_XCD; 519 } else { 520 rrmt_opt->mode = MES_RRMT_MODE_LOCAL_REMOTE_AID; 521 } 522 } 523 524 static int mes_v12_1_misc_op(struct amdgpu_mes *mes, 525 struct mes_misc_op_input *input) 526 { 527 union MESAPI__MISC misc_pkt; 528 int pipe; 529 530 if (mes->adev->enable_uni_mes) 531 pipe = AMDGPU_MES_KIQ_PIPE; 532 else 533 pipe = AMDGPU_MES_SCHED_PIPE; 534 535 memset(&misc_pkt, 0, sizeof(misc_pkt)); 536 537 misc_pkt.header.type = MES_API_TYPE_SCHEDULER; 538 misc_pkt.header.opcode = MES_SCH_API_MISC; 539 misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 540 541 switch (input->op) { 542 case MES_MISC_OP_READ_REG: 543 misc_pkt.opcode = MESAPI_MISC__READ_REG; 544 misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset; 545 misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr; 546 mes_v12_1_get_rrmt(input->read_reg.reg_offset, input->xcc_id, 547 &misc_pkt.read_reg.rrmt_opt); 548 break; 549 case MES_MISC_OP_WRITE_REG: 550 misc_pkt.opcode = MESAPI_MISC__WRITE_REG; 551 misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset; 552 misc_pkt.write_reg.reg_value = input->write_reg.reg_value; 553 mes_v12_1_get_rrmt(input->write_reg.reg_offset, input->xcc_id, 554 &misc_pkt.write_reg.rrmt_opt); 555 
break; 556 case MES_MISC_OP_WRM_REG_WAIT: 557 misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; 558 misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM; 559 misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; 560 misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; 561 misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; 562 misc_pkt.wait_reg_mem.reg_offset2 = 0; 563 mes_v12_1_get_rrmt(input->wrm_reg.reg0, input->xcc_id, 564 &misc_pkt.wait_reg_mem.rrmt_opt1); 565 break; 566 case MES_MISC_OP_WRM_REG_WR_WAIT: 567 misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; 568 misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG; 569 misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; 570 misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; 571 misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; 572 misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1; 573 mes_v12_1_get_rrmt(input->wrm_reg.reg0, input->xcc_id, 574 &misc_pkt.wait_reg_mem.rrmt_opt1); 575 mes_v12_1_get_rrmt(input->wrm_reg.reg1, input->xcc_id, 576 &misc_pkt.wait_reg_mem.rrmt_opt2); 577 break; 578 case MES_MISC_OP_SET_SHADER_DEBUGGER: 579 pipe = AMDGPU_MES_SCHED_PIPE; 580 misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER; 581 misc_pkt.set_shader_debugger.process_context_addr = 582 input->set_shader_debugger.process_context_addr; 583 misc_pkt.set_shader_debugger.flags.u32all = 584 input->set_shader_debugger.flags.u32all; 585 misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl = 586 input->set_shader_debugger.spi_gdbg_per_vmid_cntl; 587 memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl, 588 input->set_shader_debugger.tcp_watch_cntl, 589 sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); 590 misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; 591 break; 592 case MES_MISC_OP_CHANGE_CONFIG: 593 misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; 594 misc_pkt.change_config.opcode = 595 MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS; 596 
misc_pkt.change_config.option.bits.limit_single_process = 597 input->change_config.option.limit_single_process; 598 break; 599 default: 600 DRM_ERROR("unsupported misc op (%d) \n", input->op); 601 return -EINVAL; 602 } 603 604 return mes_v12_1_submit_pkt_and_poll_completion(mes, 605 input->xcc_id, pipe, 606 &misc_pkt, sizeof(misc_pkt), 607 offsetof(union MESAPI__MISC, api_status)); 608 } 609 610 static int mes_v12_1_set_hw_resources_1(struct amdgpu_mes *mes, 611 int pipe, int xcc_id) 612 { 613 union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; 614 int master_xcc_id, inst = MES_PIPE_INST(xcc_id, pipe); 615 616 memset(&mes_set_hw_res_1_pkt, 0, sizeof(mes_set_hw_res_1_pkt)); 617 618 mes_set_hw_res_1_pkt.header.type = MES_API_TYPE_SCHEDULER; 619 mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1; 620 mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 621 mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100; 622 623 if (mes->enable_coop_mode && pipe == AMDGPU_MES_SCHED_PIPE) { 624 master_xcc_id = mes->master_xcc_ids[inst]; 625 mes_set_hw_res_1_pkt.mes_coop_mode = 1; 626 mes_set_hw_res_1_pkt.coop_sch_shared_mc_addr = 627 mes->shared_cmd_buf_gpu_addr[master_xcc_id]; 628 } 629 630 return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, 631 &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), 632 offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); 633 } 634 635 static void mes_v12_1_set_gfx_hqd_mask(union MESAPI_SET_HW_RESOURCES *pkt) 636 { 637 /* 638 * GFX V12 has only one GFX pipe, but 8 queues in it. 639 * GFX pipe 0 queue 0 is being used by Kernel queue. 
640 * Set GFX pipe 0 queue 1-7 for MES scheduling 641 * mask = 1111 1110b 642 */ 643 pkt->gfx_hqd_mask[0] = 0xFE; 644 } 645 646 static int mes_v12_1_set_hw_resources(struct amdgpu_mes *mes, 647 int pipe, int xcc_id) 648 { 649 int i; 650 struct amdgpu_device *adev = mes->adev; 651 union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt; 652 653 memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); 654 655 mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER; 656 mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; 657 mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 658 659 if (pipe == AMDGPU_MES_SCHED_PIPE) { 660 mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; 661 mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; 662 mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; 663 mes_set_hw_res_pkt.paging_vmid = 0; 664 665 for (i = 0; i < MAX_COMPUTE_PIPES; i++) 666 mes_set_hw_res_pkt.compute_hqd_mask[i] = 667 mes->compute_hqd_mask[i]; 668 669 mes_v12_1_set_gfx_hqd_mask(&mes_set_hw_res_pkt); 670 671 for (i = 0; i < MAX_SDMA_PIPES; i++) 672 mes_set_hw_res_pkt.sdma_hqd_mask[i] = 673 mes->sdma_hqd_mask[i]; 674 675 for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) 676 mes_set_hw_res_pkt.aggregated_doorbells[i] = 677 mes->aggregated_doorbells[i]; 678 } 679 680 mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = 681 mes->sch_ctx_gpu_addr[pipe]; 682 mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = 683 mes->query_status_fence_gpu_addr[pipe]; 684 685 for (i = 0; i < 5; i++) { 686 mes_set_hw_res_pkt.gc_base[i] = 687 adev->reg_offset[GC_HWIP][0][i]; 688 mes_set_hw_res_pkt.mmhub_base[i] = 689 adev->reg_offset[MMHUB_HWIP][0][i]; 690 mes_set_hw_res_pkt.osssys_base[i] = 691 adev->reg_offset[OSSSYS_HWIP][0][i]; 692 } 693 694 mes_set_hw_res_pkt.disable_reset = 1; 695 mes_set_hw_res_pkt.disable_mes_log = 1; 696 mes_set_hw_res_pkt.use_different_vmid_compute = 1; 697 mes_set_hw_res_pkt.enable_reg_active_poll = 1; 698 
mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; 699 700 /* 701 * Keep oversubscribe timer for sdma . When we have unmapped doorbell 702 * handling support, other queue will not use the oversubscribe timer. 703 * handling mode - 0: disabled; 1: basic version; 2: basic+ version 704 */ 705 mes_set_hw_res_pkt.oversubscription_timer = 50; 706 mes_set_hw_res_pkt.unmapped_doorbell_handling = 1; 707 708 if (amdgpu_mes_log_enable) { 709 mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; 710 mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = 711 mes->event_log_gpu_addr + MES_PIPE_INST(xcc_id, pipe) * AMDGPU_MES_LOG_BUFFER_SIZE; 712 } 713 714 if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) 715 mes_set_hw_res_pkt.limit_single_process = 1; 716 717 return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, 718 &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), 719 offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); 720 } 721 722 static void mes_v12_1_init_aggregated_doorbell(struct amdgpu_mes *mes, 723 int xcc_id) 724 { 725 struct amdgpu_device *adev = mes->adev; 726 uint32_t data; 727 728 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1); 729 data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK | 730 CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK | 731 CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK); 732 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] << 733 CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT; 734 data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT; 735 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1, data); 736 737 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2); 738 data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK | 739 CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK | 740 CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK); 741 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] << 742 
CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT; 743 data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT; 744 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2, data); 745 746 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3); 747 data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK | 748 CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK | 749 CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK); 750 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] << 751 CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT; 752 data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT; 753 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3, data); 754 755 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4); 756 data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK | 757 CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK | 758 CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK); 759 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] << 760 CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT; 761 data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT; 762 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4, data); 763 764 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5); 765 data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK | 766 CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK | 767 CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK); 768 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] << 769 CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT; 770 data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT; 771 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5, data); 772 773 data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT; 774 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_GFX_CONTROL, data); 775 } 776 777 778 static void mes_v12_1_enable_unmapped_doorbell_handling( 779 struct amdgpu_mes *mes, bool enable, 
		int xcc_id)
{
	struct amdgpu_device *adev = mes->adev;
	uint32_t data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL);

	/*
	 * The default PROC_LSB setting is 0xc which means doorbell
	 * addr[16:12] gives the doorbell page number. For kfd, each
	 * process will use 2 pages of doorbell, we need to change the
	 * setting to 0xd.
	 */
	data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK;
	data |= 0xd << CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT;

	/* ENABLE bit turns unmapped-doorbell interception on/off */
	data |= (enable ? 1 : 0) << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT;

	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL, data);
}

#if 0
/*
 * Currently compiled out: build and submit a MES RESET packet for either a
 * legacy GFX queue (full per-pipe/queue description) or any other queue
 * (doorbell-offset-only reset).
 */
static int mes_v12_1_reset_legacy_queue(struct amdgpu_mes *mes,
					struct mes_reset_legacy_queue_input *input)
{
	union MESAPI__RESET mes_reset_queue_pkt;
	int pipe;

	memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));

	mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
	mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_reset_queue_pkt.queue_type =
		convert_to_mes_queue_type(input->queue_type);

	if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) {
		/* legacy GFX queue: pass the full queue description */
		mes_reset_queue_pkt.reset_legacy_gfx = 1;
		mes_reset_queue_pkt.pipe_id_lp = input->pipe_id;
		mes_reset_queue_pkt.queue_id_lp = input->queue_id;
		mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr;
		mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset;
		mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr;
		mes_reset_queue_pkt.vmid_id_lp = input->vmid;
	} else {
		/* MES-managed queue: identified by doorbell offset alone */
		mes_reset_queue_pkt.reset_queue_only = 1;
		mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
	}

	if (mes->adev->enable_uni_mes)
		pipe = AMDGPU_MES_KIQ_PIPE;
	else
		pipe = AMDGPU_MES_SCHED_PIPE;

	return mes_v12_1_submit_pkt_and_poll_completion(mes,
			input->xcc_id, pipe,
			&mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
			offsetof(union MESAPI__RESET, api_status));
}
#endif

/* MES API callbacks exposed to the common amdgpu_mes layer. */
static const struct amdgpu_mes_funcs mes_v12_1_funcs = {
	.add_hw_queue = mes_v12_1_add_hw_queue,
	.remove_hw_queue = mes_v12_1_remove_hw_queue,
	.map_legacy_queue = mes_v12_1_map_legacy_queue,
	.unmap_legacy_queue = mes_v12_1_unmap_legacy_queue,
	.suspend_gang = mes_v12_1_suspend_gang,
	.resume_gang = mes_v12_1_resume_gang,
	.misc_op = mes_v12_1_misc_op,
	.reset_hw_queue = mes_v12_1_reset_hw_queue,
};

/*
 * Copy the MES instruction ucode image out of the firmware blob into a
 * page-aligned VRAM BO for the given (xcc, pipe) instance.  Note the
 * firmware blob itself is indexed by pipe while the destination BO is
 * indexed by instance.
 */
static int mes_v12_1_allocate_ucode_buffer(struct amdgpu_device *adev,
					   enum amdgpu_mes_pipe pipe,
					   int xcc_id)
{
	int r, inst = MES_PIPE_INST(xcc_id, pipe);
	const struct mes_firmware_header_v1_0 *mes_hdr;
	const __le32 *fw_data;
	unsigned fw_size;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw[pipe]->data;

	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
			le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
	fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_size,
				      PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->mes.ucode_fw_obj[inst],
				      &adev->mes.ucode_fw_gpu_addr[inst],
				      (void **)&adev->mes.ucode_fw_ptr[inst]);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
		return r;
	}

	memcpy(adev->mes.ucode_fw_ptr[inst], fw_data, fw_size);

	amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[inst]);
	amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[inst]);

	return 0;
}

/*
 * Copy the MES ucode *data* image out of the firmware blob into a VRAM BO
 * for the given (xcc, pipe) instance.
 */
static int mes_v12_1_allocate_ucode_data_buffer(struct amdgpu_device *adev,
						enum amdgpu_mes_pipe pipe,
						int xcc_id)
{
	int r, inst = MES_PIPE_INST(xcc_id, pipe);
	const struct mes_firmware_header_v1_0 *mes_hdr;
	const __le32 *fw_data;
	unsigned fw_size;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw[pipe]->data;

	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
			le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
	fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);

	/* data buffer is 64KB-aligned (vs PAGE_SIZE for the instruction BO) */
	r = amdgpu_bo_create_reserved(adev, fw_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->mes.data_fw_obj[inst],
				      &adev->mes.data_fw_gpu_addr[inst],
				      (void **)&adev->mes.data_fw_ptr[inst]);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
		return r;
	}

	memcpy(adev->mes.data_fw_ptr[inst], fw_data, fw_size);

	amdgpu_bo_kunmap(adev->mes.data_fw_obj[inst]);
	amdgpu_bo_unreserve(adev->mes.data_fw_obj[inst]);

	return 0;
}

/* Free the per-instance ucode and ucode-data BOs allocated above. */
static void mes_v12_1_free_ucode_buffers(struct amdgpu_device *adev,
					 enum amdgpu_mes_pipe pipe,
					 int xcc_id)
{
	int inst = MES_PIPE_INST(xcc_id, pipe);

	amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[inst],
			      &adev->mes.data_fw_gpu_addr[inst],
			      (void **)&adev->mes.data_fw_ptr[inst]);

	amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[inst],
			      &adev->mes.ucode_fw_gpu_addr[inst],
			      (void **)&adev->mes.ucode_fw_ptr[inst]);
}

/*
 * Start or stop the MES micro-engine on one XCC.
 *
 * enable=true: hold both pipes in reset, program each pipe's ucode start
 * address under srbm_mutex (me=3 selected via GRBM), then release reset,
 * unhalt and activate both pipes, and wait for MES to come up.
 * enable=false: deactivate both pipes, invalidate the icache, assert
 * reset and halt.
 */
static void mes_v12_1_enable(struct amdgpu_device *adev,
			     bool enable, int xcc_id)
{
	uint64_t ucode_addr;
	uint32_t pipe, data = 0;

	if (enable) {
		/* hold both MES pipes in reset while programming start addrs */
		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);

		mutex_lock(&adev->srbm_mutex);
		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
			soc_v1_0_grbm_select(adev, 3, pipe, 0, 0,
					GET_INST(GC, xcc_id));

			/* start address is programmed in dwords (addr >> 2) */
			ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
			WREG32_SOC15(GC, GET_INST(GC, xcc_id),
				     regCP_MES_PRGRM_CNTR_START,
				     lower_32_bits(ucode_addr));
			WREG32_SOC15(GC, GET_INST(GC, xcc_id),
				     regCP_MES_PRGRM_CNTR_START_HI,
				     upper_32_bits(ucode_addr));
		}
		soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
		mutex_unlock(&adev->srbm_mutex);

		/* unhalt MES and activate both pipes */
		data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);

		/* emulation needs a much longer settle time */
		if (amdgpu_emu_mode)
			msleep(500);
		else if (adev->enable_uni_mes)
			udelay(500);
		else
			udelay(50);
	} else {
		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL,
				     MES_INVALIDATE_ICACHE, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);
	}
}

/*
 * Program the ucode entry point for both MES pipes on one XCC.  MES is
 * first disabled; callers re-enable it afterwards.
 */
static void mes_v12_1_set_ucode_start_addr(struct amdgpu_device *adev,
					   int xcc_id)
{
	uint64_t ucode_addr;
	int pipe;

	mes_v12_1_enable(adev, false, xcc_id);

	mutex_lock(&adev->srbm_mutex);
	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		/* me=3, queue=0 */
		soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));

		/* set ucode start address (in dwords) */
		ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START,
			     lower_32_bits(ucode_addr));
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START_HI,
			     upper_32_bits(ucode_addr));

		soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
	}
	mutex_unlock(&adev->srbm_mutex);
}

/* This function is for backdoor MES firmware */
static int
mes_v12_1_load_microcode(struct amdgpu_device *adev,
			 enum amdgpu_mes_pipe pipe,
			 bool prime_icache, int xcc_id)
{
	int r, inst = MES_PIPE_INST(xcc_id, pipe);
	uint32_t data;

	/* MES must be halted while its caches are repointed */
	mes_v12_1_enable(adev, false, xcc_id);

	if (!adev->mes.fw[pipe])
		return -EINVAL;

	r = mes_v12_1_allocate_ucode_buffer(adev, pipe, xcc_id);
	if (r)
		return r;

	r = mes_v12_1_allocate_ucode_data_buffer(adev, pipe, xcc_id);
	if (r) {
		/* roll back the instruction buffer allocated above */
		mes_v12_1_free_ucode_buffers(adev, pipe, xcc_id);
		return r;
	}

	mutex_lock(&adev->srbm_mutex);
	/* me=3, pipe=0, queue=0 */
	soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));

	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_CNTL, 0);

	/* set ucode firmware address */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_LO,
		     lower_32_bits(adev->mes.ucode_fw_gpu_addr[inst]));
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_HI,
		     upper_32_bits(adev->mes.ucode_fw_gpu_addr[inst]));

	/* set ucode instruction cache boundary to 2M-1 */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MIBOUND_LO, 0x1FFFFF);

	/* set ucode data firmware address */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_LO,
		     lower_32_bits(adev->mes.data_fw_gpu_addr[inst]));
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_HI,
		     upper_32_bits(adev->mes.data_fw_gpu_addr[inst]));

	/* Set data cache boundary CP_MES_MDBOUND_LO */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBOUND_LO, 0x7FFFF);

	if (prime_icache) {
		/* invalidate ICACHE */
		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data);

		/* prime the ICACHE. */
		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data);
	}

	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
	mutex_unlock(&adev->srbm_mutex);

	return 0;
}

/* Allocate and zero the per-instance EOP buffer in GTT. */
static int mes_v12_1_allocate_eop_buf(struct amdgpu_device *adev,
				      enum amdgpu_mes_pipe pipe,
				      int xcc_id)
{
	int r, inst = MES_PIPE_INST(xcc_id, pipe);
	u32 *eop;

	r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->mes.eop_gpu_obj[inst],
				      &adev->mes.eop_gpu_addr[inst],
				      (void **)&eop);
	if (r) {
		dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
		return r;
	}

	memset(eop, 0,
	       adev->mes.eop_gpu_obj[inst]->tbo.base.size);

	amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[inst]);
	amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[inst]);

	return 0;
}

/*
 * Allocate the per-instance shared command buffer in VRAM.  Only the
 * scheduler pipe needs one; the KIQ pipe is a no-op.
 */
static int mes_v12_1_allocate_shared_cmd_buf(struct amdgpu_device *adev,
					     enum amdgpu_mes_pipe pipe,
					     int xcc_id)
{
	int r, inst = MES_PIPE_INST(xcc_id, pipe);

	if (pipe == AMDGPU_MES_KIQ_PIPE)
		return 0;

	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM,
				    &adev->mes.shared_cmd_buf_obj[inst],
				    &adev->mes.shared_cmd_buf_gpu_addr[inst],
				    NULL);
	if (r) {
		dev_err(adev->dev,
			"(%d) failed to create shared cmd buf bo\n", r);
		return r;
	}

	return 0;
}

/*
 * Fill in the MQD (memory queue descriptor) for a MES ring from the
 * ring's buffers and doorbell settings.  The MQD is later either pushed
 * through KIQ/MES mapping or written to registers directly by
 * mes_v12_1_queue_init_register().
 */
static int mes_v12_1_mqd_init(struct amdgpu_ring *ring)
{
	struct v12_1_mes_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000007;

	/* EOP base is programmed in 256-byte units (addr >> 8) */
	eop_base_addr = ring->eop_gpu_addr >> 8;

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(MES_EOP_SIZE / 4) - 1));

	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
	mqd->cp_hqd_eop_control = tmp;

	/* disable the queue if it's active */
	ring->wptr = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = regCP_MQD_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = ring->rptr_gpu_addr;
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = ring->wptr_gpu_addr;
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			(order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* enable doorbell */
	tmp = 0;
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_HIT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_EN, 0);
	}
	mqd->cp_hqd_pq_doorbell_control = tmp;

	mqd->cp_hqd_vmid = 0;
	/* activate the queue */
	mqd->cp_hqd_active = 1;

	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE,
			PRELOAD_SIZE, 0x63);
	mqd->cp_hqd_persistent_state = tmp;

	mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT;
	mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
	mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;

	/*
	 * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped
	 * doorbell handling. This is a reserved CP internal register and
	 * cannot be accessed by others.
	 */
	mqd->cp_hqd_gfx_control = BIT(15);

	return 0;
}

/*
 * Write a ring's MQD contents into the HQD registers directly (used for
 * the pipe that cannot be mapped through KIQ/MES), under srbm_mutex with
 * me=3/<ring->pipe> selected.
 */
static void mes_v12_1_queue_init_register(struct amdgpu_ring *ring,
					  int xcc_id)
{
	struct v12_1_mes_mqd *mqd = ring->mqd_ptr;
	struct amdgpu_device *adev = ring->adev;
	uint32_t data = 0;

	mutex_lock(&adev->srbm_mutex);
	soc_v1_0_grbm_select(adev, 3, ring->pipe, 0, 0, GET_INST(GC, xcc_id));

	/* set CP_HQD_VMID.VMID = 0. */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID);
	data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, data);

	/* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL);
	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
			     DOORBELL_EN, 0);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data);

	/* set CP_MQD_BASE_ADDR/HI with the MQD base address */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set CP_MQD_CONTROL.VMID=0 */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL);
	data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
	/*
	 * NOTE(review): the read-modify-write above computes `data` but a
	 * literal 0 is written below, discarding any other bits that were
	 * set in CP_MQD_CONTROL.  The RMW sequence suggests `data` was
	 * intended here — confirm against the register spec and other MES
	 * generations before changing.
	 */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL, 0);

	/* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR,
		     mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id),
regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 1274 mqd->cp_hqd_pq_rptr_report_addr_hi); 1275 1276 /* set CP_HQD_PQ_CONTROL */ 1277 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); 1278 1279 /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */ 1280 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR, 1281 mqd->cp_hqd_pq_wptr_poll_addr_lo); 1282 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 1283 mqd->cp_hqd_pq_wptr_poll_addr_hi); 1284 1285 /* set CP_HQD_PQ_DOORBELL_CONTROL */ 1286 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 1287 mqd->cp_hqd_pq_doorbell_control); 1288 1289 /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */ 1290 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); 1291 1292 /* set CP_HQD_ACTIVE.ACTIVE=1 */ 1293 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, mqd->cp_hqd_active); 1294 1295 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1296 mutex_unlock(&adev->srbm_mutex); 1297 } 1298 1299 static int mes_v12_1_kiq_enable_queue(struct amdgpu_device *adev, int xcc_id) 1300 { 1301 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 1302 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[xcc_id].ring; 1303 int r, inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 1304 1305 if (!kiq->pmf || !kiq->pmf->kiq_map_queues) 1306 return -EINVAL; 1307 1308 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); 1309 if (r) { 1310 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 1311 return r; 1312 } 1313 1314 kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[inst]); 1315 1316 r = amdgpu_ring_test_ring(kiq_ring); 1317 if (r) { 1318 DRM_ERROR("kfq enable failed\n"); 1319 kiq_ring->sched.ready = false; 1320 } 1321 return r; 1322 } 1323 1324 static int mes_v12_1_queue_init(struct amdgpu_device *adev, 1325 enum amdgpu_mes_pipe pipe, 1326 int xcc_id) 1327 { 1328 struct amdgpu_ring *ring; 1329 int r; 1330 1331 if (!adev->enable_uni_mes 
&& pipe == AMDGPU_MES_KIQ_PIPE) 1332 ring = &adev->gfx.kiq[xcc_id].ring; 1333 else 1334 ring = &adev->mes.ring[MES_PIPE_INST(xcc_id, pipe)]; 1335 1336 if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) && 1337 (amdgpu_in_reset(adev) || adev->in_suspend)) { 1338 *(ring->wptr_cpu_addr) = 0; 1339 *(ring->rptr_cpu_addr) = 0; 1340 amdgpu_ring_clear_ring(ring); 1341 } 1342 1343 r = mes_v12_1_mqd_init(ring); 1344 if (r) 1345 return r; 1346 1347 if (pipe == AMDGPU_MES_SCHED_PIPE) { 1348 if (adev->enable_uni_mes) 1349 r = amdgpu_mes_map_legacy_queue(adev, ring, xcc_id); 1350 else 1351 r = mes_v12_1_kiq_enable_queue(adev, xcc_id); 1352 if (r) 1353 return r; 1354 } else { 1355 mes_v12_1_queue_init_register(ring, xcc_id); 1356 } 1357 1358 /* get MES scheduler/KIQ versions */ 1359 mutex_lock(&adev->srbm_mutex); 1360 soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); 1361 1362 if (pipe == AMDGPU_MES_SCHED_PIPE) 1363 adev->mes.sched_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO); 1364 else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) 1365 adev->mes.kiq_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO); 1366 1367 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1368 mutex_unlock(&adev->srbm_mutex); 1369 1370 return 0; 1371 } 1372 1373 static int mes_v12_1_ring_init(struct amdgpu_device *adev, 1374 int xcc_id, int pipe) 1375 { 1376 struct amdgpu_ring *ring; 1377 int inst = MES_PIPE_INST(xcc_id, pipe); 1378 1379 ring = &adev->mes.ring[inst]; 1380 1381 ring->funcs = &mes_v12_1_ring_funcs; 1382 1383 ring->me = 3; 1384 ring->pipe = pipe; 1385 ring->queue = 0; 1386 ring->xcc_id = xcc_id; 1387 ring->vm_hub = AMDGPU_GFXHUB(xcc_id); 1388 1389 ring->ring_obj = NULL; 1390 ring->use_doorbell = true; 1391 ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst]; 1392 ring->no_scheduler = true; 1393 snprintf(ring->name, sizeof(ring->name), "mes_%hhu.%hhu.%hhu.%hhu", 1394 (unsigned char)xcc_id, (unsigned char)ring->me, 
		 (unsigned char)ring->pipe, (unsigned char)ring->queue);

	/* << 1: doorbells are 64-bit, so each index covers two dwords */
	if (pipe == AMDGPU_MES_SCHED_PIPE)
		ring->doorbell_index =
			(adev->doorbell_index.mes_ring0 +
			 xcc_id * adev->doorbell_index.xcc_doorbell_range)
			<< 1;
	else
		ring->doorbell_index =
			(adev->doorbell_index.mes_ring1 +
			 xcc_id * adev->doorbell_index.xcc_doorbell_range)
			<< 1;

	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
				AMDGPU_RING_PRIO_DEFAULT, NULL);
}

/*
 * Initialize the KIQ ring (me=3, pipe=1, queue=0) used when unified MES
 * is disabled, and register it with the ring core.
 */
static int mes_v12_1_kiq_ring_init(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_ring *ring;
	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE);

	spin_lock_init(&adev->gfx.kiq[xcc_id].ring_lock);

	ring = &adev->gfx.kiq[xcc_id].ring;

	ring->me = 3;
	ring->pipe = 1;
	ring->queue = 0;
	ring->xcc_id = xcc_id;
	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);

	/* adev is filled in by amdgpu_ring_init() below */
	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst];
	ring->no_scheduler = true;
	/* << 1: doorbells are 64-bit, so each index covers two dwords */
	ring->doorbell_index =
		(adev->doorbell_index.mes_ring1 +
		 xcc_id * adev->doorbell_index.xcc_doorbell_range)
		<< 1;

	snprintf(ring->name, sizeof(ring->name), "mes_kiq_%hhu.%hhu.%hhu.%hhu",
		 (unsigned char)xcc_id, (unsigned char)ring->me,
		 (unsigned char)ring->pipe, (unsigned char)ring->queue);

	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
				AMDGPU_RING_PRIO_DEFAULT, NULL);
}

/*
 * Allocate the MQD BO (GTT) for the ring backing this (xcc, pipe)
 * instance, plus a CPU-side MQD backup buffer.  Idempotent: returns
 * early if the MQD BO already exists.  A failed backup allocation is
 * only warned about, not fatal.
 */
static int mes_v12_1_mqd_sw_init(struct amdgpu_device *adev,
				 enum amdgpu_mes_pipe pipe,
				 int xcc_id)
{
	int r, mqd_size = sizeof(struct v12_1_mes_mqd);
	struct amdgpu_ring *ring;
	int inst = MES_PIPE_INST(xcc_id, pipe);

	if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
		ring = &adev->gfx.kiq[xcc_id].ring;
	else
		ring = &adev->mes.ring[inst];

	if (ring->mqd_obj)
		return 0;

	r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
				    &ring->mqd_gpu_addr, &ring->mqd_ptr);
	if (r) {
		dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
		return r;
	}

	memset(ring->mqd_ptr, 0, mqd_size);

	/* prepare MQD backup */
	adev->mes.mqd_backup[inst] = kmalloc(mqd_size, GFP_KERNEL);
	if (!adev->mes.mqd_backup[inst])
		dev_warn(adev->dev,
			 "no memory to create MQD backup for ring %s\n",
			 ring->name);

	return 0;
}

/*
 * sw_init: wire up the MES callbacks, size the event log, and allocate
 * per-(xcc, pipe) EOP buffers, MQDs, rings and (in coop mode) shared
 * command buffers.
 */
static int mes_v12_1_sw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int pipe, r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);

	/* cooperative mode: unified MES across multiple XCCs */
	if (adev->enable_uni_mes && num_xcc > 1)
		adev->mes.enable_coop_mode = true;

	adev->mes.funcs = &mes_v12_1_funcs;
	adev->mes.kiq_hw_init = &mes_v12_1_kiq_hw_init;
	adev->mes.kiq_hw_fini = &mes_v12_1_kiq_hw_fini;
	adev->mes.enable_legacy_queue_map = true;

	/* uni-MES: one log buffer per pipe per XCC; otherwise a single one */
	adev->mes.event_log_size =
		adev->enable_uni_mes ?
		(AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE * num_xcc) :
		AMDGPU_MES_LOG_BUFFER_SIZE;

	r = amdgpu_mes_init(adev);
	if (r)
		return r;

	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
			r = mes_v12_1_allocate_eop_buf(adev, pipe, xcc_id);
			if (r)
				return r;

			r = mes_v12_1_mqd_sw_init(adev, pipe, xcc_id);
			if (r)
				return r;

			if (!adev->enable_uni_mes && pipe ==
					AMDGPU_MES_KIQ_PIPE)
				r = mes_v12_1_kiq_ring_init(adev, xcc_id);
			else
				r = mes_v12_1_ring_init(adev, xcc_id, pipe);
			if (r)
				return r;

			if (adev->mes.enable_coop_mode) {
				r = mes_v12_1_allocate_shared_cmd_buf(adev,
						pipe, xcc_id);
				if (r)
					return r;
			}
		}
	}

	return 0;
}

/*
 * sw_fini: release everything sw_init allocated, per (xcc, pipe)
 * instance, then the per-XCC KIQ ring (non-uni-MES) and any backdoor
 * ucode buffers, and finally the common MES state.
 */
static int mes_v12_1_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int pipe, inst, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);

	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
			inst = MES_PIPE_INST(xcc_id, pipe);

			amdgpu_bo_free_kernel(&adev->mes.shared_cmd_buf_obj[inst],
					      &adev->mes.shared_cmd_buf_gpu_addr[inst],
					      NULL);

			kfree(adev->mes.mqd_backup[inst]);

			amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[inst],
					      &adev->mes.eop_gpu_addr[inst],
					      NULL);
			/*
			 * NOTE(review): fw[] is indexed by pipe at load time
			 * (mes_v12_1_allocate_ucode_buffer uses fw[pipe]) but
			 * released by inst here; for num_xcc > 1 these differ.
			 * Confirm how fw[] is sized/populated.
			 */
			amdgpu_ucode_release(&adev->mes.fw[inst]);

			if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) {
				amdgpu_bo_free_kernel(&adev->mes.ring[inst].mqd_obj,
						      &adev->mes.ring[inst].mqd_gpu_addr,
						      &adev->mes.ring[inst].mqd_ptr);
				amdgpu_ring_fini(&adev->mes.ring[inst]);
			}
		}
	}

	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
		if (!adev->enable_uni_mes) {
			amdgpu_bo_free_kernel(&adev->gfx.kiq[xcc_id].ring.mqd_obj,
					      &adev->gfx.kiq[xcc_id].ring.mqd_gpu_addr,
					      &adev->gfx.kiq[xcc_id].ring.mqd_ptr);
			amdgpu_ring_fini(&adev->gfx.kiq[xcc_id].ring);
		}

		/* backdoor-loaded ucode buffers only exist for direct load */
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
			mes_v12_1_free_ucode_buffers(adev,
					AMDGPU_MES_KIQ_PIPE, xcc_id);
			mes_v12_1_free_ucode_buffers(adev,
					AMDGPU_MES_SCHED_PIPE, xcc_id);
		}
	}

	amdgpu_mes_fini(adev);
	return 0;
}

/*
 * Tear down the scheduler-pipe HQD through direct register access:
 * request dequeue and poll until inactive, then clear the doorbell
 * control and queue pointers, and mark the MES ring not-ready.
 */
static void mes_v12_1_kiq_dequeue_sched(struct amdgpu_device *adev,
					int xcc_id)
{
	uint32_t data;
	int i;

	mutex_lock(&adev->srbm_mutex);
	soc_v1_0_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0,
			     GET_INST(GC, xcc_id));

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) {
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1);
		/* poll (up to usec_timeout) for the HQD to go inactive */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
	}
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL);
	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
			     DOORBELL_EN, 0);
	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
			     DOORBELL_HIT, 1);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data);

	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0);

	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0);

	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
	mutex_unlock(&adev->srbm_mutex);

	adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = false;
}

static void mes_v12_1_kiq_setting(struct amdgpu_ring *ring, int xcc_id)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/*
tell RLC which is KIQ queue */ 1624 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS); 1625 tmp &= 0xffffff00; 1626 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 1627 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); 1628 tmp |= 0x80; 1629 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); 1630 } 1631 1632 static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id) 1633 { 1634 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE); 1635 int r = 0; 1636 struct amdgpu_ip_block *ip_block; 1637 1638 if (adev->enable_uni_mes) 1639 mes_v12_1_kiq_setting(&adev->mes.ring[inst], xcc_id); 1640 else 1641 mes_v12_1_kiq_setting(&adev->gfx.kiq[xcc_id].ring, xcc_id); 1642 1643 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1644 1645 r = mes_v12_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, 1646 false, xcc_id); 1647 if (r) { 1648 DRM_ERROR("failed to load MES fw, r=%d\n", r); 1649 return r; 1650 } 1651 1652 r = mes_v12_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, 1653 true, xcc_id); 1654 if (r) { 1655 DRM_ERROR("failed to load MES kiq fw, r=%d\n", r); 1656 return r; 1657 } 1658 1659 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1660 1661 } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1662 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1663 1664 mes_v12_1_enable(adev, true, xcc_id); 1665 1666 ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES); 1667 if (unlikely(!ip_block)) { 1668 dev_err(adev->dev, "Failed to get MES handle\n"); 1669 return -EINVAL; 1670 } 1671 1672 r = mes_v12_1_queue_init(adev, AMDGPU_MES_KIQ_PIPE, xcc_id); 1673 if (r) 1674 goto failure; 1675 1676 if (adev->enable_uni_mes) { 1677 r = mes_v12_1_set_hw_resources(&adev->mes, 1678 AMDGPU_MES_KIQ_PIPE, xcc_id); 1679 if (r) 1680 goto failure; 1681 1682 mes_v12_1_set_hw_resources_1(&adev->mes, 1683 AMDGPU_MES_KIQ_PIPE, xcc_id); 1684 } 1685 1686 if (adev->mes.enable_legacy_queue_map) { 
1687 r = mes_v12_1_xcc_hw_init(ip_block, xcc_id); 1688 if (r) 1689 goto failure; 1690 } 1691 1692 return r; 1693 1694 failure: 1695 mes_v12_1_hw_fini(ip_block); 1696 return r; 1697 } 1698 1699 static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id) 1700 { 1701 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 1702 1703 if (adev->mes.ring[inst].sched.ready) { 1704 if (adev->enable_uni_mes) 1705 amdgpu_mes_unmap_legacy_queue(adev, 1706 &adev->mes.ring[inst], 1707 RESET_QUEUES, 0, 0, xcc_id); 1708 else 1709 mes_v12_1_kiq_dequeue_sched(adev, xcc_id); 1710 1711 adev->mes.ring[inst].sched.ready = false; 1712 } 1713 1714 mes_v12_1_enable(adev, false, xcc_id); 1715 1716 return 0; 1717 } 1718 1719 static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id) 1720 { 1721 int r; 1722 struct amdgpu_device *adev = ip_block->adev; 1723 1724 if (adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready) 1725 goto out; 1726 1727 if (!adev->enable_mes_kiq) { 1728 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1729 r = mes_v12_1_load_microcode(adev, 1730 AMDGPU_MES_SCHED_PIPE, true, xcc_id); 1731 if (r) { 1732 DRM_ERROR("failed to MES fw, r=%d\n", r); 1733 return r; 1734 } 1735 1736 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1737 1738 } else if (adev->firmware.load_type == 1739 AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1740 1741 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1742 } 1743 1744 mes_v12_1_enable(adev, true, xcc_id); 1745 } 1746 1747 /* Enable the MES to handle doorbell ring on unmapped queue */ 1748 mes_v12_1_enable_unmapped_doorbell_handling(&adev->mes, true, xcc_id); 1749 1750 r = mes_v12_1_queue_init(adev, AMDGPU_MES_SCHED_PIPE, xcc_id); 1751 if (r) 1752 goto failure; 1753 1754 r = mes_v12_1_set_hw_resources(&adev->mes, 1755 AMDGPU_MES_SCHED_PIPE, xcc_id); 1756 if (r) 1757 goto failure; 1758 1759 if (adev->enable_uni_mes) 1760 mes_v12_1_set_hw_resources_1(&adev->mes, 1761 AMDGPU_MES_SCHED_PIPE, xcc_id); 1762 1763 
mes_v12_1_init_aggregated_doorbell(&adev->mes, xcc_id); 1764 1765 r = mes_v12_1_query_sched_status(&adev->mes, 1766 AMDGPU_MES_SCHED_PIPE, xcc_id); 1767 if (r) { 1768 DRM_ERROR("MES is busy\n"); 1769 goto failure; 1770 } 1771 1772 out: 1773 /* 1774 * Disable KIQ ring usage from the driver once MES is enabled. 1775 * MES uses KIQ ring exclusively so driver cannot access KIQ ring 1776 * with MES enabled. 1777 */ 1778 adev->gfx.kiq[xcc_id].ring.sched.ready = false; 1779 adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = true; 1780 1781 return 0; 1782 1783 failure: 1784 mes_v12_1_hw_fini(ip_block); 1785 return r; 1786 } 1787 1788 static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block) 1789 { 1790 struct amdgpu_device *adev = ip_block->adev; 1791 int r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1792 1793 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1794 /* for SPX mode, all master xcc ids are set to 0 */ 1795 if (adev->mes.enable_coop_mode) 1796 adev->mes.master_xcc_ids[xcc_id] = 0; 1797 1798 r = mes_v12_1_xcc_hw_init(ip_block, xcc_id); 1799 if (r) 1800 return r; 1801 } 1802 1803 return 0; 1804 } 1805 1806 static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block) 1807 { 1808 return 0; 1809 } 1810 1811 static int mes_v12_1_suspend(struct amdgpu_ip_block *ip_block) 1812 { 1813 int r; 1814 1815 r = amdgpu_mes_suspend(ip_block->adev); 1816 if (r) 1817 return r; 1818 1819 return mes_v12_1_hw_fini(ip_block); 1820 } 1821 1822 static int mes_v12_1_resume(struct amdgpu_ip_block *ip_block) 1823 { 1824 int r; 1825 1826 r = mes_v12_1_hw_init(ip_block); 1827 if (r) 1828 return r; 1829 1830 return amdgpu_mes_resume(ip_block->adev); 1831 } 1832 1833 static int mes_v12_1_early_init(struct amdgpu_ip_block *ip_block) 1834 { 1835 struct amdgpu_device *adev = ip_block->adev; 1836 int pipe, r; 1837 1838 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1839 r = amdgpu_mes_init_microcode(adev, pipe); 1840 if (r) 1841 return r; 1842 } 1843 1844 return 0; 1845 } 1846 
/* amd_ip_funcs vtable wiring the MES v12.1 lifecycle callbacks into the
 * amdgpu IP-block framework. Hooks not listed (e.g. late_init set to NULL)
 * are skipped by the core.
 */
static const struct amd_ip_funcs mes_v12_1_ip_funcs = {
	.name = "mes_v12_1",
	.early_init = mes_v12_1_early_init,
	.late_init = NULL,
	.sw_init = mes_v12_1_sw_init,
	.sw_fini = mes_v12_1_sw_fini,
	.hw_init = mes_v12_1_hw_init,
	.hw_fini = mes_v12_1_hw_fini,
	.suspend = mes_v12_1_suspend,
	.resume = mes_v12_1_resume,
};

/* Exported IP-block descriptor: identifies this module as the MES IP at
 * version 12.1.0 and points the amdgpu core at the function table above.
 */
const struct amdgpu_ip_block_version mes_v12_1_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_MES,
	.major = 12,
	.minor = 1,
	.rev = 0,
	.funcs = &mes_v12_1_ip_funcs,
};