1 /* 2 * Copyright 2025 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <linux/firmware.h> 25 #include <linux/module.h> 26 #include "amdgpu.h" 27 #include "soc15_common.h" 28 #include "soc_v1_0.h" 29 #include "gc/gc_12_1_0_offset.h" 30 #include "gc/gc_12_1_0_sh_mask.h" 31 #include "gc/gc_11_0_0_default.h" 32 #include "v12_structs.h" 33 #include "mes_v12_api_def.h" 34 35 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin"); 36 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin"); 37 MODULE_FIRMWARE("amdgpu/gc_12_1_0_uni_mes.bin"); 38 39 static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block); 40 static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id); 41 static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block); 42 static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id); 43 static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id); 44 45 #define MES_EOP_SIZE 2048 46 47 #define regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT 0x100000 48 #define XCC_REG_RANGE_0_LOW 0x1260 /* XCC gfxdec0 lower Bound */ 49 #define XCC_REG_RANGE_0_HIGH 0x3C00 /* XCC gfxdec0 upper Bound */ 50 #define XCC_REG_RANGE_1_LOW 0xA000 /* XCC gfxdec1 lower Bound */ 51 #define XCC_REG_RANGE_1_HIGH 0x10000 /* XCC gfxdec1 upper Bound */ 52 #define XCC_MID_MASK 0x41000000 53 54 #define NORMALIZE_XCC_REG_OFFSET(offset) \ 55 (offset & 0x3FFFF) 56 57 static void mes_v12_1_ring_set_wptr(struct amdgpu_ring *ring) 58 { 59 struct amdgpu_device *adev = ring->adev; 60 61 if (ring->use_doorbell) { 62 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 63 ring->wptr); 64 WDOORBELL64(ring->doorbell_index, ring->wptr); 65 } else { 66 BUG(); 67 } 68 } 69 70 static u64 mes_v12_1_ring_get_rptr(struct amdgpu_ring *ring) 71 { 72 return *ring->rptr_cpu_addr; 73 } 74 75 static u64 mes_v12_1_ring_get_wptr(struct amdgpu_ring *ring) 76 { 77 u64 wptr; 78 79 if (ring->use_doorbell) 80 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 81 else 82 BUG(); 83 return wptr; 84 } 85 86 static const struct amdgpu_ring_funcs mes_v12_1_ring_funcs = { 87 .type = AMDGPU_RING_TYPE_MES, 88 .align_mask = 1, 89 .nop = 0, 90 .support_64bit_ptrs = true, 91 .get_rptr = mes_v12_1_ring_get_rptr, 92 .get_wptr = mes_v12_1_ring_get_wptr, 93 .set_wptr = mes_v12_1_ring_set_wptr, 94 .insert_nop = amdgpu_ring_insert_nop, 95 }; 96 97 static const char *mes_v12_1_opcodes[] = { 98 "SET_HW_RSRC", 99 "SET_SCHEDULING_CONFIG", 100 "ADD_QUEUE", 101 "REMOVE_QUEUE", 102 "PERFORM_YIELD", 103 "SET_GANG_PRIORITY_LEVEL", 104 "SUSPEND", 105 "RESUME", 106 "RESET", 107 "SET_LOG_BUFFER", 108 "CHANGE_GANG_PRORITY", 109 "QUERY_SCHEDULER_STATUS", 110 "unused", 111 "SET_DEBUG_VMID", 112 "MISC", 113 "UPDATE_ROOT_PAGE_TABLE", 114 "AMD_LOG", 115 "SET_SE_MODE", 116 "SET_GANG_SUBMIT", 117 "SET_HW_RSRC_1", 118 }; 119 120 static const char *mes_v12_1_misc_opcodes[] = { 121 "WRITE_REG", 122 "INV_GART", 123 "QUERY_STATUS", 124 "READ_REG", 125 "WAIT_REG_MEM", 126 "SET_SHADER_DEBUGGER", 127 "NOTIFY_WORK_ON_UNMAPPED_QUEUE", 128 "NOTIFY_TO_UNMAP_PROCESSES", 129 }; 130 131 static const char *mes_v12_1_get_op_string(union MESAPI__MISC *x_pkt) 132 { 133 const char *op_str = NULL; 134 135 if (x_pkt->header.opcode < ARRAY_SIZE(mes_v12_1_opcodes)) 136 op_str = mes_v12_1_opcodes[x_pkt->header.opcode]; 137 138 return op_str; 139 } 140 141 static const char *mes_v12_1_get_misc_op_string(union MESAPI__MISC *x_pkt) 142 { 143 const char *op_str = NULL; 144 145 if ((x_pkt->header.opcode == MES_SCH_API_MISC) && 146 (x_pkt->opcode < ARRAY_SIZE(mes_v12_1_misc_opcodes))) 147 op_str = mes_v12_1_misc_opcodes[x_pkt->opcode]; 148 149 return op_str; 150 } 151 152 static int mes_v12_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, 153 int xcc_id, int pipe, void *pkt, 154 int size, int api_status_off) 155 { 156 union MESAPI__QUERY_MES_STATUS mes_status_pkt; 157 signed long timeout = 2100000; /* 2100 ms */ 158 struct amdgpu_device *adev = mes->adev; 159 struct amdgpu_ring *ring = &mes->ring[MES_PIPE_INST(xcc_id, pipe)]; 160 spinlock_t *ring_lock = &mes->ring_lock[MES_PIPE_INST(xcc_id, pipe)]; 161 struct MES_API_STATUS *api_status; 162 union MESAPI__MISC *x_pkt = pkt; 163 const char *op_str, *misc_op_str; 164 unsigned long flags; 165 u64 status_gpu_addr; 166 u32 seq, status_offset; 167 u64 *status_ptr; 168 signed long r; 169 int ret; 170 171 if (x_pkt->header.opcode >= MES_SCH_API_MAX) 172 return -EINVAL; 173 174 if (amdgpu_emu_mode) { 175 timeout *= 1000; 176 } else if (amdgpu_sriov_vf(adev)) { 177 /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */ 178 timeout = 15 * 600 * 1000; 179 } 180 181 ret = amdgpu_device_wb_get(adev, &status_offset); 182 if (ret) 183 return ret; 184 185 status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4); 186 status_ptr = (u64 *)&adev->wb.wb[status_offset]; 187 *status_ptr = 0; 188 189 spin_lock_irqsave(ring_lock, flags); 190 r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); 191 if (r) 192 goto error_unlock_free; 193 194 seq = ++ring->fence_drv.sync_seq; 195 r = amdgpu_fence_wait_polling(ring, 196 seq - ring->fence_drv.num_fences_mask, 197 timeout); 198 if (r < 1) 199 goto error_undo; 200 201 api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); 202 api_status->api_completion_fence_addr = status_gpu_addr; 203 api_status->api_completion_fence_value = 1; 204 205 amdgpu_ring_write_multiple(ring, pkt, size / 4); 206 207 memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); 208 mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; 209 mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; 210 mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 211 mes_status_pkt.api_status.api_completion_fence_addr = 212 ring->fence_drv.gpu_addr; 213 mes_status_pkt.api_status.api_completion_fence_value = seq; 214 215 amdgpu_ring_write_multiple(ring, &mes_status_pkt, 216 sizeof(mes_status_pkt) / 4); 217 218 amdgpu_ring_commit(ring); 219 spin_unlock_irqrestore(ring_lock, flags); 220 221 op_str = mes_v12_1_get_op_string(x_pkt); 222 misc_op_str = mes_v12_1_get_misc_op_string(x_pkt); 223 224 if (misc_op_str) 225 dev_dbg(adev->dev, "MES(%d, %d) msg=%s (%s) was emitted\n", 226 xcc_id, pipe, op_str, misc_op_str); 227 else if (op_str) 228 dev_dbg(adev->dev, "MES(%d, %d) msg=%s was emitted\n", 229 xcc_id, pipe, op_str); 230 else 231 dev_dbg(adev->dev, "MES(%d, %d) msg=%d was emitted\n", 232 xcc_id, pipe, x_pkt->header.opcode); 233 234 r = amdgpu_fence_wait_polling(ring, seq, timeout); 235 if (r < 1 || !*status_ptr) { 236 if (misc_op_str) 237 dev_err(adev->dev, 238 "MES(%d, %d) failed to respond to msg=%s (%s)\n", 239 xcc_id, pipe, op_str, misc_op_str); 240 else if (op_str) 241 dev_err(adev->dev, 242 "MES(%d, %d) failed to respond to msg=%s\n", 243 xcc_id, pipe, op_str); 244 else 245 dev_err(adev->dev, 246 "MES(%d, %d) failed to respond to msg=%d\n", 247 xcc_id, pipe, x_pkt->header.opcode); 248 249 while (halt_if_hws_hang) 250 schedule(); 251 252 r = -ETIMEDOUT; 253 goto error_wb_free; 254 } 255 256 amdgpu_device_wb_free(adev, status_offset); 257 return 0; 258 259 error_undo: 260 dev_err(adev->dev, "MES(%d, %d) ring buffer is full.\n", xcc_id, pipe); 261 amdgpu_ring_undo(ring); 262 263 error_unlock_free: 264 spin_unlock_irqrestore(ring_lock, flags); 265 266 error_wb_free: 267 amdgpu_device_wb_free(adev, status_offset); 268 return r; 269 } 270 271 static int convert_to_mes_queue_type(int queue_type) 272 { 273 if (queue_type == AMDGPU_RING_TYPE_GFX) 274 return MES_QUEUE_TYPE_GFX; 275 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) 276 return MES_QUEUE_TYPE_COMPUTE; 277 else if (queue_type == AMDGPU_RING_TYPE_SDMA) 278 return MES_QUEUE_TYPE_SDMA; 279 else if (queue_type == AMDGPU_RING_TYPE_MES) 280 return MES_QUEUE_TYPE_SCHQ; 281 else 282 BUG(); 283 return -1; 284 } 285 286 static int mes_v12_1_add_hw_queue(struct amdgpu_mes *mes, 287 struct mes_add_queue_input *input) 288 { 289 union MESAPI__ADD_QUEUE mes_add_queue_pkt; 290 int xcc_id = input->xcc_id; 291 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 292 293 if (mes->enable_coop_mode) 294 xcc_id = mes->master_xcc_ids[inst]; 295 296 memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); 297 298 mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 299 mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; 300 mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 301 302 mes_add_queue_pkt.process_id = input->process_id; 303 mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr; 304 mes_add_queue_pkt.process_va_start = input->process_va_start; 305 mes_add_queue_pkt.process_va_end = input->process_va_end; 306 mes_add_queue_pkt.process_quantum = input->process_quantum; 307 mes_add_queue_pkt.process_context_addr = input->process_context_addr; 308 mes_add_queue_pkt.gang_quantum = input->gang_quantum; 309 mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; 310 mes_add_queue_pkt.inprocess_gang_priority = 311 input->inprocess_gang_priority; 312 mes_add_queue_pkt.gang_global_priority_level = 313 input->gang_global_priority_level; 314 mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; 315 mes_add_queue_pkt.mqd_addr = input->mqd_addr; 316 317 mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr; 318 319 mes_add_queue_pkt.queue_type = 320 convert_to_mes_queue_type(input->queue_type); 321 mes_add_queue_pkt.paging = input->paging; 322 mes_add_queue_pkt.vm_context_cntl = input->vm_cntx_cntl; 323 mes_add_queue_pkt.gws_base = input->gws_base; 324 mes_add_queue_pkt.gws_size = input->gws_size; 325 mes_add_queue_pkt.trap_handler_addr = input->tba_addr; 326 mes_add_queue_pkt.tma_addr = input->tma_addr; 327 mes_add_queue_pkt.trap_en = input->trap_en; 328 mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear; 329 mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; 330 331 /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ 332 mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; 333 mes_add_queue_pkt.gds_size = input->queue_size; 334 335 /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ 336 mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; 337 mes_add_queue_pkt.gds_size = input->queue_size; 338 339 mes_add_queue_pkt.full_sh_mem_config_data = input->sh_mem_config_data; 340 341 return mes_v12_1_submit_pkt_and_poll_completion(mes, 342 xcc_id, AMDGPU_MES_SCHED_PIPE, 343 &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), 344 offsetof(union MESAPI__ADD_QUEUE, api_status)); 345 } 346 347 static int mes_v12_1_remove_hw_queue(struct amdgpu_mes *mes, 348 struct mes_remove_queue_input *input) 349 { 350 union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; 351 int xcc_id = input->xcc_id; 352 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 353 354 if (mes->enable_coop_mode) 355 xcc_id = mes->master_xcc_ids[inst]; 356 357 memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); 358 359 mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 360 mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; 361 mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 362 363 mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; 364 mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; 365 366 return mes_v12_1_submit_pkt_and_poll_completion(mes, 367 xcc_id, AMDGPU_MES_SCHED_PIPE, 368 &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), 369 offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 370 } 371 372 static int mes_v12_1_reset_hw_queue(struct amdgpu_mes *mes, 373 struct mes_reset_queue_input *input) 374 { 375 union MESAPI__RESET mes_reset_queue_pkt; 376 int pipe; 377 378 memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); 379 380 mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 381 mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; 382 mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 383 384 mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; 385 /* mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; */ 386 /*mes_reset_queue_pkt.reset_queue_only = 1;*/ 387 388 if (mes->adev->enable_uni_mes) 389 pipe = AMDGPU_MES_KIQ_PIPE; 390 else 391 pipe = AMDGPU_MES_SCHED_PIPE; 392 393 return mes_v12_1_submit_pkt_and_poll_completion(mes, 394 input->xcc_id, pipe, 395 &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), 396 offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 397 } 398 399 static int mes_v12_1_map_legacy_queue(struct amdgpu_mes *mes, 400 struct mes_map_legacy_queue_input *input) 401 { 402 union MESAPI__ADD_QUEUE mes_add_queue_pkt; 403 int pipe; 404 405 memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); 406 407 mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 408 mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; 409 mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 410 411 mes_add_queue_pkt.pipe_id = input->pipe_id; 412 mes_add_queue_pkt.queue_id = input->queue_id; 413 mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; 414 mes_add_queue_pkt.mqd_addr = input->mqd_addr; 415 mes_add_queue_pkt.wptr_addr = input->wptr_addr; 416 mes_add_queue_pkt.queue_type = 417 convert_to_mes_queue_type(input->queue_type); 418 mes_add_queue_pkt.map_legacy_kq = 1; 419 420 if (mes->adev->enable_uni_mes) 421 pipe = AMDGPU_MES_KIQ_PIPE; 422 else 423 pipe = AMDGPU_MES_SCHED_PIPE; 424 425 return mes_v12_1_submit_pkt_and_poll_completion(mes, 426 input->xcc_id, pipe, 427 &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), 428 offsetof(union MESAPI__ADD_QUEUE, api_status)); 429 } 430 431 static int mes_v12_1_unmap_legacy_queue(struct amdgpu_mes *mes, 432 struct mes_unmap_legacy_queue_input *input) 433 { 434 union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; 435 int pipe; 436 437 memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); 438 439 mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 440 mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; 441 mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 442 443 mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; 444 mes_remove_queue_pkt.gang_context_addr = 0; 445 446 mes_remove_queue_pkt.pipe_id = input->pipe_id; 447 mes_remove_queue_pkt.queue_id = input->queue_id; 448 449 if (input->action == PREEMPT_QUEUES_NO_UNMAP) { 450 mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1; 451 mes_remove_queue_pkt.tf_addr = input->trail_fence_addr; 452 mes_remove_queue_pkt.tf_data = 453 lower_32_bits(input->trail_fence_data); 454 } else { 455 mes_remove_queue_pkt.unmap_legacy_queue = 1; 456 mes_remove_queue_pkt.queue_type = 457 convert_to_mes_queue_type(input->queue_type); 458 } 459 460 if (mes->adev->enable_uni_mes) 461 pipe = AMDGPU_MES_KIQ_PIPE; 462 else 463 pipe = AMDGPU_MES_SCHED_PIPE; 464 465 return mes_v12_1_submit_pkt_and_poll_completion(mes, 466 input->xcc_id, pipe, 467 &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), 468 offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 469 } 470 471 static int mes_v12_1_suspend_gang(struct amdgpu_mes *mes, 472 struct mes_suspend_gang_input *input) 473 { 474 return 0; 475 } 476 477 static int mes_v12_1_resume_gang(struct amdgpu_mes *mes, 478 struct mes_resume_gang_input *input) 479 { 480 return 0; 481 } 482 483 static int mes_v12_1_query_sched_status(struct amdgpu_mes *mes, 484 int pipe, int xcc_id) 485 { 486 union MESAPI__QUERY_MES_STATUS mes_status_pkt; 487 488 memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); 489 490 mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; 491 mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; 492 mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 493 494 return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, 495 &mes_status_pkt, sizeof(mes_status_pkt), 496 offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); 497 } 498 static uint32_t mes_v12_1_get_xcc_from_reg(uint32_t reg_offset) 499 { 500 /* Check xcc reg offset range */ 501 uint32_t xcc = (reg_offset & XCC_MID_MASK) ? 4 : 0; 502 /* Each XCC has two register ranges. 503 * These are represented in reg_offset[17:16] 504 */ 505 return ((reg_offset >> 16) & 0x3) + xcc; 506 } 507 508 static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id, 509 struct RRMT_OPTION *rrmt_opt) 510 { 511 uint32_t normalized_reg = NORMALIZE_XCC_REG_OFFSET(reg); 512 513 if (((normalized_reg >= XCC_REG_RANGE_0_LOW) && (normalized_reg < XCC_REG_RANGE_0_HIGH)) || 514 ((normalized_reg >= XCC_REG_RANGE_1_LOW) && (normalized_reg < XCC_REG_RANGE_1_HIGH))) { 515 rrmt_opt->xcd_die_id = mes_v12_1_get_xcc_from_reg(reg); 516 rrmt_opt->mode = (xcc_id == rrmt_opt->xcd_die_id) ? 517 MES_RRMT_MODE_LOCAL_XCD : MES_RRMT_MODE_REMOTE_XCD; 518 } else { 519 rrmt_opt->mode = MES_RRMT_MODE_LOCAL_REMOTE_AID; 520 } 521 } 522 523 static int mes_v12_1_misc_op(struct amdgpu_mes *mes, 524 struct mes_misc_op_input *input) 525 { 526 union MESAPI__MISC misc_pkt; 527 int pipe; 528 529 if (mes->adev->enable_uni_mes) 530 pipe = AMDGPU_MES_KIQ_PIPE; 531 else 532 pipe = AMDGPU_MES_SCHED_PIPE; 533 534 memset(&misc_pkt, 0, sizeof(misc_pkt)); 535 536 misc_pkt.header.type = MES_API_TYPE_SCHEDULER; 537 misc_pkt.header.opcode = MES_SCH_API_MISC; 538 misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 539 540 switch (input->op) { 541 case MES_MISC_OP_READ_REG: 542 misc_pkt.opcode = MESAPI_MISC__READ_REG; 543 misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset; 544 misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr; 545 mes_v12_1_get_rrmt(input->read_reg.reg_offset, input->xcc_id, 546 &misc_pkt.read_reg.rrmt_opt); 547 break; 548 case MES_MISC_OP_WRITE_REG: 549 misc_pkt.opcode = MESAPI_MISC__WRITE_REG; 550 misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset; 551 misc_pkt.write_reg.reg_value = input->write_reg.reg_value; 552 mes_v12_1_get_rrmt(input->write_reg.reg_offset, input->xcc_id, 553 &misc_pkt.write_reg.rrmt_opt); 554 break; 555 case MES_MISC_OP_WRM_REG_WAIT: 556 misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; 557 misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM; 558 misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; 559 misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; 560 misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; 561 misc_pkt.wait_reg_mem.reg_offset2 = 0; 562 mes_v12_1_get_rrmt(input->wrm_reg.reg0, input->xcc_id, 563 &misc_pkt.wait_reg_mem.rrmt_opt1); 564 break; 565 case MES_MISC_OP_WRM_REG_WR_WAIT: 566 misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; 567 misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG; 568 misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; 569 misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; 570 misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; 571 misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1; 572 mes_v12_1_get_rrmt(input->wrm_reg.reg0, input->xcc_id, 573 &misc_pkt.wait_reg_mem.rrmt_opt1); 574 mes_v12_1_get_rrmt(input->wrm_reg.reg1, input->xcc_id, 575 &misc_pkt.wait_reg_mem.rrmt_opt2); 576 break; 577 case MES_MISC_OP_SET_SHADER_DEBUGGER: 578 pipe = AMDGPU_MES_SCHED_PIPE; 579 misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER; 580 misc_pkt.set_shader_debugger.process_context_addr = 581 input->set_shader_debugger.process_context_addr; 582 misc_pkt.set_shader_debugger.flags.u32all = 583 input->set_shader_debugger.flags.u32all; 584 misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl = 585 input->set_shader_debugger.spi_gdbg_per_vmid_cntl; 586 memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl, 587 input->set_shader_debugger.tcp_watch_cntl, 588 sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); 589 misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; 590 break; 591 case MES_MISC_OP_CHANGE_CONFIG: 592 misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; 593 misc_pkt.change_config.opcode = 594 MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS; 595 misc_pkt.change_config.option.bits.limit_single_process = 596 input->change_config.option.limit_single_process; 597 break; 598 default: 599 DRM_ERROR("unsupported misc op (%d) \n", input->op); 600 return -EINVAL; 601 } 602 603 return mes_v12_1_submit_pkt_and_poll_completion(mes, 604 input->xcc_id, pipe, 605 &misc_pkt, sizeof(misc_pkt), 606 offsetof(union MESAPI__MISC, api_status)); 607 } 608 609 static int mes_v12_1_set_hw_resources_1(struct amdgpu_mes *mes, 610 int pipe, int xcc_id) 611 { 612 union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; 613 int master_xcc_id, inst = MES_PIPE_INST(xcc_id, pipe); 614 615 memset(&mes_set_hw_res_1_pkt, 0, sizeof(mes_set_hw_res_1_pkt)); 616 617 mes_set_hw_res_1_pkt.header.type = MES_API_TYPE_SCHEDULER; 618 mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1; 619 mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 620 mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100; 621 622 if (mes->enable_coop_mode && pipe == AMDGPU_MES_SCHED_PIPE) { 623 master_xcc_id = mes->master_xcc_ids[inst]; 624 mes_set_hw_res_1_pkt.mes_coop_mode = 1; 625 mes_set_hw_res_1_pkt.coop_sch_shared_mc_addr = 626 mes->shared_cmd_buf_gpu_addr[master_xcc_id]; 627 } 628 629 return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, 630 &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), 631 offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); 632 } 633 634 static void mes_v12_1_set_gfx_hqd_mask(union MESAPI_SET_HW_RESOURCES *pkt) 635 { 636 /* 637 * GFX V12 has only one GFX pipe, but 8 queues in it. 638 * GFX pipe 0 queue 0 is being used by Kernel queue. 639 * Set GFX pipe 0 queue 1-7 for MES scheduling 640 * mask = 1111 1110b 641 */ 642 pkt->gfx_hqd_mask[0] = 0xFE; 643 } 644 645 static int mes_v12_1_set_hw_resources(struct amdgpu_mes *mes, 646 int pipe, int xcc_id) 647 { 648 int i; 649 struct amdgpu_device *adev = mes->adev; 650 union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt; 651 652 memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); 653 654 mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER; 655 mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; 656 mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 657 658 if (pipe == AMDGPU_MES_SCHED_PIPE) { 659 mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; 660 mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; 661 mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; 662 mes_set_hw_res_pkt.paging_vmid = 0; 663 664 for (i = 0; i < MAX_COMPUTE_PIPES; i++) 665 mes_set_hw_res_pkt.compute_hqd_mask[i] = 666 mes->compute_hqd_mask[i]; 667 668 mes_v12_1_set_gfx_hqd_mask(&mes_set_hw_res_pkt); 669 670 for (i = 0; i < MAX_SDMA_PIPES; i++) 671 mes_set_hw_res_pkt.sdma_hqd_mask[i] = 672 mes->sdma_hqd_mask[i]; 673 674 for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) 675 mes_set_hw_res_pkt.aggregated_doorbells[i] = 676 mes->aggregated_doorbells[i]; 677 } 678 679 mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = 680 mes->sch_ctx_gpu_addr[pipe]; 681 mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = 682 mes->query_status_fence_gpu_addr[pipe]; 683 684 for (i = 0; i < 5; i++) { 685 mes_set_hw_res_pkt.gc_base[i] = 686 adev->reg_offset[GC_HWIP][0][i]; 687 mes_set_hw_res_pkt.mmhub_base[i] = 688 adev->reg_offset[MMHUB_HWIP][0][i]; 689 mes_set_hw_res_pkt.osssys_base[i] = 690 adev->reg_offset[OSSSYS_HWIP][0][i]; 691 } 692 693 mes_set_hw_res_pkt.disable_reset = 1; 694 mes_set_hw_res_pkt.disable_mes_log = 1; 695 mes_set_hw_res_pkt.use_different_vmid_compute = 1; 696 mes_set_hw_res_pkt.enable_reg_active_poll = 1; 697 mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; 698 699 /* 700 * Keep oversubscribe timer for sdma . When we have unmapped doorbell 701 * handling support, other queue will not use the oversubscribe timer. 702 * handling mode - 0: disabled; 1: basic version; 2: basic+ version 703 */ 704 mes_set_hw_res_pkt.oversubscription_timer = 50; 705 mes_set_hw_res_pkt.unmapped_doorbell_handling = 1; 706 707 if (amdgpu_mes_log_enable) { 708 mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; 709 mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = 710 mes->event_log_gpu_addr + MES_PIPE_INST(xcc_id, pipe) * AMDGPU_MES_LOG_BUFFER_SIZE; 711 } 712 713 if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) 714 mes_set_hw_res_pkt.limit_single_process = 1; 715 716 return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, 717 &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), 718 offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); 719 } 720 721 static void mes_v12_1_init_aggregated_doorbell(struct amdgpu_mes *mes, 722 int xcc_id) 723 { 724 struct amdgpu_device *adev = mes->adev; 725 uint32_t data; 726 727 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1); 728 data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK | 729 CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK | 730 CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK); 731 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] << 732 CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT; 733 data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT; 734 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1, data); 735 736 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2); 737 data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK | 738 CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK | 739 CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK); 740 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] << 741 CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT; 742 data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT; 743 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2, data); 744 745 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3); 746 data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK | 747 CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK | 748 CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK); 749 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] << 750 CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT; 751 data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT; 752 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3, data); 753 754 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4); 755 data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK | 756 CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK | 757 CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK); 758 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] << 759 CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT; 760 data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT; 761 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4, data); 762 763 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5); 764 data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK | 765 CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK | 766 CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK); 767 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] << 768 CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT; 769 data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT; 770 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5, data); 771 772 data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT; 773 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_GFX_CONTROL, data); 774 } 775 776 777 static void mes_v12_1_enable_unmapped_doorbell_handling( 778 struct amdgpu_mes *mes, bool enable, int xcc_id) 779 { 780 struct amdgpu_device *adev = mes->adev; 781 uint32_t data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL); 782 783 /* 784 * The default PROC_LSB settng is 0xc which means doorbell 785 * addr[16:12] gives the doorbell page number. For kfd, each 786 * process will use 2 pages of doorbell, we need to change the 787 * setting to 0xd 788 */ 789 data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK; 790 data |= 0xd << CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT; 791 792 data |= (enable ? 1 : 0) << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT; 793 794 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL, data); 795 } 796 797 #if 0 798 static int mes_v12_1_reset_legacy_queue(struct amdgpu_mes *mes, 799 struct mes_reset_legacy_queue_input *input) 800 { 801 union MESAPI__RESET mes_reset_queue_pkt; 802 int pipe; 803 804 memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); 805 806 mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 807 mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; 808 mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 809 810 mes_reset_queue_pkt.queue_type = 811 convert_to_mes_queue_type(input->queue_type); 812 813 if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { 814 mes_reset_queue_pkt.reset_legacy_gfx = 1; 815 mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; 816 mes_reset_queue_pkt.queue_id_lp = input->queue_id; 817 mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr; 818 mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset; 819 mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr; 820 mes_reset_queue_pkt.vmid_id_lp = input->vmid; 821 } else { 822 mes_reset_queue_pkt.reset_queue_only = 1; 823 mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; 824 } 825 826 if (mes->adev->enable_uni_mes) 827 pipe = AMDGPU_MES_KIQ_PIPE; 828 else 829 pipe = AMDGPU_MES_SCHED_PIPE; 830 831 return mes_v12_1_submit_pkt_and_poll_completion(mes, 832 input->xcc_id, pipe, 833 &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), 834 offsetof(union MESAPI__RESET, api_status)); 835 } 836 #endif 837 838 static const struct amdgpu_mes_funcs mes_v12_1_funcs = { 839 .add_hw_queue = mes_v12_1_add_hw_queue, 840 .remove_hw_queue = mes_v12_1_remove_hw_queue, 841 .map_legacy_queue = mes_v12_1_map_legacy_queue, 842 .unmap_legacy_queue = mes_v12_1_unmap_legacy_queue, 843 .suspend_gang = mes_v12_1_suspend_gang, 844 .resume_gang = mes_v12_1_resume_gang, 845 .misc_op = mes_v12_1_misc_op, 846 .reset_hw_queue = mes_v12_1_reset_hw_queue, 847 }; 848 849 static int mes_v12_1_allocate_ucode_buffer(struct amdgpu_device *adev, 850 enum amdgpu_mes_pipe pipe, 851 int xcc_id) 852 { 853 int r, inst = MES_PIPE_INST(xcc_id, pipe); 854 const struct mes_firmware_header_v1_0 *mes_hdr; 855 const __le32 *fw_data; 856 unsigned fw_size; 857 858 mes_hdr = (const struct mes_firmware_header_v1_0 *) 859 adev->mes.fw[pipe]->data; 860 861 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 862 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); 863 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); 864 865 r = amdgpu_bo_create_reserved(adev, fw_size, 866 PAGE_SIZE, 867 AMDGPU_GEM_DOMAIN_VRAM, 868 &adev->mes.ucode_fw_obj[inst], 869 &adev->mes.ucode_fw_gpu_addr[inst], 870 (void **)&adev->mes.ucode_fw_ptr[inst]); 871 if (r) { 872 dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r); 873 return r; 874 } 875 876 memcpy(adev->mes.ucode_fw_ptr[inst], fw_data, fw_size); 877 878 amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[inst]); 879 amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[inst]); 880 881 return 0; 882 } 883 884 static int mes_v12_1_allocate_ucode_data_buffer(struct amdgpu_device *adev, 885 enum amdgpu_mes_pipe pipe, 886 int xcc_id) 887 { 888 int r, inst = MES_PIPE_INST(xcc_id, pipe); 889 const struct mes_firmware_header_v1_0 *mes_hdr; 890 const __le32 *fw_data; 891 unsigned fw_size; 892 893 mes_hdr = (const struct mes_firmware_header_v1_0 *) 894 adev->mes.fw[pipe]->data; 895 896 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 897 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); 898 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); 899 900 r = amdgpu_bo_create_reserved(adev, fw_size, 901 64 * 1024, 902 AMDGPU_GEM_DOMAIN_VRAM, 903 &adev->mes.data_fw_obj[inst], 904 &adev->mes.data_fw_gpu_addr[inst], 905 (void **)&adev->mes.data_fw_ptr[inst]); 906 if (r) { 907 dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r); 908 return r; 909 } 910 911 memcpy(adev->mes.data_fw_ptr[inst], fw_data, fw_size); 912 913 amdgpu_bo_kunmap(adev->mes.data_fw_obj[inst]); 914 amdgpu_bo_unreserve(adev->mes.data_fw_obj[inst]); 915 916 return 0; 917 } 918 919 static void mes_v12_1_free_ucode_buffers(struct amdgpu_device *adev, 920 enum amdgpu_mes_pipe pipe, 921 int xcc_id) 922 { 923 int inst = MES_PIPE_INST(xcc_id, pipe); 924 925 amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[inst], 926 &adev->mes.data_fw_gpu_addr[inst], 927 (void **)&adev->mes.data_fw_ptr[inst]); 928 929 amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[inst], 930 &adev->mes.ucode_fw_gpu_addr[inst], 931 (void **)&adev->mes.ucode_fw_ptr[inst]); 932 } 933 934 static void mes_v12_1_enable(struct amdgpu_device *adev, 935 bool enable, int xcc_id) 936 { 937 uint64_t ucode_addr; 938 uint32_t pipe, data = 0; 939 940 if (enable) { 941 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL); 942 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); 943 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); 944 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); 945 946 mutex_lock(&adev->srbm_mutex); 947 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 948 soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, 949 GET_INST(GC, xcc_id)); 950 951 ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; 952 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 953 regCP_MES_PRGRM_CNTR_START, 954 lower_32_bits(ucode_addr)); 955 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 956 regCP_MES_PRGRM_CNTR_START_HI, 957 upper_32_bits(ucode_addr)); 958 } 959 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 960 mutex_unlock(&adev->srbm_mutex); 961 962 /* unhalt MES and activate pipe0 */ 963 data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); 964 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); 965 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); 966 967 if (amdgpu_emu_mode) 968 msleep(500); 969 else if (adev->enable_uni_mes) 970 udelay(500); 971 else 972 udelay(50); 973 } else { 974 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL); 975 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0); 976 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0); 977 data = REG_SET_FIELD(data, CP_MES_CNTL, 978 MES_INVALIDATE_ICACHE, 1); 979 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); 980 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); 981 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); 982 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); 983 } 984 } 985 986 static void mes_v12_1_set_ucode_start_addr(struct amdgpu_device *adev, 987 int xcc_id) 988 { 989 uint64_t ucode_addr; 990 int pipe; 991 992 mes_v12_1_enable(adev, false, xcc_id); 993 994 mutex_lock(&adev->srbm_mutex); 995 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 996 /* me=3, queue=0 */ 997 soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); 998 999 /* set ucode start address */ 1000 ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; 1001 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START, 1002 lower_32_bits(ucode_addr)); 1003 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START_HI, 1004 upper_32_bits(ucode_addr)); 1005 1006 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1007 } 1008 mutex_unlock(&adev->srbm_mutex); 1009 } 1010 1011 /* This function is for backdoor MES firmware */ 1012 static int mes_v12_1_load_microcode(struct amdgpu_device *adev, 1013 enum amdgpu_mes_pipe pipe, 1014 bool prime_icache, int xcc_id) 1015 { 1016 int r, inst = MES_PIPE_INST(xcc_id, pipe); 1017 uint32_t data; 1018 1019 mes_v12_1_enable(adev, false, xcc_id); 1020 1021 if (!adev->mes.fw[pipe]) 1022 return -EINVAL; 1023 1024 r = mes_v12_1_allocate_ucode_buffer(adev, pipe, xcc_id); 1025 if (r) 1026 return r; 1027 1028 r = mes_v12_1_allocate_ucode_data_buffer(adev, pipe, xcc_id); 1029 if (r) { 1030 mes_v12_1_free_ucode_buffers(adev, pipe, xcc_id); 1031 return r; 1032 } 1033 1034 mutex_lock(&adev->srbm_mutex); 1035 /* me=3, pipe=0, queue=0 */ 1036 soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); 1037 1038 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_CNTL, 0); 1039 1040 /* set ucode fimrware address */ 1041 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_LO, 1042 lower_32_bits(adev->mes.ucode_fw_gpu_addr[inst])); 1043 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_HI, 1044 upper_32_bits(adev->mes.ucode_fw_gpu_addr[inst])); 1045 1046 /* set ucode instruction cache boundary to 2M-1 */ 1047 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MIBOUND_LO, 0x1FFFFF); 1048 1049 /* set ucode data firmware address */ 1050 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_LO, 1051 lower_32_bits(adev->mes.data_fw_gpu_addr[inst])); 1052 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_HI, 1053 upper_32_bits(adev->mes.data_fw_gpu_addr[inst])); 1054 1055 /* Set data cache boundary CP_MES_MDBOUND_LO */ 1056 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBOUND_LO, 0x7FFFF); 1057 1058 if (prime_icache) { 1059 /* invalidate ICACHE */ 1060 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL); 1061 data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0); 1062 data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1); 1063 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data); 1064 1065 /* prime the ICACHE. */ 1066 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL); 1067 data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1); 1068 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data); 1069 } 1070 1071 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1072 mutex_unlock(&adev->srbm_mutex); 1073 1074 return 0; 1075 } 1076 1077 static int mes_v12_1_allocate_eop_buf(struct amdgpu_device *adev, 1078 enum amdgpu_mes_pipe pipe, 1079 int xcc_id) 1080 { 1081 int r, inst = MES_PIPE_INST(xcc_id, pipe); 1082 u32 *eop; 1083 1084 r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE, 1085 AMDGPU_GEM_DOMAIN_GTT, 1086 &adev->mes.eop_gpu_obj[inst], 1087 &adev->mes.eop_gpu_addr[inst], 1088 (void **)&eop); 1089 if (r) { 1090 dev_warn(adev->dev, "(%d) create EOP bo failed\n", r); 1091 return r; 1092 } 1093 1094 memset(eop, 0, 1095 adev->mes.eop_gpu_obj[inst]->tbo.base.size); 1096 1097 amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[inst]); 1098 amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[inst]); 1099 1100 return 0; 1101 } 1102 1103 static int mes_v12_1_allocate_shared_cmd_buf(struct amdgpu_device *adev, 1104 enum amdgpu_mes_pipe pipe, 1105 int xcc_id) 1106 { 1107 int r, inst = MES_PIPE_INST(xcc_id, pipe); 1108 1109 if (pipe == AMDGPU_MES_KIQ_PIPE) 1110 return 0; 1111 1112 r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, 1113 AMDGPU_GEM_DOMAIN_VRAM, 1114 &adev->mes.shared_cmd_buf_obj[inst], 1115 &adev->mes.shared_cmd_buf_gpu_addr[inst], 1116 NULL); 1117 if (r) { 1118 dev_err(adev->dev, 1119 "(%d) failed to create shared cmd buf bo\n", r); 1120 return r; 1121 } 1122 1123 return 0; 1124 } 1125 1126 static int mes_v12_1_mqd_init(struct amdgpu_ring *ring) 1127 { 1128 struct v12_1_mes_mqd *mqd = ring->mqd_ptr; 1129 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 1130 uint32_t tmp; 1131 1132 mqd->header = 0xC0310800; 1133 mqd->compute_pipelinestat_enable = 0x00000001; 1134 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 1135 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 1136 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 1137 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 1138 mqd->compute_misc_reserved = 0x00000007; 1139 1140 eop_base_addr = ring->eop_gpu_addr >> 8; 1141 1142 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 1143 tmp = regCP_HQD_EOP_CONTROL_DEFAULT; 1144 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 1145 (order_base_2(MES_EOP_SIZE / 4) - 1)); 1146 1147 mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr); 1148 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 1149 mqd->cp_hqd_eop_control = tmp; 1150 1151 /* disable the queue if it's active */ 1152 ring->wptr = 0; 1153 mqd->cp_hqd_pq_rptr = 0; 1154 mqd->cp_hqd_pq_wptr_lo = 0; 1155 mqd->cp_hqd_pq_wptr_hi = 0; 1156 1157 /* set the pointer to the MQD */ 1158 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 1159 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 1160 1161 /* set MQD vmid to 0 */ 1162 tmp = regCP_MQD_CONTROL_DEFAULT; 1163 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 1164 mqd->cp_mqd_control = tmp; 1165 1166 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 1167 hqd_gpu_addr = ring->gpu_addr >> 8; 1168 mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr); 1169 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 1170 1171 /* set the wb address whether it's enabled or not */ 1172 wb_gpu_addr = ring->rptr_gpu_addr; 1173 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 1174 mqd->cp_hqd_pq_rptr_report_addr_hi = 1175 upper_32_bits(wb_gpu_addr) & 0xffff; 1176 1177 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 1178 wb_gpu_addr = ring->wptr_gpu_addr; 1179 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8; 1180 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 1181 1182 /* set up the HQD, this is similar to CP_RB0_CNTL */ 1183 tmp = regCP_HQD_PQ_CONTROL_DEFAULT; 1184 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 1185 (order_base_2(ring->ring_size / 4) - 1)); 1186 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 1187 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 1188 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); 1189 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); 1190 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 1191 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 1192 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1); 1193 mqd->cp_hqd_pq_control = tmp; 1194 1195 /* enable doorbell */ 1196 tmp = 0; 1197 if (ring->use_doorbell) { 1198 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1199 DOORBELL_OFFSET, ring->doorbell_index); 1200 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1201 DOORBELL_EN, 1); 1202 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1203 DOORBELL_SOURCE, 0); 1204 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1205 DOORBELL_HIT, 0); 1206 } else { 1207 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1208 DOORBELL_EN, 0); 1209 } 1210 mqd->cp_hqd_pq_doorbell_control = tmp; 1211 1212 mqd->cp_hqd_vmid = 0; 1213 /* activate the queue */ 1214 mqd->cp_hqd_active = 1; 1215 1216 tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; 1217 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, 1218 PRELOAD_SIZE, 0x63); 1219 mqd->cp_hqd_persistent_state = tmp; 1220 1221 mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT; 1222 mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT; 1223 mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT; 1224 1225 /* 1226 * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped 1227 * doorbell handling. This is a reserved CP internal register can 1228 * not be accesss by others 1229 */ 1230 mqd->cp_hqd_gfx_control = BIT(15); 1231 1232 return 0; 1233 } 1234 1235 static void mes_v12_1_queue_init_register(struct amdgpu_ring *ring, 1236 int xcc_id) 1237 { 1238 struct v12_1_mes_mqd *mqd = ring->mqd_ptr; 1239 struct amdgpu_device *adev = ring->adev; 1240 uint32_t data = 0; 1241 1242 mutex_lock(&adev->srbm_mutex); 1243 soc_v1_0_grbm_select(adev, 3, ring->pipe, 0, 0, GET_INST(GC, xcc_id)); 1244 1245 /* set CP_HQD_VMID.VMID = 0. */ 1246 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID); 1247 data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0); 1248 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, data); 1249 1250 /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */ 1251 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL); 1252 data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, 1253 DOORBELL_EN, 0); 1254 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data); 1255 1256 /* set CP_MQD_BASE_ADDR/HI with the MQD base address */ 1257 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); 1258 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); 1259 1260 /* set CP_MQD_CONTROL.VMID=0 */ 1261 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL); 1262 data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0); 1263 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL, 0); 1264 1265 /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */ 1266 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); 1267 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); 1268 1269 /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */ 1270 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR, 1271 mqd->cp_hqd_pq_rptr_report_addr_lo); 1272 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 1273 mqd->cp_hqd_pq_rptr_report_addr_hi); 1274 1275 /* set CP_HQD_PQ_CONTROL */ 1276 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); 1277 1278 /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */ 1279 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR, 1280 mqd->cp_hqd_pq_wptr_poll_addr_lo); 1281 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 1282 mqd->cp_hqd_pq_wptr_poll_addr_hi); 1283 1284 /* set CP_HQD_PQ_DOORBELL_CONTROL */ 1285 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 1286 mqd->cp_hqd_pq_doorbell_control); 1287 1288 /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */ 1289 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); 1290 1291 /* set CP_HQD_ACTIVE.ACTIVE=1 */ 1292 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, mqd->cp_hqd_active); 1293 1294 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1295 mutex_unlock(&adev->srbm_mutex); 1296 } 1297 1298 static int mes_v12_1_kiq_enable_queue(struct amdgpu_device *adev, int xcc_id) 1299 { 1300 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 1301 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[xcc_id].ring; 1302 int r, inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 1303 1304 if (!kiq->pmf || !kiq->pmf->kiq_map_queues) 1305 return -EINVAL; 1306 1307 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); 1308 if (r) { 1309 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 1310 return r; 1311 } 1312 1313 kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[inst]); 1314 1315 r = amdgpu_ring_test_ring(kiq_ring); 1316 if (r) { 1317 DRM_ERROR("kfq enable failed\n"); 1318 kiq_ring->sched.ready = false; 1319 } 1320 return r; 1321 } 1322 1323 static int mes_v12_1_queue_init(struct amdgpu_device *adev, 1324 enum amdgpu_mes_pipe pipe, 1325 int xcc_id) 1326 { 1327 struct amdgpu_ring *ring; 1328 int r; 1329 1330 if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) 1331 ring = &adev->gfx.kiq[xcc_id].ring; 1332 else 1333 ring = &adev->mes.ring[MES_PIPE_INST(xcc_id, pipe)]; 1334 1335 if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) && 1336 (amdgpu_in_reset(adev) || adev->in_suspend)) { 1337 *(ring->wptr_cpu_addr) = 0; 1338 *(ring->rptr_cpu_addr) = 0; 1339 amdgpu_ring_clear_ring(ring); 1340 } 1341 1342 r = mes_v12_1_mqd_init(ring); 1343 if (r) 1344 return r; 1345 1346 if (pipe == AMDGPU_MES_SCHED_PIPE) { 1347 if (adev->enable_uni_mes) 1348 r = amdgpu_mes_map_legacy_queue(adev, ring, xcc_id); 1349 else 1350 r = mes_v12_1_kiq_enable_queue(adev, xcc_id); 1351 if (r) 1352 return r; 1353 } else { 1354 mes_v12_1_queue_init_register(ring, xcc_id); 1355 } 1356 1357 /* get MES scheduler/KIQ versions */ 1358 mutex_lock(&adev->srbm_mutex); 1359 soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); 1360 1361 if (pipe == AMDGPU_MES_SCHED_PIPE) 1362 adev->mes.sched_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO); 1363 else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) 1364 adev->mes.kiq_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO); 1365 1366 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1367 mutex_unlock(&adev->srbm_mutex); 1368 1369 return 0; 1370 } 1371 1372 static int mes_v12_1_ring_init(struct amdgpu_device *adev, 1373 int xcc_id, int pipe) 1374 { 1375 struct amdgpu_ring *ring; 1376 int inst = MES_PIPE_INST(xcc_id, pipe); 1377 1378 ring = &adev->mes.ring[inst]; 1379 1380 ring->funcs = &mes_v12_1_ring_funcs; 1381 1382 ring->me = 3; 1383 ring->pipe = pipe; 1384 ring->queue = 0; 1385 ring->xcc_id = xcc_id; 1386 ring->vm_hub = AMDGPU_GFXHUB(xcc_id); 1387 1388 ring->ring_obj = NULL; 1389 ring->use_doorbell = true; 1390 ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst]; 1391 ring->no_scheduler = true; 1392 snprintf(ring->name, sizeof(ring->name), "mes_%hhu.%hhu.%hhu.%hhu", 1393 (unsigned char)xcc_id, (unsigned char)ring->me, 1394 (unsigned char)ring->pipe, (unsigned char)ring->queue); 1395 1396 if (pipe == AMDGPU_MES_SCHED_PIPE) 1397 ring->doorbell_index = 1398 (adev->doorbell_index.mes_ring0 + 1399 xcc_id * adev->doorbell_index.xcc_doorbell_range) 1400 << 1; 1401 else 1402 ring->doorbell_index = 1403 (adev->doorbell_index.mes_ring1 + 1404 xcc_id * adev->doorbell_index.xcc_doorbell_range) 1405 << 1; 1406 1407 return amdgpu_ring_init(adev, ring, 1024, NULL, 0, 1408 AMDGPU_RING_PRIO_DEFAULT, NULL); 1409 } 1410 1411 static int mes_v12_1_kiq_ring_init(struct amdgpu_device *adev, int xcc_id) 1412 { 1413 struct amdgpu_ring *ring; 1414 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE); 1415 1416 spin_lock_init(&adev->gfx.kiq[xcc_id].ring_lock); 1417 1418 ring = &adev->gfx.kiq[xcc_id].ring; 1419 1420 ring->me = 3; 1421 ring->pipe = 1; 1422 ring->queue = 0; 1423 ring->xcc_id = xcc_id; 1424 ring->vm_hub = AMDGPU_GFXHUB(xcc_id); 1425 1426 ring->adev = NULL; 1427 ring->ring_obj = NULL; 1428 ring->use_doorbell = true; 1429 ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst]; 1430 ring->no_scheduler = true; 1431 ring->doorbell_index = 1432 (adev->doorbell_index.mes_ring1 + 1433 xcc_id * adev->doorbell_index.xcc_doorbell_range) 1434 << 1; 1435 1436 snprintf(ring->name, sizeof(ring->name), "mes_kiq_%hhu.%hhu.%hhu.%hhu", 1437 (unsigned char)xcc_id, (unsigned char)ring->me, 1438 (unsigned char)ring->pipe, (unsigned char)ring->queue); 1439 1440 return amdgpu_ring_init(adev, ring, 1024, NULL, 0, 1441 AMDGPU_RING_PRIO_DEFAULT, NULL); 1442 } 1443 1444 static int mes_v12_1_mqd_sw_init(struct amdgpu_device *adev, 1445 enum amdgpu_mes_pipe pipe, 1446 int xcc_id) 1447 { 1448 int r, mqd_size = sizeof(struct v12_1_mes_mqd); 1449 struct amdgpu_ring *ring; 1450 int inst = MES_PIPE_INST(xcc_id, pipe); 1451 1452 if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) 1453 ring = &adev->gfx.kiq[xcc_id].ring; 1454 else 1455 ring = &adev->mes.ring[inst]; 1456 1457 if (ring->mqd_obj) 1458 return 0; 1459 1460 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, 1461 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, 1462 &ring->mqd_gpu_addr, &ring->mqd_ptr); 1463 if (r) { 1464 dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); 1465 return r; 1466 } 1467 1468 memset(ring->mqd_ptr, 0, mqd_size); 1469 1470 /* prepare MQD backup */ 1471 adev->mes.mqd_backup[inst] = kmalloc(mqd_size, GFP_KERNEL); 1472 if (!adev->mes.mqd_backup[inst]) 1473 dev_warn(adev->dev, 1474 "no memory to create MQD backup for ring %s\n", 1475 ring->name); 1476 1477 return 0; 1478 } 1479 1480 static int mes_v12_1_sw_init(struct amdgpu_ip_block *ip_block) 1481 { 1482 struct amdgpu_device *adev = ip_block->adev; 1483 int pipe, r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1484 1485 if (adev->enable_uni_mes && num_xcc > 1) 1486 adev->mes.enable_coop_mode = true; 1487 1488 adev->mes.funcs = &mes_v12_1_funcs; 1489 adev->mes.kiq_hw_init = &mes_v12_1_kiq_hw_init; 1490 adev->mes.kiq_hw_fini = &mes_v12_1_kiq_hw_fini; 1491 adev->mes.enable_legacy_queue_map = true; 1492 1493 adev->mes.event_log_size = 1494 adev->enable_uni_mes ? (AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE * num_xcc) : AMDGPU_MES_LOG_BUFFER_SIZE; 1495 1496 r = amdgpu_mes_init(adev); 1497 if (r) 1498 return r; 1499 1500 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1501 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1502 r = mes_v12_1_allocate_eop_buf(adev, pipe, xcc_id); 1503 if (r) 1504 return r; 1505 1506 r = mes_v12_1_mqd_sw_init(adev, pipe, xcc_id); 1507 if (r) 1508 return r; 1509 1510 if (!adev->enable_uni_mes && pipe == 1511 AMDGPU_MES_KIQ_PIPE) 1512 r = mes_v12_1_kiq_ring_init(adev, xcc_id); 1513 else 1514 r = mes_v12_1_ring_init(adev, xcc_id, pipe); 1515 if (r) 1516 return r; 1517 1518 if (adev->mes.enable_coop_mode) { 1519 r = mes_v12_1_allocate_shared_cmd_buf(adev, 1520 pipe, xcc_id); 1521 if (r) 1522 return r; 1523 } 1524 } 1525 } 1526 1527 return 0; 1528 } 1529 1530 static int mes_v12_1_sw_fini(struct amdgpu_ip_block *ip_block) 1531 { 1532 struct amdgpu_device *adev = ip_block->adev; 1533 int pipe, inst, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1534 1535 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1536 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1537 inst = MES_PIPE_INST(xcc_id, pipe); 1538 1539 amdgpu_bo_free_kernel(&adev->mes.shared_cmd_buf_obj[inst], 1540 &adev->mes.shared_cmd_buf_gpu_addr[inst], 1541 NULL); 1542 1543 kfree(adev->mes.mqd_backup[inst]); 1544 1545 amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[inst], 1546 &adev->mes.eop_gpu_addr[inst], 1547 NULL); 1548 amdgpu_ucode_release(&adev->mes.fw[inst]); 1549 1550 if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) { 1551 amdgpu_bo_free_kernel(&adev->mes.ring[inst].mqd_obj, 1552 &adev->mes.ring[inst].mqd_gpu_addr, 1553 &adev->mes.ring[inst].mqd_ptr); 1554 amdgpu_ring_fini(&adev->mes.ring[inst]); 1555 } 1556 } 1557 } 1558 1559 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1560 if (!adev->enable_uni_mes) { 1561 amdgpu_bo_free_kernel(&adev->gfx.kiq[xcc_id].ring.mqd_obj, 1562 &adev->gfx.kiq[xcc_id].ring.mqd_gpu_addr, 1563 &adev->gfx.kiq[xcc_id].ring.mqd_ptr); 1564 amdgpu_ring_fini(&adev->gfx.kiq[xcc_id].ring); 1565 } 1566 1567 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1568 mes_v12_1_free_ucode_buffers(adev, 1569 AMDGPU_MES_KIQ_PIPE, xcc_id); 1570 mes_v12_1_free_ucode_buffers(adev, 1571 AMDGPU_MES_SCHED_PIPE, xcc_id); 1572 } 1573 } 1574 1575 amdgpu_mes_fini(adev); 1576 return 0; 1577 } 1578 1579 static void mes_v12_1_kiq_dequeue_sched(struct amdgpu_device *adev, 1580 int xcc_id) 1581 { 1582 uint32_t data; 1583 int i; 1584 1585 mutex_lock(&adev->srbm_mutex); 1586 soc_v1_0_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0, 1587 GET_INST(GC, xcc_id)); 1588 1589 /* disable the queue if it's active */ 1590 if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) { 1591 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1); 1592 for (i = 0; i < adev->usec_timeout; i++) { 1593 if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) 1594 break; 1595 udelay(1); 1596 } 1597 } 1598 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL); 1599 data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, 1600 DOORBELL_EN, 0); 1601 data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, 1602 DOORBELL_HIT, 1); 1603 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data); 1604 1605 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0); 1606 1607 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0); 1608 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0); 1609 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0); 1610 1611 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1612 mutex_unlock(&adev->srbm_mutex); 1613 1614 adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = false; 1615 } 1616 1617 static void mes_v12_1_kiq_setting(struct amdgpu_ring *ring, int xcc_id) 1618 { 1619 uint32_t tmp; 1620 struct amdgpu_device *adev = ring->adev; 1621 1622 /* tell RLC which is KIQ queue */ 1623 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS); 1624 tmp &= 0xffffff00; 1625 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 1626 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); 1627 tmp |= 0x80; 1628 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); 1629 } 1630 1631 static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id) 1632 { 1633 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE); 1634 int r = 0; 1635 struct amdgpu_ip_block *ip_block; 1636 1637 if (adev->enable_uni_mes) 1638 mes_v12_1_kiq_setting(&adev->mes.ring[inst], xcc_id); 1639 else 1640 mes_v12_1_kiq_setting(&adev->gfx.kiq[xcc_id].ring, xcc_id); 1641 1642 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1643 1644 r = mes_v12_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, 1645 false, xcc_id); 1646 if (r) { 1647 DRM_ERROR("failed to load MES fw, r=%d\n", r); 1648 return r; 1649 } 1650 1651 r = mes_v12_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, 1652 true, xcc_id); 1653 if (r) { 1654 DRM_ERROR("failed to load MES kiq fw, r=%d\n", r); 1655 return r; 1656 } 1657 1658 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1659 1660 } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1661 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1662 1663 mes_v12_1_enable(adev, true, xcc_id); 1664 1665 ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES); 1666 if (unlikely(!ip_block)) { 1667 dev_err(adev->dev, "Failed to get MES handle\n"); 1668 return -EINVAL; 1669 } 1670 1671 r = mes_v12_1_queue_init(adev, AMDGPU_MES_KIQ_PIPE, xcc_id); 1672 if (r) 1673 goto failure; 1674 1675 if (adev->enable_uni_mes) { 1676 r = mes_v12_1_set_hw_resources(&adev->mes, 1677 AMDGPU_MES_KIQ_PIPE, xcc_id); 1678 if (r) 1679 goto failure; 1680 1681 mes_v12_1_set_hw_resources_1(&adev->mes, 1682 AMDGPU_MES_KIQ_PIPE, xcc_id); 1683 } 1684 1685 if (adev->mes.enable_legacy_queue_map) { 1686 r = mes_v12_1_xcc_hw_init(ip_block, xcc_id); 1687 if (r) 1688 goto failure; 1689 } 1690 1691 return r; 1692 1693 failure: 1694 mes_v12_1_hw_fini(ip_block); 1695 return r; 1696 } 1697 1698 static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id) 1699 { 1700 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 1701 1702 if (adev->mes.ring[inst].sched.ready) { 1703 if (adev->enable_uni_mes) 1704 amdgpu_mes_unmap_legacy_queue(adev, 1705 &adev->mes.ring[inst], 1706 RESET_QUEUES, 0, 0, xcc_id); 1707 else 1708 mes_v12_1_kiq_dequeue_sched(adev, xcc_id); 1709 1710 adev->mes.ring[inst].sched.ready = false; 1711 } 1712 1713 mes_v12_1_enable(adev, false, xcc_id); 1714 1715 return 0; 1716 } 1717 1718 static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id) 1719 { 1720 int r; 1721 struct amdgpu_device *adev = ip_block->adev; 1722 1723 if (adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready) 1724 goto out; 1725 1726 if (!adev->enable_mes_kiq) { 1727 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1728 r = mes_v12_1_load_microcode(adev, 1729 AMDGPU_MES_SCHED_PIPE, true, xcc_id); 1730 if (r) { 1731 DRM_ERROR("failed to MES fw, r=%d\n", r); 1732 return r; 1733 } 1734 1735 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1736 1737 } else if (adev->firmware.load_type == 1738 AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1739 1740 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1741 } 1742 1743 mes_v12_1_enable(adev, true, xcc_id); 1744 } 1745 1746 /* Enable the MES to handle doorbell ring on unmapped queue */ 1747 mes_v12_1_enable_unmapped_doorbell_handling(&adev->mes, true, xcc_id); 1748 1749 r = mes_v12_1_queue_init(adev, AMDGPU_MES_SCHED_PIPE, xcc_id); 1750 if (r) 1751 goto failure; 1752 1753 r = mes_v12_1_set_hw_resources(&adev->mes, 1754 AMDGPU_MES_SCHED_PIPE, xcc_id); 1755 if (r) 1756 goto failure; 1757 1758 if (adev->enable_uni_mes) 1759 mes_v12_1_set_hw_resources_1(&adev->mes, 1760 AMDGPU_MES_SCHED_PIPE, xcc_id); 1761 1762 mes_v12_1_init_aggregated_doorbell(&adev->mes, xcc_id); 1763 1764 r = mes_v12_1_query_sched_status(&adev->mes, 1765 AMDGPU_MES_SCHED_PIPE, xcc_id); 1766 if (r) { 1767 DRM_ERROR("MES is busy\n"); 1768 goto failure; 1769 } 1770 1771 out: 1772 /* 1773 * Disable KIQ ring usage from the driver once MES is enabled. 1774 * MES uses KIQ ring exclusively so driver cannot access KIQ ring 1775 * with MES enabled. 1776 */ 1777 adev->gfx.kiq[xcc_id].ring.sched.ready = false; 1778 adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = true; 1779 1780 return 0; 1781 1782 failure: 1783 mes_v12_1_hw_fini(ip_block); 1784 return r; 1785 } 1786 1787 static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block) 1788 { 1789 struct amdgpu_device *adev = ip_block->adev; 1790 int r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1791 1792 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1793 /* for SPX mode, all master xcc ids are set to 0 */ 1794 if (adev->mes.enable_coop_mode) 1795 adev->mes.master_xcc_ids[xcc_id] = 0; 1796 1797 r = mes_v12_1_xcc_hw_init(ip_block, xcc_id); 1798 if (r) 1799 return r; 1800 } 1801 1802 return 0; 1803 } 1804 1805 static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block) 1806 { 1807 return 0; 1808 } 1809 1810 static int mes_v12_1_suspend(struct amdgpu_ip_block *ip_block) 1811 { 1812 int r; 1813 1814 r = amdgpu_mes_suspend(ip_block->adev); 1815 if (r) 1816 return r; 1817 1818 return mes_v12_1_hw_fini(ip_block); 1819 } 1820 1821 static int mes_v12_1_resume(struct amdgpu_ip_block *ip_block) 1822 { 1823 int r; 1824 1825 r = mes_v12_1_hw_init(ip_block); 1826 if (r) 1827 return r; 1828 1829 return amdgpu_mes_resume(ip_block->adev); 1830 } 1831 1832 static int mes_v12_1_early_init(struct amdgpu_ip_block *ip_block) 1833 { 1834 struct amdgpu_device *adev = ip_block->adev; 1835 int pipe, r; 1836 1837 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1838 r = amdgpu_mes_init_microcode(adev, pipe); 1839 if (r) 1840 return r; 1841 } 1842 1843 return 0; 1844 } 1845 1846 static const struct amd_ip_funcs mes_v12_1_ip_funcs = { 1847 .name = "mes_v12_1", 1848 .early_init = mes_v12_1_early_init, 1849 .late_init = NULL, 1850 .sw_init = mes_v12_1_sw_init, 1851 .sw_fini = mes_v12_1_sw_fini, 1852 .hw_init = mes_v12_1_hw_init, 1853 .hw_fini = mes_v12_1_hw_fini, 1854 .suspend = mes_v12_1_suspend, 1855 .resume = mes_v12_1_resume, 1856 }; 1857 1858 const struct amdgpu_ip_block_version mes_v12_1_ip_block = { 1859 .type = AMD_IP_BLOCK_TYPE_MES, 1860 .major = 12, 1861 .minor = 1, 1862 .rev = 0, 1863 .funcs = &mes_v12_1_ip_funcs, 1864 }; 1865