1 /* 2 * Copyright 2025 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include "amdgpu.h"
#include "soc15_common.h"
#include "soc_v1_0.h"
#include "gc/gc_12_1_0_offset.h"
#include "gc/gc_12_1_0_sh_mask.h"
#include "gc/gc_11_0_0_default.h"
#include "v12_structs.h"
#include "mes_v12_api_def.h"

MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin");
MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_12_1_0_uni_mes.bin");

static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block);
static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id);
static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block);
static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id);
static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id);

#define MES_EOP_SIZE   2048

#define regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT 0x100000
#define XCC_REG_RANGE_0_LOW  0x1260	/* XCC gfxdec0 lower Bound */
#define XCC_REG_RANGE_0_HIGH 0x3C00	/* XCC gfxdec0 upper Bound */
#define XCC_REG_RANGE_1_LOW  0xA000	/* XCC gfxdec1 lower Bound */
#define XCC_REG_RANGE_1_HIGH 0x10000	/* XCC gfxdec1 upper Bound */
#define XCC_MID_MASK 0x41000000

/* Strip aperture/instance bits from a register offset, keeping only the
 * low 18 bits (the per-XCC dword offset).
 */
#define NORMALIZE_XCC_REG_OFFSET(offset) \
	(offset & 0x3FFFF)

/* Publish the new write pointer and ring the MES doorbell.
 * MES rings are doorbell-driven only, so a ring without a doorbell
 * here is a driver bug (hence BUG()).
 */
static void mes_v12_1_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
			     ring->wptr);
		WDOORBELL64(ring->doorbell_index, ring->wptr);
	} else {
		BUG();
	}
}

/* Read pointer lives in a CPU-visible writeback slot updated by the HW. */
static u64 mes_v12_1_ring_get_rptr(struct amdgpu_ring *ring)
{
	return *ring->rptr_cpu_addr;
}

/* Write pointer shadow is kept in the same writeback slot set_wptr updates. */
static u64 mes_v12_1_ring_get_wptr(struct amdgpu_ring *ring)
{
	u64 wptr;

	if (ring->use_doorbell)
		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
	else
		BUG();
	return wptr;
}

static const struct amdgpu_ring_funcs mes_v12_1_ring_funcs = {
	.type = AMDGPU_RING_TYPE_MES,
	.align_mask = 1,
	.nop = 0,
	.support_64bit_ptrs = true,
	.get_rptr = mes_v12_1_ring_get_rptr,
	.get_wptr = mes_v12_1_ring_get_wptr,
	.set_wptr = mes_v12_1_ring_set_wptr,
	.insert_nop = amdgpu_ring_insert_nop,
};

/* Human-readable names for MES scheduler API opcodes, indexed by opcode
 * value; used only for debug/error messages.
 */
static const char *mes_v12_1_opcodes[] = {
	"SET_HW_RSRC",
	"SET_SCHEDULING_CONFIG",
	"ADD_QUEUE",
	"REMOVE_QUEUE",
	"PERFORM_YIELD",
	"SET_GANG_PRIORITY_LEVEL",
	"SUSPEND",
	"RESUME",
	"RESET",
	"SET_LOG_BUFFER",
	"CHANGE_GANG_PRORITY",
	"QUERY_SCHEDULER_STATUS",
	"unused",
	"SET_DEBUG_VMID",
	"MISC",
	"UPDATE_ROOT_PAGE_TABLE",
	"AMD_LOG",
	"SET_SE_MODE",
	"SET_GANG_SUBMIT",
	"SET_HW_RSRC_1",
};

/* Names for the sub-opcodes of the MISC scheduler API, indexed by value. */
static const char *mes_v12_1_misc_opcodes[] = {
	"WRITE_REG",
	"INV_GART",
	"QUERY_STATUS",
	"READ_REG",
	"WAIT_REG_MEM",
	"SET_SHADER_DEBUGGER",
	"NOTIFY_WORK_ON_UNMAPPED_QUEUE",
	"NOTIFY_TO_UNMAP_PROCESSES",
};

/* Map a packet's scheduler opcode to its name, or NULL if out of range. */
static const char *mes_v12_1_get_op_string(union MESAPI__MISC *x_pkt)
{
	const char *op_str = NULL;

	if (x_pkt->header.opcode < ARRAY_SIZE(mes_v12_1_opcodes))
		op_str = mes_v12_1_opcodes[x_pkt->header.opcode];

	return op_str;
}

/* Map a MISC packet's sub-opcode to its name; NULL for non-MISC packets
 * or out-of-range sub-opcodes.
 */
static const char *mes_v12_1_get_misc_op_string(union MESAPI__MISC *x_pkt)
{
	const char *op_str = NULL;

	if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
	    (x_pkt->opcode < ARRAY_SIZE(mes_v12_1_misc_opcodes)))
		op_str = mes_v12_1_misc_opcodes[x_pkt->opcode];

	return op_str;
}

/*
 * Submit one MES API packet on the ring for (xcc_id, pipe) and poll for
 * firmware completion.
 *
 * The caller's packet gets its api_status fence pointed at a freshly
 * allocated writeback slot; a QUERY_SCHEDULER_STATUS packet is appended
 * whose fence completion bumps the ring's own fence sequence.  We then
 * poll the ring fence and additionally require the writeback slot to have
 * been written (*status_ptr != 0) to consider the API call acknowledged.
 *
 * Returns 0 on success, -EINVAL for an out-of-range opcode, -ETIMEDOUT
 * (or a negative fence-poll error) on failure.
 */
static int mes_v12_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
						    int xcc_id, int pipe, void *pkt,
						    int size, int api_status_off)
{
	union MESAPI__QUERY_MES_STATUS mes_status_pkt;
	signed long timeout = 2100000; /* 2100 ms */
	struct amdgpu_device *adev = mes->adev;
	struct amdgpu_ring *ring = &mes->ring[MES_PIPE_INST(xcc_id, pipe)];
	spinlock_t *ring_lock = &mes->ring_lock[MES_PIPE_INST(xcc_id, pipe)];
	struct MES_API_STATUS *api_status;
	union MESAPI__MISC *x_pkt = pkt;
	const char *op_str, *misc_op_str;
	unsigned long flags;
	u64 status_gpu_addr;
	u32 seq, status_offset;
	u64 *status_ptr;
	signed long r;
	int ret;

	if (x_pkt->header.opcode >= MES_SCH_API_MAX)
		return -EINVAL;

	if (amdgpu_emu_mode) {
		/* emulation is orders of magnitude slower than silicon */
		timeout *= 1000;
	} else if (amdgpu_sriov_vf(adev)) {
		/* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
		timeout = 15 * 600 * 1000;
	}

	/* completion fence target: one writeback dword pair per submission */
	ret = amdgpu_device_wb_get(adev, &status_offset);
	if (ret)
		return ret;

	status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
	status_ptr = (u64 *)&adev->wb.wb[status_offset];
	*status_ptr = 0;

	spin_lock_irqsave(ring_lock, flags);
	r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
	if (r)
		goto error_unlock_free;

	seq = ++ring->fence_drv.sync_seq;
	/* wait until the ring has room, i.e. the oldest in-flight fence retires */
	r = amdgpu_fence_wait_polling(ring,
				      seq - ring->fence_drv.num_fences_mask,
				      timeout);
	if (r < 1)
		goto error_undo;

	api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
	api_status->api_completion_fence_addr = status_gpu_addr;
	api_status->api_completion_fence_value = 1;

	amdgpu_ring_write_multiple(ring, pkt, size / 4);

	/* trailing status query doubles as the ring fence for this submission */
	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
	mes_status_pkt.api_status.api_completion_fence_addr =
		ring->fence_drv.gpu_addr;
	mes_status_pkt.api_status.api_completion_fence_value = seq;

	amdgpu_ring_write_multiple(ring, &mes_status_pkt,
				   sizeof(mes_status_pkt) / 4);

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(ring_lock, flags);

	op_str = mes_v12_1_get_op_string(x_pkt);
	misc_op_str = mes_v12_1_get_misc_op_string(x_pkt);

	if (misc_op_str)
		dev_dbg(adev->dev, "MES(%d, %d) msg=%s (%s) was emitted\n",
			xcc_id, pipe, op_str, misc_op_str);
	else if (op_str)
		dev_dbg(adev->dev, "MES(%d, %d) msg=%s was emitted\n",
			xcc_id, pipe, op_str);
	else
		dev_dbg(adev->dev, "MES(%d, %d) msg=%d was emitted\n",
			xcc_id, pipe, x_pkt->header.opcode);

	r = amdgpu_fence_wait_polling(ring, seq, timeout);
	/* fence may retire without the API fence being written; check both */
	if (r < 1 || !*status_ptr) {
		if (misc_op_str)
			dev_err(adev->dev,
				"MES(%d, %d) failed to respond to msg=%s (%s)\n",
				xcc_id, pipe, op_str, misc_op_str);
		else if (op_str)
			dev_err(adev->dev,
				"MES(%d, %d) failed to respond to msg=%s\n",
				xcc_id, pipe, op_str);
		else
			dev_err(adev->dev,
				"MES(%d, %d) failed to respond to msg=%d\n",
				xcc_id, pipe, x_pkt->header.opcode);

		/* debug aid: park here instead of tearing down a hung HWS */
		while (halt_if_hws_hang)
			schedule();

		r = -ETIMEDOUT;
		goto error_wb_free;
	}

	amdgpu_device_wb_free(adev, status_offset);
	return 0;

error_undo:
	dev_err(adev->dev, "MES(%d, %d) ring buffer is full.\n", xcc_id, pipe);
	amdgpu_ring_undo(ring);

error_unlock_free:
	spin_unlock_irqrestore(ring_lock, flags);

error_wb_free:
	amdgpu_device_wb_free(adev, status_offset);
	return r;
}

/* Translate an amdgpu ring type to the MES API queue type.
 * BUG()s on any type MES cannot schedule.
 */
static int convert_to_mes_queue_type(int queue_type)
{
	if (queue_type == AMDGPU_RING_TYPE_GFX)
		return MES_QUEUE_TYPE_GFX;
	else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
		return MES_QUEUE_TYPE_COMPUTE;
	else if (queue_type == AMDGPU_RING_TYPE_SDMA)
		return MES_QUEUE_TYPE_SDMA;
	else if (queue_type == AMDGPU_RING_TYPE_MES)
		return MES_QUEUE_TYPE_SCHQ;
	else
		BUG();
	return -1;
}

/* Build and submit an ADD_QUEUE packet on the scheduler pipe. */
static int mes_v12_1_add_hw_queue(struct amdgpu_mes *mes,
				  struct mes_add_queue_input *input)
{
	union MESAPI__ADD_QUEUE mes_add_queue_pkt;
	int xcc_id = input->xcc_id;
	int inst =
MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 292 293 if (mes->enable_coop_mode) 294 xcc_id = mes->master_xcc_ids[inst]; 295 296 memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); 297 298 mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 299 mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; 300 mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 301 302 mes_add_queue_pkt.process_id = input->process_id; 303 mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr; 304 mes_add_queue_pkt.process_va_start = input->process_va_start; 305 mes_add_queue_pkt.process_va_end = input->process_va_end; 306 mes_add_queue_pkt.process_quantum = input->process_quantum; 307 mes_add_queue_pkt.process_context_addr = input->process_context_addr; 308 mes_add_queue_pkt.gang_quantum = input->gang_quantum; 309 mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; 310 mes_add_queue_pkt.inprocess_gang_priority = 311 input->inprocess_gang_priority; 312 mes_add_queue_pkt.gang_global_priority_level = 313 input->gang_global_priority_level; 314 mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; 315 mes_add_queue_pkt.mqd_addr = input->mqd_addr; 316 317 mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr; 318 319 mes_add_queue_pkt.queue_type = 320 convert_to_mes_queue_type(input->queue_type); 321 mes_add_queue_pkt.paging = input->paging; 322 mes_add_queue_pkt.vm_context_cntl = input->vm_cntx_cntl; 323 mes_add_queue_pkt.gws_base = input->gws_base; 324 mes_add_queue_pkt.gws_size = input->gws_size; 325 mes_add_queue_pkt.trap_handler_addr = input->tba_addr; 326 mes_add_queue_pkt.tma_addr = input->tma_addr; 327 mes_add_queue_pkt.trap_en = input->trap_en; 328 mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear; 329 mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; 330 331 /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ 332 mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; 333 
mes_add_queue_pkt.gds_size = input->queue_size; 334 335 /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ 336 mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; 337 mes_add_queue_pkt.gds_size = input->queue_size; 338 339 mes_add_queue_pkt.full_sh_mem_config_data = input->sh_mem_config_data; 340 341 return mes_v12_1_submit_pkt_and_poll_completion(mes, 342 xcc_id, AMDGPU_MES_SCHED_PIPE, 343 &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), 344 offsetof(union MESAPI__ADD_QUEUE, api_status)); 345 } 346 347 static int mes_v12_1_remove_hw_queue(struct amdgpu_mes *mes, 348 struct mes_remove_queue_input *input) 349 { 350 union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; 351 int xcc_id = input->xcc_id; 352 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 353 354 if (mes->enable_coop_mode) 355 xcc_id = mes->master_xcc_ids[inst]; 356 357 memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); 358 359 mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 360 mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; 361 mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 362 363 mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; 364 mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; 365 366 return mes_v12_1_submit_pkt_and_poll_completion(mes, 367 xcc_id, AMDGPU_MES_SCHED_PIPE, 368 &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), 369 offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 370 } 371 372 static int mes_v12_1_reset_hw_queue(struct amdgpu_mes *mes, 373 struct mes_reset_queue_input *input) 374 { 375 union MESAPI__RESET mes_reset_queue_pkt; 376 int pipe; 377 378 memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); 379 380 mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 381 mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; 382 mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 383 384 mes_reset_queue_pkt.doorbell_offset = 
input->doorbell_offset; 385 /* mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; */ 386 /*mes_reset_queue_pkt.reset_queue_only = 1;*/ 387 388 if (mes->adev->enable_uni_mes) 389 pipe = AMDGPU_MES_KIQ_PIPE; 390 else 391 pipe = AMDGPU_MES_SCHED_PIPE; 392 393 return mes_v12_1_submit_pkt_and_poll_completion(mes, 394 input->xcc_id, pipe, 395 &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), 396 offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 397 } 398 399 static int mes_v12_1_map_legacy_queue(struct amdgpu_mes *mes, 400 struct mes_map_legacy_queue_input *input) 401 { 402 union MESAPI__ADD_QUEUE mes_add_queue_pkt; 403 int pipe; 404 405 memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); 406 407 mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 408 mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; 409 mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 410 411 mes_add_queue_pkt.pipe_id = input->pipe_id; 412 mes_add_queue_pkt.queue_id = input->queue_id; 413 mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; 414 mes_add_queue_pkt.mqd_addr = input->mqd_addr; 415 mes_add_queue_pkt.wptr_addr = input->wptr_addr; 416 mes_add_queue_pkt.queue_type = 417 convert_to_mes_queue_type(input->queue_type); 418 mes_add_queue_pkt.map_legacy_kq = 1; 419 420 if (mes->adev->enable_uni_mes) 421 pipe = AMDGPU_MES_KIQ_PIPE; 422 else 423 pipe = AMDGPU_MES_SCHED_PIPE; 424 425 return mes_v12_1_submit_pkt_and_poll_completion(mes, 426 input->xcc_id, pipe, 427 &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), 428 offsetof(union MESAPI__ADD_QUEUE, api_status)); 429 } 430 431 static int mes_v12_1_unmap_legacy_queue(struct amdgpu_mes *mes, 432 struct mes_unmap_legacy_queue_input *input) 433 { 434 union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; 435 int pipe; 436 437 memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); 438 439 mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 440 mes_remove_queue_pkt.header.opcode = 
		MES_SCH_API_REMOVE_QUEUE;
	mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
	mes_remove_queue_pkt.gang_context_addr = 0;

	mes_remove_queue_pkt.pipe_id = input->pipe_id;
	mes_remove_queue_pkt.queue_id = input->queue_id;

	if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
		/* preempt only: leave queue mapped, signal the trailing fence */
		mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
		mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
		mes_remove_queue_pkt.tf_data =
			lower_32_bits(input->trail_fence_data);
	} else {
		mes_remove_queue_pkt.unmap_legacy_queue = 1;
		mes_remove_queue_pkt.queue_type =
			convert_to_mes_queue_type(input->queue_type);
	}

	if (mes->adev->enable_uni_mes)
		pipe = AMDGPU_MES_KIQ_PIPE;
	else
		pipe = AMDGPU_MES_SCHED_PIPE;

	return mes_v12_1_submit_pkt_and_poll_completion(mes,
			input->xcc_id, pipe,
			&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}

/* Gang suspend: not implemented on this IP version; reported as success. */
static int mes_v12_1_suspend_gang(struct amdgpu_mes *mes,
				  struct mes_suspend_gang_input *input)
{
	return 0;
}

/* Gang resume: not implemented on this IP version; reported as success. */
static int mes_v12_1_resume_gang(struct amdgpu_mes *mes,
				 struct mes_resume_gang_input *input)
{
	return 0;
}

/* Ping the scheduler with QUERY_SCHEDULER_STATUS; success means the
 * firmware on (xcc_id, pipe) is alive and processing packets.
 */
static int mes_v12_1_query_sched_status(struct amdgpu_mes *mes,
					int pipe, int xcc_id)
{
	union MESAPI__QUERY_MES_STATUS mes_status_pkt;

	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));

	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe,
			&mes_status_pkt, sizeof(mes_status_pkt),
			offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
}

/* Decode which XCC a register offset belongs to.
 * XCC_MID_MASK selects the upper group of four XCCs; reg_offset[17:16]
 * selects the XCC within the group.
 */
static uint32_t mes_v12_1_get_xcc_from_reg(uint32_t reg_offset)
{
	/* Check xcc reg offset range */
	uint32_t xcc = (reg_offset & XCC_MID_MASK) ? 4 : 0;
	/* Each XCC has two register ranges.
	 * These are represented in reg_offset[17:16]
	 */
	return ((reg_offset >> 16) & 0x3) + xcc;
}

/* Fill in the RRMT (register routing) option for a register access:
 * registers inside the per-XCC gfxdec ranges are routed to the local or
 * a remote XCD depending on which XCC owns the offset; everything else
 * goes through the AID path.
 */
static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id,
			       struct RRMT_OPTION *rrmt_opt)
{
	uint32_t normalized_reg = NORMALIZE_XCC_REG_OFFSET(reg);

	if (((normalized_reg >= XCC_REG_RANGE_0_LOW) && (normalized_reg < XCC_REG_RANGE_0_HIGH)) ||
	    ((normalized_reg >= XCC_REG_RANGE_1_LOW) && (normalized_reg < XCC_REG_RANGE_1_HIGH))) {
		rrmt_opt->xcd_die_id = mes_v12_1_get_xcc_from_reg(reg);
		rrmt_opt->mode = (xcc_id == rrmt_opt->xcd_die_id) ?
			MES_RRMT_MODE_LOCAL_XCD : MES_RRMT_MODE_REMOTE_XCD;
	} else {
		rrmt_opt->mode = MES_RRMT_MODE_LOCAL_REMOTE_AID;
	}
}

/* Build and submit a MISC API packet (register read/write/wait,
 * shader-debugger setup, config change).  Register offsets are
 * normalized to per-XCC offsets unless the RRMT mode keeps the full
 * MID-routed offset.
 */
static int mes_v12_1_misc_op(struct amdgpu_mes *mes,
			     struct mes_misc_op_input *input)
{
	struct amdgpu_device *adev = mes->adev;
	union MESAPI__MISC misc_pkt;
	int pipe;

	if (mes->adev->enable_uni_mes)
		pipe = AMDGPU_MES_KIQ_PIPE;
	else
		pipe = AMDGPU_MES_SCHED_PIPE;

	memset(&misc_pkt, 0, sizeof(misc_pkt));

	misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
	misc_pkt.header.opcode = MES_SCH_API_MISC;
	misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	switch (input->op) {
	case MES_MISC_OP_READ_REG:
		misc_pkt.opcode = MESAPI_MISC__READ_REG;
		misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset;
		misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
		mes_v12_1_get_rrmt(input->read_reg.reg_offset,
				   GET_INST(GC, input->xcc_id),
				   &misc_pkt.read_reg.rrmt_opt);
		if (misc_pkt.read_reg.rrmt_opt.mode != MES_RRMT_MODE_REMOTE_MID) {
			misc_pkt.read_reg.reg_offset =
				NORMALIZE_XCC_REG_OFFSET(misc_pkt.read_reg.reg_offset);
		}
		break;
	case MES_MISC_OP_WRITE_REG:
		misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
		misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset;
		misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
		mes_v12_1_get_rrmt(input->write_reg.reg_offset,
				   GET_INST(GC, input->xcc_id),
				   &misc_pkt.write_reg.rrmt_opt);
		if (misc_pkt.write_reg.rrmt_opt.mode != MES_RRMT_MODE_REMOTE_MID) {
			misc_pkt.write_reg.reg_offset =
				NORMALIZE_XCC_REG_OFFSET(misc_pkt.write_reg.reg_offset);
		}
		break;
	case MES_MISC_OP_WRM_REG_WAIT:
		misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
		misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
		misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
		misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
		misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
		misc_pkt.wait_reg_mem.reg_offset2 = 0;
		mes_v12_1_get_rrmt(input->wrm_reg.reg0,
				   GET_INST(GC, input->xcc_id),
				   &misc_pkt.wait_reg_mem.rrmt_opt1);
		if (misc_pkt.wait_reg_mem.rrmt_opt1.mode != MES_RRMT_MODE_REMOTE_MID) {
			misc_pkt.wait_reg_mem.reg_offset1 =
				NORMALIZE_XCC_REG_OFFSET(misc_pkt.wait_reg_mem.reg_offset1);
		}
		break;
	case MES_MISC_OP_WRM_REG_WR_WAIT:
		misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
		misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
		misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
		misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
		misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
		misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
		mes_v12_1_get_rrmt(input->wrm_reg.reg0,
				   GET_INST(GC, input->xcc_id),
				   &misc_pkt.wait_reg_mem.rrmt_opt1);
		mes_v12_1_get_rrmt(input->wrm_reg.reg1,
				   GET_INST(GC, input->xcc_id),
				   &misc_pkt.wait_reg_mem.rrmt_opt2);
		if (misc_pkt.wait_reg_mem.rrmt_opt1.mode != MES_RRMT_MODE_REMOTE_MID) {
			misc_pkt.wait_reg_mem.reg_offset1 =
				NORMALIZE_XCC_REG_OFFSET(misc_pkt.wait_reg_mem.reg_offset1);
		}
		if (misc_pkt.wait_reg_mem.rrmt_opt2.mode != MES_RRMT_MODE_REMOTE_MID) {
			misc_pkt.wait_reg_mem.reg_offset2 =
				NORMALIZE_XCC_REG_OFFSET(misc_pkt.wait_reg_mem.reg_offset2);
		}
		break;
	case MES_MISC_OP_SET_SHADER_DEBUGGER:
		/* shader debugger requests always go to the scheduler pipe */
		pipe = AMDGPU_MES_SCHED_PIPE;
		misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
		misc_pkt.set_shader_debugger.process_context_addr =
			input->set_shader_debugger.process_context_addr;
		misc_pkt.set_shader_debugger.flags.u32all =
			input->set_shader_debugger.flags.u32all;
		misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl =
			input->set_shader_debugger.spi_gdbg_per_vmid_cntl;
		memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
		       input->set_shader_debugger.tcp_watch_cntl,
		       sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
		misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en;
		break;
	case MES_MISC_OP_CHANGE_CONFIG:
		misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
		misc_pkt.change_config.opcode =
			MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS;
		misc_pkt.change_config.option.bits.limit_single_process =
			input->change_config.option.limit_single_process;
		break;
	default:
		DRM_ERROR("unsupported misc op (%d) \n", input->op);
		return -EINVAL;
	}

	return mes_v12_1_submit_pkt_and_poll_completion(mes,
			input->xcc_id, pipe,
			&misc_pkt, sizeof(misc_pkt),
			offsetof(union MESAPI__MISC, api_status));
}

/* Submit SET_HW_RSRC_1: second-stage resource setup; also enables
 * cooperative scheduling and points the firmware at the master XCC's
 * shared command buffer when coop mode is on.
 */
static int mes_v12_1_set_hw_resources_1(struct amdgpu_mes *mes,
					int pipe, int xcc_id)
{
	union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt;
	int master_xcc_id, inst = MES_PIPE_INST(xcc_id, pipe);

	memset(&mes_set_hw_res_1_pkt, 0, sizeof(mes_set_hw_res_1_pkt));

	mes_set_hw_res_1_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
	mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
	mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100;

	if (mes->enable_coop_mode && pipe == AMDGPU_MES_SCHED_PIPE) {
		master_xcc_id = mes->master_xcc_ids[inst];
		mes_set_hw_res_1_pkt.mes_coop_mode = 1;
		mes_set_hw_res_1_pkt.coop_sch_shared_mc_addr =
			mes->shared_cmd_buf_gpu_addr[master_xcc_id];
	}

	return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe,
			&mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt),
			offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
}

static void mes_v12_1_set_gfx_hqd_mask(union MESAPI_SET_HW_RESOURCES *pkt)
{
	/*
	 * GFX V12 has only one GFX pipe, but 8 queues in it.
	 * GFX pipe 0 queue 0 is being used by Kernel queue.
	 * Set GFX pipe 0 queue 1-7 for MES scheduling
	 * mask = 1111 1110b
	 */
	pkt->gfx_hqd_mask[0] = 0xFE;
}

/* Submit SET_HW_RSRC: tell the firmware which VMIDs, HQDs, doorbells and
 * register apertures it owns, plus global scheduling options.  The
 * queue-mask section only applies to the scheduler pipe.
 */
static int mes_v12_1_set_hw_resources(struct amdgpu_mes *mes,
				      int pipe, int xcc_id)
{
	int i;
	struct amdgpu_device *adev = mes->adev;
	union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;

	memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));

	mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
	mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	if (pipe == AMDGPU_MES_SCHED_PIPE) {
		mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
		mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
		mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
		mes_set_hw_res_pkt.paging_vmid = 0;

		for (i = 0; i < MAX_COMPUTE_PIPES; i++)
			mes_set_hw_res_pkt.compute_hqd_mask[i] =
				mes->compute_hqd_mask[i];

		mes_v12_1_set_gfx_hqd_mask(&mes_set_hw_res_pkt);

		for (i = 0; i < MAX_SDMA_PIPES; i++)
			mes_set_hw_res_pkt.sdma_hqd_mask[i] =
				mes->sdma_hqd_mask[i];

		for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
			mes_set_hw_res_pkt.aggregated_doorbells[i] =
				mes->aggregated_doorbells[i];
	}

	mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr =
		mes->sch_ctx_gpu_addr[pipe];
	mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
		mes->query_status_fence_gpu_addr[pipe];

	/* hand the firmware the register aperture bases for each segment */
	for (i = 0; i < 5; i++) {
		mes_set_hw_res_pkt.gc_base[i] =
			adev->reg_offset[GC_HWIP][0][i];
		mes_set_hw_res_pkt.mmhub_base[i] =
			adev->reg_offset[MMHUB_HWIP][0][i];
		mes_set_hw_res_pkt.osssys_base[i] =
			adev->reg_offset[OSSSYS_HWIP][0][i];
	}

	mes_set_hw_res_pkt.disable_reset = 1;
	mes_set_hw_res_pkt.disable_mes_log = 1;
	mes_set_hw_res_pkt.use_different_vmid_compute = 1;
	mes_set_hw_res_pkt.enable_reg_active_poll = 1;
	mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;

	/*
	 * Keep oversubscribe timer for sdma . When we have unmapped doorbell
	 * handling support, other queue will not use the oversubscribe timer.
	 * handling mode - 0: disabled; 1: basic version; 2: basic+ version
	 */
	mes_set_hw_res_pkt.oversubscription_timer = 50;
	mes_set_hw_res_pkt.unmapped_doorbell_handling = 1;

	if (amdgpu_mes_log_enable) {
		mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
		/* one log buffer region per (xcc, pipe) instance */
		mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr =
			mes->event_log_gpu_addr + MES_PIPE_INST(xcc_id, pipe) * AMDGPU_MES_LOG_BUFFER_SIZE;
	}

	if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
		mes_set_hw_res_pkt.limit_single_process = 1;

	return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe,
			&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
			offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
}

/* Program the five aggregated-doorbell control registers (one per MES
 * priority level: low/normal/medium/high/realtime) with the doorbell
 * offsets reserved in mes->aggregated_doorbells, and enable doorbell
 * update messages to the HQD.
 */
static void mes_v12_1_init_aggregated_doorbell(struct amdgpu_mes *mes,
					       int xcc_id)
{
	struct amdgpu_device *adev = mes->adev;
	uint32_t data;

	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1);
	data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
		CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1, data);

	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2);
	data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
		CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2, data);

	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3);
	data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
		CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3, data);

	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4);
	data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
		CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4, data);

	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5);
	data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
		  CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
		  CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
	data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
		CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
	data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5, data);

	data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_GFX_CONTROL, data);
}


/* Enable/disable CP handling of doorbell writes that hit no mapped queue,
 * and widen the per-process doorbell window for KFD.
 */
static void mes_v12_1_enable_unmapped_doorbell_handling(
		struct amdgpu_mes *mes, bool enable, int xcc_id)
{
	struct amdgpu_device *adev = mes->adev;
	uint32_t data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL);

	/*
	 * The default PROC_LSB setting is 0xc which means doorbell
	 * addr[16:12] gives the doorbell page number. For kfd, each
	 * process will use 2 pages of doorbell, we need to change the
	 * setting to 0xd
	 */
	data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK;
	data |= 0xd << CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT;

	data |= (enable ?
1 : 0) << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT; 819 820 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL, data); 821 } 822 823 #if 0 824 static int mes_v12_1_reset_legacy_queue(struct amdgpu_mes *mes, 825 struct mes_reset_legacy_queue_input *input) 826 { 827 union MESAPI__RESET mes_reset_queue_pkt; 828 int pipe; 829 830 memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); 831 832 mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 833 mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; 834 mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 835 836 mes_reset_queue_pkt.queue_type = 837 convert_to_mes_queue_type(input->queue_type); 838 839 if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { 840 mes_reset_queue_pkt.reset_legacy_gfx = 1; 841 mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; 842 mes_reset_queue_pkt.queue_id_lp = input->queue_id; 843 mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr; 844 mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset; 845 mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr; 846 mes_reset_queue_pkt.vmid_id_lp = input->vmid; 847 } else { 848 mes_reset_queue_pkt.reset_queue_only = 1; 849 mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; 850 } 851 852 if (mes->adev->enable_uni_mes) 853 pipe = AMDGPU_MES_KIQ_PIPE; 854 else 855 pipe = AMDGPU_MES_SCHED_PIPE; 856 857 return mes_v12_1_submit_pkt_and_poll_completion(mes, 858 input->xcc_id, pipe, 859 &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), 860 offsetof(union MESAPI__RESET, api_status)); 861 } 862 #endif 863 864 static const struct amdgpu_mes_funcs mes_v12_1_funcs = { 865 .add_hw_queue = mes_v12_1_add_hw_queue, 866 .remove_hw_queue = mes_v12_1_remove_hw_queue, 867 .map_legacy_queue = mes_v12_1_map_legacy_queue, 868 .unmap_legacy_queue = mes_v12_1_unmap_legacy_queue, 869 .suspend_gang = mes_v12_1_suspend_gang, 870 .resume_gang = mes_v12_1_resume_gang, 871 .misc_op = mes_v12_1_misc_op, 872 .reset_hw_queue = 
mes_v12_1_reset_hw_queue,
};

/*
 * Copy the MES ucode (instruction) image for @pipe into a newly created
 * VRAM BO for this pipe instance; used for direct (backdoor) fw loading.
 */
static int mes_v12_1_allocate_ucode_buffer(struct amdgpu_device *adev,
					   enum amdgpu_mes_pipe pipe,
					   int xcc_id)
{
	int r, inst = MES_PIPE_INST(xcc_id, pipe);
	const struct mes_firmware_header_v1_0 *mes_hdr;
	const __le32 *fw_data;
	unsigned fw_size;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw[pipe]->data;

	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
		   le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
	fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_size,
				      PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->mes.ucode_fw_obj[inst],
				      &adev->mes.ucode_fw_gpu_addr[inst],
				      (void **)&adev->mes.ucode_fw_ptr[inst]);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
		return r;
	}

	memcpy(adev->mes.ucode_fw_ptr[inst], fw_data, fw_size);

	amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[inst]);
	amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[inst]);

	return 0;
}

/*
 * Same as mes_v12_1_allocate_ucode_buffer() but for the MES ucode *data*
 * image; the data BO is 64KB aligned.
 */
static int mes_v12_1_allocate_ucode_data_buffer(struct amdgpu_device *adev,
						enum amdgpu_mes_pipe pipe,
						int xcc_id)
{
	int r, inst = MES_PIPE_INST(xcc_id, pipe);
	const struct mes_firmware_header_v1_0 *mes_hdr;
	const __le32 *fw_data;
	unsigned fw_size;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw[pipe]->data;

	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
		   le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
	fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->mes.data_fw_obj[inst],
				      &adev->mes.data_fw_gpu_addr[inst],
				      (void **)&adev->mes.data_fw_ptr[inst]);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
		return r;
	}

	memcpy(adev->mes.data_fw_ptr[inst], fw_data, fw_size);

	amdgpu_bo_kunmap(adev->mes.data_fw_obj[inst]);
	amdgpu_bo_unreserve(adev->mes.data_fw_obj[inst]);

	return 0;
}

/* Free the per-instance ucode and ucode-data BOs created above. */
static void mes_v12_1_free_ucode_buffers(struct amdgpu_device *adev,
					 enum amdgpu_mes_pipe pipe,
					 int xcc_id)
{
	int inst = MES_PIPE_INST(xcc_id, pipe);

	amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[inst],
			      &adev->mes.data_fw_gpu_addr[inst],
			      (void **)&adev->mes.data_fw_ptr[inst]);

	amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[inst],
			      &adev->mes.ucode_fw_gpu_addr[inst],
			      (void **)&adev->mes.ucode_fw_ptr[inst]);
}

/*
 * enable=true: reset both MES pipes, program their ucode start addresses
 * and take them out of halt.  enable=false: deactivate, invalidate the
 * instruction cache, reset and halt both pipes.
 */
static void mes_v12_1_enable(struct amdgpu_device *adev,
			     bool enable, int xcc_id)
{
	uint64_t ucode_addr;
	uint32_t pipe, data = 0;

	if (enable) {
		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);

		mutex_lock(&adev->srbm_mutex);
		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
			/* me=3 selects the MES micro engine */
			soc_v1_0_grbm_select(adev, 3, pipe, 0, 0,
					     GET_INST(GC, xcc_id));

			/* start address is programmed in dwords (>> 2) */
			ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
			WREG32_SOC15(GC, GET_INST(GC, xcc_id),
				     regCP_MES_PRGRM_CNTR_START,
				     lower_32_bits(ucode_addr));
			WREG32_SOC15(GC, GET_INST(GC, xcc_id),
				     regCP_MES_PRGRM_CNTR_START_HI,
				     upper_32_bits(ucode_addr));
		}
		soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
		mutex_unlock(&adev->srbm_mutex);

		/* unhalt MES and activate both pipes */
		data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);

		/* give the firmware time to boot */
		if (amdgpu_emu_mode)
			msleep(500);
		else if (adev->enable_uni_mes)
			udelay(500);
		else
			udelay(50);
	} else {
		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL,
				     MES_INVALIDATE_ICACHE, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);
	}
}

/*
 * Halt MES, then program the ucode start address for both pipes.
 * Used for the frontdoor (PSP/RLC autoload) firmware loading paths.
 */
static void mes_v12_1_set_ucode_start_addr(struct amdgpu_device *adev,
					   int xcc_id)
{
	uint64_t ucode_addr;
	int pipe;

	mes_v12_1_enable(adev, false, xcc_id);

	mutex_lock(&adev->srbm_mutex);
	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		/* me=3, queue=0 */
		soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));

		/* set ucode start address (in dwords) */
		ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START,
			     lower_32_bits(ucode_addr));
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START_HI,
			     upper_32_bits(ucode_addr));

		soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
	}
	mutex_unlock(&adev->srbm_mutex);
}

/* This function is for backdoor MES firmware */
static int mes_v12_1_load_microcode(struct amdgpu_device *adev,
				    enum amdgpu_mes_pipe pipe,
				    bool prime_icache, int xcc_id)
{
	int r, inst = MES_PIPE_INST(xcc_id, pipe);
	uint32_t data;

	/* halt MES before touching its caches */
	mes_v12_1_enable(adev, false, xcc_id);

	if (!adev->mes.fw[pipe])
		return -EINVAL;

	r = mes_v12_1_allocate_ucode_buffer(adev, pipe, xcc_id);
	if (r)
		return r;

	r = mes_v12_1_allocate_ucode_data_buffer(adev, pipe, xcc_id);
	if (r) {
		mes_v12_1_free_ucode_buffers(adev, pipe, xcc_id);
		return r;
	}

	mutex_lock(&adev->srbm_mutex);
	/* me=3, pipe=0, queue=0 */
	soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));

	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_CNTL, 0);

	/* set ucode firmware address */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_LO,
		     lower_32_bits(adev->mes.ucode_fw_gpu_addr[inst]));
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_HI,
		     upper_32_bits(adev->mes.ucode_fw_gpu_addr[inst]));

	/* set ucode instruction cache boundary to 2M-1 */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MIBOUND_LO, 0x1FFFFF);

	/* set ucode data firmware address */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_LO,
		     lower_32_bits(adev->mes.data_fw_gpu_addr[inst]));
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_HI,
		     upper_32_bits(adev->mes.data_fw_gpu_addr[inst]));

	/* Set data cache boundary CP_MES_MDBOUND_LO */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBOUND_LO, 0x7FFFF);

	if (prime_icache) {
		/* invalidate ICACHE */
		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data);

		/* prime the ICACHE.
		 */
		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL);
		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data);
	}

	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
	mutex_unlock(&adev->srbm_mutex);

	return 0;
}

/* Create and zero the per-instance EOP buffer used by the MES queue. */
static int mes_v12_1_allocate_eop_buf(struct amdgpu_device *adev,
				      enum amdgpu_mes_pipe pipe,
				      int xcc_id)
{
	int r, inst = MES_PIPE_INST(xcc_id, pipe);
	u32 *eop;

	r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->mes.eop_gpu_obj[inst],
				      &adev->mes.eop_gpu_addr[inst],
				      (void **)&eop);
	if (r) {
		dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
		return r;
	}

	memset(eop, 0,
	       adev->mes.eop_gpu_obj[inst]->tbo.base.size);

	amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[inst]);
	amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[inst]);

	return 0;
}

/*
 * Create the shared command buffer BO for the scheduler pipe (used in
 * coop mode).  The KIQ pipe does not need one.
 */
static int mes_v12_1_allocate_shared_cmd_buf(struct amdgpu_device *adev,
					     enum amdgpu_mes_pipe pipe,
					     int xcc_id)
{
	int r, inst = MES_PIPE_INST(xcc_id, pipe);

	if (pipe == AMDGPU_MES_KIQ_PIPE)
		return 0;

	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM,
				    &adev->mes.shared_cmd_buf_obj[inst],
				    &adev->mes.shared_cmd_buf_gpu_addr[inst],
				    NULL);
	if (r) {
		dev_err(adev->dev,
			"(%d) failed to create shared cmd buf bo\n", r);
		return r;
	}

	return 0;
}

/* Initialize the MQD (memory queue descriptor) backing @ring. */
static int mes_v12_1_mqd_init(struct amdgpu_ring *ring)
{
	struct v12_1_mes_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000007;

	/* EOP base is programmed in 256-byte units (>> 8) */
	eop_base_addr = ring->eop_gpu_addr >> 8;

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(MES_EOP_SIZE / 4) - 1));

	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
	mqd->cp_hqd_eop_control = tmp;

	/* disable the queue if it's active */
	ring->wptr = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = regCP_MQD_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = ring->rptr_gpu_addr;
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = ring->wptr_gpu_addr;
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp =
regCP_HQD_PQ_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			(order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* enable doorbell */
	tmp = 0;
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_HIT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_EN, 0);
	}
	mqd->cp_hqd_pq_doorbell_control = tmp;

	mqd->cp_hqd_vmid = 0;
	/* activate the queue */
	mqd->cp_hqd_active = 1;

	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE,
			    PRELOAD_SIZE, 0x63);
	mqd->cp_hqd_persistent_state = tmp;

	mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT;
	mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
	mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;

	/*
	 * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped
	 * doorbell handling. This is a reserved CP internal register and
	 * cannot be accessed by others
	 */
	mqd->cp_hqd_gfx_control = BIT(15);

	return 0;
}

/*
 * Program the HQD registers for @ring directly (no KIQ), from the values
 * prepared in its MQD, then activate the queue.
 */
static void mes_v12_1_queue_init_register(struct amdgpu_ring *ring,
					  int xcc_id)
{
	struct v12_1_mes_mqd *mqd = ring->mqd_ptr;
	struct amdgpu_device *adev = ring->adev;
	uint32_t data = 0;

	mutex_lock(&adev->srbm_mutex);
	soc_v1_0_grbm_select(adev, 3, ring->pipe, 0, 0, GET_INST(GC, xcc_id));

	/* set CP_HQD_VMID.VMID = 0. */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID);
	data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, data);

	/* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL);
	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
			     DOORBELL_EN, 0);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data);

	/* set CP_MQD_BASE_ADDR/HI with the MQD base address */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set CP_MQD_CONTROL.VMID=0 */
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL);
	data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
	/* NOTE(review): 'data' is computed above but a literal 0 is written,
	 * clearing the whole register rather than only VMID — looks like the
	 * same long-standing pattern in earlier MES code; confirm intent. */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL, 0);

	/* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR,
		     mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id),
regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 1299 mqd->cp_hqd_pq_rptr_report_addr_hi); 1300 1301 /* set CP_HQD_PQ_CONTROL */ 1302 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); 1303 1304 /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */ 1305 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR, 1306 mqd->cp_hqd_pq_wptr_poll_addr_lo); 1307 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 1308 mqd->cp_hqd_pq_wptr_poll_addr_hi); 1309 1310 /* set CP_HQD_PQ_DOORBELL_CONTROL */ 1311 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 1312 mqd->cp_hqd_pq_doorbell_control); 1313 1314 /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */ 1315 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); 1316 1317 /* set CP_HQD_ACTIVE.ACTIVE=1 */ 1318 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, mqd->cp_hqd_active); 1319 1320 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1321 mutex_unlock(&adev->srbm_mutex); 1322 } 1323 1324 static int mes_v12_1_kiq_enable_queue(struct amdgpu_device *adev, int xcc_id) 1325 { 1326 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 1327 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[xcc_id].ring; 1328 int r, inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 1329 1330 if (!kiq->pmf || !kiq->pmf->kiq_map_queues) 1331 return -EINVAL; 1332 1333 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); 1334 if (r) { 1335 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 1336 return r; 1337 } 1338 1339 kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[inst]); 1340 1341 r = amdgpu_ring_test_ring(kiq_ring); 1342 if (r) { 1343 DRM_ERROR("kfq enable failed\n"); 1344 kiq_ring->sched.ready = false; 1345 } 1346 return r; 1347 } 1348 1349 static int mes_v12_1_queue_init(struct amdgpu_device *adev, 1350 enum amdgpu_mes_pipe pipe, 1351 int xcc_id) 1352 { 1353 struct amdgpu_ring *ring; 1354 int r; 1355 1356 if (!adev->enable_uni_mes 
&& pipe == AMDGPU_MES_KIQ_PIPE) 1357 ring = &adev->gfx.kiq[xcc_id].ring; 1358 else 1359 ring = &adev->mes.ring[MES_PIPE_INST(xcc_id, pipe)]; 1360 1361 if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) && 1362 (amdgpu_in_reset(adev) || adev->in_suspend)) { 1363 *(ring->wptr_cpu_addr) = 0; 1364 *(ring->rptr_cpu_addr) = 0; 1365 amdgpu_ring_clear_ring(ring); 1366 } 1367 1368 r = mes_v12_1_mqd_init(ring); 1369 if (r) 1370 return r; 1371 1372 if (pipe == AMDGPU_MES_SCHED_PIPE) { 1373 if (adev->enable_uni_mes) 1374 r = amdgpu_mes_map_legacy_queue(adev, ring, xcc_id); 1375 else 1376 r = mes_v12_1_kiq_enable_queue(adev, xcc_id); 1377 if (r) 1378 return r; 1379 } else { 1380 mes_v12_1_queue_init_register(ring, xcc_id); 1381 } 1382 1383 /* get MES scheduler/KIQ versions */ 1384 mutex_lock(&adev->srbm_mutex); 1385 soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); 1386 1387 if (pipe == AMDGPU_MES_SCHED_PIPE) 1388 adev->mes.sched_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO); 1389 else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) 1390 adev->mes.kiq_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO); 1391 1392 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1393 mutex_unlock(&adev->srbm_mutex); 1394 1395 return 0; 1396 } 1397 1398 static int mes_v12_1_ring_init(struct amdgpu_device *adev, 1399 int xcc_id, int pipe) 1400 { 1401 struct amdgpu_ring *ring; 1402 int inst = MES_PIPE_INST(xcc_id, pipe); 1403 1404 ring = &adev->mes.ring[inst]; 1405 1406 ring->funcs = &mes_v12_1_ring_funcs; 1407 1408 ring->me = 3; 1409 ring->pipe = pipe; 1410 ring->queue = 0; 1411 ring->xcc_id = xcc_id; 1412 ring->vm_hub = AMDGPU_GFXHUB(xcc_id); 1413 1414 ring->ring_obj = NULL; 1415 ring->use_doorbell = true; 1416 ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst]; 1417 ring->no_scheduler = true; 1418 snprintf(ring->name, sizeof(ring->name), "mes_%hhu.%hhu.%hhu.%hhu", 1419 (unsigned char)xcc_id, (unsigned char)ring->me, 
		 (unsigned char)ring->pipe, (unsigned char)ring->queue);

	/* doorbell indices are per-XCC; << 1 converts to a qword index */
	if (pipe == AMDGPU_MES_SCHED_PIPE)
		ring->doorbell_index =
			(adev->doorbell_index.mes_ring0 +
			 xcc_id * adev->doorbell_index.xcc_doorbell_range)
			<< 1;
	else
		ring->doorbell_index =
			(adev->doorbell_index.mes_ring1 +
			 xcc_id * adev->doorbell_index.xcc_doorbell_range)
			<< 1;

	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
				AMDGPU_RING_PRIO_DEFAULT, NULL);
}

/* Software init for the KIQ ring on @xcc_id (non-unified MES only). */
static int mes_v12_1_kiq_ring_init(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_ring *ring;
	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE);

	spin_lock_init(&adev->gfx.kiq[xcc_id].ring_lock);

	ring = &adev->gfx.kiq[xcc_id].ring;

	ring->me = 3;
	ring->pipe = 1;
	ring->queue = 0;
	ring->xcc_id = xcc_id;
	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst];
	ring->no_scheduler = true;
	ring->doorbell_index =
		(adev->doorbell_index.mes_ring1 +
		 xcc_id * adev->doorbell_index.xcc_doorbell_range)
		<< 1;

	snprintf(ring->name, sizeof(ring->name), "mes_kiq_%hhu.%hhu.%hhu.%hhu",
		 (unsigned char)xcc_id, (unsigned char)ring->me,
		 (unsigned char)ring->pipe, (unsigned char)ring->queue);

	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
				AMDGPU_RING_PRIO_DEFAULT, NULL);
}

/*
 * Allocate the MQD BO for the ring of (@xcc_id, @pipe) plus a CPU-side
 * backup copy; no-op if the MQD BO already exists.
 */
static int mes_v12_1_mqd_sw_init(struct amdgpu_device *adev,
				 enum amdgpu_mes_pipe pipe,
				 int xcc_id)
{
	int r, mqd_size = sizeof(struct v12_1_mes_mqd);
	struct amdgpu_ring *ring;
	int inst = MES_PIPE_INST(xcc_id, pipe);

	if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
		ring = &adev->gfx.kiq[xcc_id].ring;
	else
		ring = &adev->mes.ring[inst];

	if (ring->mqd_obj)
		return 0;

	r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
				    &ring->mqd_gpu_addr, &ring->mqd_ptr);
	if (r) {
		dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
		return r;
	}

	memset(ring->mqd_ptr, 0, mqd_size);

	/* prepare MQD backup */
	adev->mes.mqd_backup[inst] = kmalloc(mqd_size, GFP_KERNEL);
	if (!adev->mes.mqd_backup[inst])
		dev_warn(adev->dev,
			 "no memory to create MQD backup for ring %s\n",
			 ring->name);

	return 0;
}

/* IP-block sw_init: set up MES callbacks and allocate all per-pipe BOs. */
static int mes_v12_1_sw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int pipe, r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);

	if (adev->enable_uni_mes && num_xcc > 1)
		adev->mes.enable_coop_mode = true;

	adev->mes.funcs = &mes_v12_1_funcs;
	adev->mes.kiq_hw_init = &mes_v12_1_kiq_hw_init;
	adev->mes.kiq_hw_fini = &mes_v12_1_kiq_hw_fini;
	adev->mes.enable_legacy_queue_map = true;

	/* unified MES keeps one log buffer per pipe per XCC */
	adev->mes.event_log_size =
		adev->enable_uni_mes ?
		(AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE * num_xcc) :
		AMDGPU_MES_LOG_BUFFER_SIZE;

	r = amdgpu_mes_init(adev);
	if (r)
		return r;

	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
			r = mes_v12_1_allocate_eop_buf(adev, pipe, xcc_id);
			if (r)
				return r;

			r = mes_v12_1_mqd_sw_init(adev, pipe, xcc_id);
			if (r)
				return r;

			if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
				r = mes_v12_1_kiq_ring_init(adev, xcc_id);
			else
				r = mes_v12_1_ring_init(adev, xcc_id, pipe);
			if (r)
				return r;

			if (adev->mes.enable_coop_mode) {
				r = mes_v12_1_allocate_shared_cmd_buf(adev,
						pipe, xcc_id);
				if (r)
					return r;
			}
		}
	}

	return 0;
}

/* IP-block sw_fini: tear down everything sw_init created. */
static int mes_v12_1_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int pipe, inst, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);

	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
			inst = MES_PIPE_INST(xcc_id, pipe);

			amdgpu_bo_free_kernel(&adev->mes.shared_cmd_buf_obj[inst],
					      &adev->mes.shared_cmd_buf_gpu_addr[inst],
					      NULL);

			kfree(adev->mes.mqd_backup[inst]);

			amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[inst],
					      &adev->mes.eop_gpu_addr[inst],
					      NULL);
			/* NOTE(review): firmware is loaded per pipe (fw[pipe]
			 * in the allocate paths) but released per instance
			 * here — confirm fw[] is really instance-indexed. */
			amdgpu_ucode_release(&adev->mes.fw[inst]);

			if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) {
				amdgpu_bo_free_kernel(&adev->mes.ring[inst].mqd_obj,
						      &adev->mes.ring[inst].mqd_gpu_addr,
						      &adev->mes.ring[inst].mqd_ptr);
				amdgpu_ring_fini(&adev->mes.ring[inst]);
			}
		}
	}

	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
		/* the KIQ ring only exists when unified MES is disabled */
		if (!adev->enable_uni_mes) {
			amdgpu_bo_free_kernel(&adev->gfx.kiq[xcc_id].ring.mqd_obj,
					      &adev->gfx.kiq[xcc_id].ring.mqd_gpu_addr,
					      &adev->gfx.kiq[xcc_id].ring.mqd_ptr);
			amdgpu_ring_fini(&adev->gfx.kiq[xcc_id].ring);
		}

		/* backdoor load allocated ucode BOs that must be freed here */
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
			mes_v12_1_free_ucode_buffers(adev,
					AMDGPU_MES_KIQ_PIPE, xcc_id);
			mes_v12_1_free_ucode_buffers(adev,
					AMDGPU_MES_SCHED_PIPE, xcc_id);
		}
	}

	amdgpu_mes_fini(adev);
	return 0;
}

/*
 * Dequeue the scheduler pipe's HQD directly via registers (used when KIQ
 * is managing the MES scheduler queue) and disable its doorbell.
 */
static void mes_v12_1_kiq_dequeue_sched(struct amdgpu_device *adev,
					int xcc_id)
{
	uint32_t data;
	int i;

	mutex_lock(&adev->srbm_mutex);
	soc_v1_0_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0,
			     GET_INST(GC, xcc_id));

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) {
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
	}
	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL);
	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
			     DOORBELL_EN, 0);
	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
			     DOORBELL_HIT, 1);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data);

	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0);

	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0);

	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
	mutex_unlock(&adev->srbm_mutex);

	adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = false;
}

static void mes_v12_1_kiq_setting(struct amdgpu_ring *ring, int xcc_id)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/*
tell RLC which is KIQ queue */ 1649 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS); 1650 tmp &= 0xffffff00; 1651 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 1652 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); 1653 tmp |= 0x80; 1654 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); 1655 } 1656 1657 static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id) 1658 { 1659 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE); 1660 int r = 0; 1661 struct amdgpu_ip_block *ip_block; 1662 1663 if (adev->enable_uni_mes) 1664 mes_v12_1_kiq_setting(&adev->mes.ring[inst], xcc_id); 1665 else 1666 mes_v12_1_kiq_setting(&adev->gfx.kiq[xcc_id].ring, xcc_id); 1667 1668 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1669 1670 r = mes_v12_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, 1671 false, xcc_id); 1672 if (r) { 1673 DRM_ERROR("failed to load MES fw, r=%d\n", r); 1674 return r; 1675 } 1676 1677 r = mes_v12_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, 1678 true, xcc_id); 1679 if (r) { 1680 DRM_ERROR("failed to load MES kiq fw, r=%d\n", r); 1681 return r; 1682 } 1683 1684 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1685 1686 } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1687 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1688 1689 mes_v12_1_enable(adev, true, xcc_id); 1690 1691 ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES); 1692 if (unlikely(!ip_block)) { 1693 dev_err(adev->dev, "Failed to get MES handle\n"); 1694 return -EINVAL; 1695 } 1696 1697 r = mes_v12_1_queue_init(adev, AMDGPU_MES_KIQ_PIPE, xcc_id); 1698 if (r) 1699 goto failure; 1700 1701 if (adev->enable_uni_mes) { 1702 r = mes_v12_1_set_hw_resources(&adev->mes, 1703 AMDGPU_MES_KIQ_PIPE, xcc_id); 1704 if (r) 1705 goto failure; 1706 1707 mes_v12_1_set_hw_resources_1(&adev->mes, 1708 AMDGPU_MES_KIQ_PIPE, xcc_id); 1709 } 1710 1711 if (adev->mes.enable_legacy_queue_map) { 
1712 r = mes_v12_1_xcc_hw_init(ip_block, xcc_id); 1713 if (r) 1714 goto failure; 1715 } 1716 1717 return r; 1718 1719 failure: 1720 mes_v12_1_hw_fini(ip_block); 1721 return r; 1722 } 1723 1724 static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id) 1725 { 1726 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 1727 1728 if (adev->mes.ring[inst].sched.ready) { 1729 if (adev->enable_uni_mes) 1730 amdgpu_mes_unmap_legacy_queue(adev, 1731 &adev->mes.ring[inst], 1732 RESET_QUEUES, 0, 0, xcc_id); 1733 else 1734 mes_v12_1_kiq_dequeue_sched(adev, xcc_id); 1735 1736 adev->mes.ring[inst].sched.ready = false; 1737 } 1738 1739 mes_v12_1_enable(adev, false, xcc_id); 1740 1741 return 0; 1742 } 1743 1744 static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id) 1745 { 1746 int r; 1747 struct amdgpu_device *adev = ip_block->adev; 1748 1749 if (adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready) 1750 goto out; 1751 1752 if (!adev->enable_mes_kiq) { 1753 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1754 r = mes_v12_1_load_microcode(adev, 1755 AMDGPU_MES_SCHED_PIPE, true, xcc_id); 1756 if (r) { 1757 DRM_ERROR("failed to MES fw, r=%d\n", r); 1758 return r; 1759 } 1760 1761 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1762 1763 } else if (adev->firmware.load_type == 1764 AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1765 1766 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1767 } 1768 1769 mes_v12_1_enable(adev, true, xcc_id); 1770 } 1771 1772 /* Enable the MES to handle doorbell ring on unmapped queue */ 1773 mes_v12_1_enable_unmapped_doorbell_handling(&adev->mes, true, xcc_id); 1774 1775 r = mes_v12_1_queue_init(adev, AMDGPU_MES_SCHED_PIPE, xcc_id); 1776 if (r) 1777 goto failure; 1778 1779 r = mes_v12_1_set_hw_resources(&adev->mes, 1780 AMDGPU_MES_SCHED_PIPE, xcc_id); 1781 if (r) 1782 goto failure; 1783 1784 if (adev->enable_uni_mes) 1785 mes_v12_1_set_hw_resources_1(&adev->mes, 1786 AMDGPU_MES_SCHED_PIPE, xcc_id); 1787 1788 
	mes_v12_1_init_aggregated_doorbell(&adev->mes, xcc_id);

	r = mes_v12_1_query_sched_status(&adev->mes,
			AMDGPU_MES_SCHED_PIPE, xcc_id);
	if (r) {
		DRM_ERROR("MES is busy\n");
		goto failure;
	}

out:
	/*
	 * Disable KIQ ring usage from the driver once MES is enabled.
	 * MES uses KIQ ring exclusively so driver cannot access KIQ ring
	 * with MES enabled.
	 */
	adev->gfx.kiq[xcc_id].ring.sched.ready = false;
	adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = true;

	return 0;

failure:
	mes_v12_1_hw_fini(ip_block);
	return r;
}

/* IP-block hw_init: bring up MES on every XCC. */
static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);

	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
		/* for SPX mode, all master xcc ids are set to 0 */
		if (adev->mes.enable_coop_mode)
			adev->mes.master_xcc_ids[xcc_id] = 0;

		r = mes_v12_1_xcc_hw_init(ip_block, xcc_id);
		if (r)
			return r;
	}

	return 0;
}

/* IP-block hw_fini: intentionally a no-op; teardown happens via kiq_hw_fini. */
static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block)
{
	return 0;
}

/* Suspend: save MES state through the common layer, then hw_fini. */
static int mes_v12_1_suspend(struct amdgpu_ip_block *ip_block)
{
	int r;

	r = amdgpu_mes_suspend(ip_block->adev);
	if (r)
		return r;

	return mes_v12_1_hw_fini(ip_block);
}

/* Resume: re-run hw_init, then restore MES state through the common layer. */
static int mes_v12_1_resume(struct amdgpu_ip_block *ip_block)
{
	int r;

	r = mes_v12_1_hw_init(ip_block);
	if (r)
		return r;

	return amdgpu_mes_resume(ip_block->adev);
}

/* Early init: fetch the MES firmware images for every pipe. */
static int mes_v12_1_early_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int pipe, r;

	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		r = amdgpu_mes_init_microcode(adev, pipe);
		if (r)
			return r;
	}

	return 0;
}

/* IP function table registered with the amdgpu IP-block framework. */
static const struct amd_ip_funcs mes_v12_1_ip_funcs = {
	.name = "mes_v12_1",
	.early_init = mes_v12_1_early_init,
	.late_init = NULL,
	.sw_init = mes_v12_1_sw_init,
	.sw_fini = mes_v12_1_sw_fini,
	.hw_init = mes_v12_1_hw_init,
	.hw_fini = mes_v12_1_hw_fini,
	.suspend = mes_v12_1_suspend,
	.resume = mes_v12_1_resume,
};

/* Version descriptor exported for device IP discovery (MES v12.1). */
const struct amdgpu_ip_block_version mes_v12_1_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_MES,
	.major = 12,
	.minor = 1,
	.rev = 0,
	.funcs = &mes_v12_1_ip_funcs,
};