1 /* 2 * Copyright 2025 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <linux/firmware.h> 25 #include <linux/module.h> 26 #include "amdgpu.h" 27 #include "soc15_common.h" 28 #include "soc_v1_0.h" 29 #include "gc/gc_12_1_0_offset.h" 30 #include "gc/gc_12_1_0_sh_mask.h" 31 #include "gc/gc_11_0_0_default.h" 32 #include "v12_structs.h" 33 #include "mes_v12_api_def.h" 34 #include "gfx_v12_1_pkt.h" 35 #include "sdma_v7_1_0_pkt_open.h" 36 37 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin"); 38 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin"); 39 MODULE_FIRMWARE("amdgpu/gc_12_1_0_uni_mes.bin"); 40 41 static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block); 42 static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id); 43 static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block); 44 static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id); 45 static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id); 46 static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id); 47 static int mes_v12_1_setup_coop_mode(struct amdgpu_device *adev, int xcc_id); 48 49 #define MES_EOP_SIZE 2048 50 #define MES12_HUNG_DB_OFFSET_ARRAY_SIZE 8 /* [0:3] = db offset [4:7] hqd info */ 51 #define MES12_HUNG_HQD_INFO_OFFSET 4 52 53 #define regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT 0x100000 54 #define XCC_MID_MASK 0x41000000 55 56 static void mes_v12_1_ring_set_wptr(struct amdgpu_ring *ring) 57 { 58 struct amdgpu_device *adev = ring->adev; 59 60 if (ring->use_doorbell) { 61 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 62 ring->wptr); 63 WDOORBELL64(ring->doorbell_index, ring->wptr); 64 } else { 65 BUG(); 66 } 67 } 68 69 static u64 mes_v12_1_ring_get_rptr(struct amdgpu_ring *ring) 70 { 71 return *ring->rptr_cpu_addr; 72 } 73 74 static u64 mes_v12_1_ring_get_wptr(struct amdgpu_ring *ring) 75 { 76 u64 wptr; 77 78 if (ring->use_doorbell) 79 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 80 else 81 BUG(); 82 return wptr; 83 } 84 85 static const struct amdgpu_ring_funcs mes_v12_1_ring_funcs = { 86 .type = AMDGPU_RING_TYPE_MES, 87 .align_mask = 1, 88 .nop = 0, 89 .support_64bit_ptrs = true, 90 .get_rptr = mes_v12_1_ring_get_rptr, 91 .get_wptr = mes_v12_1_ring_get_wptr, 92 .set_wptr = mes_v12_1_ring_set_wptr, 93 .insert_nop = amdgpu_ring_insert_nop, 94 }; 95 96 static const char *mes_v12_1_opcodes[] = { 97 "SET_HW_RSRC", 98 "SET_SCHEDULING_CONFIG", 99 "ADD_QUEUE", 100 "REMOVE_QUEUE", 101 "PERFORM_YIELD", 102 "SET_GANG_PRIORITY_LEVEL", 103 "SUSPEND", 104 "RESUME", 105 "RESET", 106 "SET_LOG_BUFFER", 107 "CHANGE_GANG_PRORITY", 108 "QUERY_SCHEDULER_STATUS", 109 "unused", 110 "SET_DEBUG_VMID", 111 "MISC", 112 "UPDATE_ROOT_PAGE_TABLE", 113 "AMD_LOG", 114 "SET_SE_MODE", 115 "SET_GANG_SUBMIT", 116 "SET_HW_RSRC_1", 117 "INVALIDATE_TLBS", 118 }; 119 120 static const char *mes_v12_1_misc_opcodes[] = { 121 "WRITE_REG", 122 "INV_GART", 123 "QUERY_STATUS", 124 "READ_REG", 125 "WAIT_REG_MEM", 126 "SET_SHADER_DEBUGGER", 127 "NOTIFY_WORK_ON_UNMAPPED_QUEUE", 128 "NOTIFY_TO_UNMAP_PROCESSES", 129 }; 130 131 static const char *mes_v12_1_get_op_string(union MESAPI__MISC *x_pkt) 132 { 133 const char *op_str = NULL; 134 135 if (x_pkt->header.opcode < ARRAY_SIZE(mes_v12_1_opcodes)) 136 op_str = mes_v12_1_opcodes[x_pkt->header.opcode]; 137 138 return op_str; 139 } 140 141 static const char *mes_v12_1_get_misc_op_string(union MESAPI__MISC *x_pkt) 142 { 143 const char *op_str = NULL; 144 145 if ((x_pkt->header.opcode == MES_SCH_API_MISC) && 146 (x_pkt->opcode < ARRAY_SIZE(mes_v12_1_misc_opcodes))) 147 op_str = mes_v12_1_misc_opcodes[x_pkt->opcode]; 148 149 return op_str; 150 } 151 152 static int mes_v12_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, 153 int xcc_id, int pipe, void *pkt, 154 int size, int api_status_off) 155 { 156 union MESAPI__QUERY_MES_STATUS mes_status_pkt; 157 signed long timeout = 2100000; /* 2100 ms */ 158 struct amdgpu_device *adev = mes->adev; 159 struct amdgpu_ring *ring = &mes->ring[MES_PIPE_INST(xcc_id, pipe)]; 160 spinlock_t *ring_lock = &mes->ring_lock[MES_PIPE_INST(xcc_id, pipe)]; 161 struct MES_API_STATUS *api_status; 162 union MESAPI__MISC *x_pkt = pkt; 163 const char *op_str, *misc_op_str; 164 unsigned long flags; 165 u64 status_gpu_addr; 166 u32 seq, status_offset; 167 u64 *status_ptr; 168 signed long r; 169 int ret; 170 171 if (x_pkt->header.opcode >= MES_SCH_API_MAX) 172 return -EINVAL; 173 174 if (amdgpu_emu_mode) { 175 timeout *= 1000; 176 } else if (amdgpu_sriov_vf(adev)) { 177 /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */ 178 timeout = 15 * 600 * 1000; 179 } 180 181 ret = amdgpu_device_wb_get(adev, &status_offset); 182 if (ret) 183 return ret; 184 185 status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4); 186 status_ptr = (u64 *)&adev->wb.wb[status_offset]; 187 *status_ptr = 0; 188 189 spin_lock_irqsave(ring_lock, flags); 190 r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); 191 if (r) 192 goto error_unlock_free; 193 194 seq = ++ring->fence_drv.sync_seq; 195 r = amdgpu_fence_wait_polling(ring, 196 seq - ring->fence_drv.num_fences_mask, 197 timeout); 198 if (r < 1) 199 goto error_undo; 200 201 api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); 202 api_status->api_completion_fence_addr = status_gpu_addr; 203 api_status->api_completion_fence_value = 1; 204 205 amdgpu_ring_write_multiple(ring, pkt, size / 4); 206 207 memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); 208 mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; 209 mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; 210 mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 211 mes_status_pkt.api_status.api_completion_fence_addr = 212 ring->fence_drv.gpu_addr; 213 mes_status_pkt.api_status.api_completion_fence_value = seq; 214 215 amdgpu_ring_write_multiple(ring, &mes_status_pkt, 216 sizeof(mes_status_pkt) / 4); 217 218 amdgpu_ring_commit(ring); 219 spin_unlock_irqrestore(ring_lock, flags); 220 221 op_str = mes_v12_1_get_op_string(x_pkt); 222 misc_op_str = mes_v12_1_get_misc_op_string(x_pkt); 223 224 if (misc_op_str) 225 dev_dbg(adev->dev, "MES(%d, %d) msg=%s (%s) was emitted\n", 226 xcc_id, pipe, op_str, misc_op_str); 227 else if (op_str) 228 dev_dbg(adev->dev, "MES(%d, %d) msg=%s was emitted\n", 229 xcc_id, pipe, op_str); 230 else 231 dev_dbg(adev->dev, "MES(%d, %d) msg=%d was emitted\n", 232 xcc_id, pipe, x_pkt->header.opcode); 233 234 r = amdgpu_fence_wait_polling(ring, seq, timeout); 235 if (r < 1 || !lower_32_bits(*status_ptr)) { 236 if (misc_op_str) 237 dev_err(adev->dev, 238 "MES(%d, %d) failed to respond to msg=%s (%s)\n", 239 xcc_id, pipe, op_str, misc_op_str); 240 else if (op_str) 241 dev_err(adev->dev, 242 "MES(%d, %d) failed to respond to msg=%s\n", 243 xcc_id, pipe, op_str); 244 else 245 dev_err(adev->dev, 246 "MES(%d, %d) failed to respond to msg=%d\n", 247 xcc_id, pipe, x_pkt->header.opcode); 248 249 while (halt_if_hws_hang) 250 schedule(); 251 252 r = -ETIMEDOUT; 253 goto error_wb_free; 254 } 255 256 amdgpu_device_wb_free(adev, status_offset); 257 return 0; 258 259 error_undo: 260 dev_err(adev->dev, "MES(%d, %d) ring buffer is full.\n", xcc_id, pipe); 261 amdgpu_ring_undo(ring); 262 263 error_unlock_free: 264 spin_unlock_irqrestore(ring_lock, flags); 265 266 error_wb_free: 267 amdgpu_device_wb_free(adev, status_offset); 268 return r; 269 } 270 271 static int convert_to_mes_queue_type(int queue_type) 272 { 273 if (queue_type == AMDGPU_RING_TYPE_GFX) 274 return MES_QUEUE_TYPE_GFX; 275 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) 276 return MES_QUEUE_TYPE_COMPUTE; 277 else if (queue_type == AMDGPU_RING_TYPE_SDMA) 278 return MES_QUEUE_TYPE_SDMA; 279 else if (queue_type == AMDGPU_RING_TYPE_MES) 280 return MES_QUEUE_TYPE_SCHQ; 281 else 282 BUG(); 283 return -1; 284 } 285 286 static int mes_v12_1_add_hw_queue(struct amdgpu_mes *mes, 287 struct mes_add_queue_input *input) 288 { 289 union MESAPI__ADD_QUEUE mes_add_queue_pkt; 290 int xcc_id = input->xcc_id; 291 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 292 293 if (mes->enable_coop_mode) 294 xcc_id = mes->master_xcc_ids[inst]; 295 296 memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); 297 298 mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 299 mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; 300 mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 301 302 mes_add_queue_pkt.process_id = input->process_id; 303 mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr; 304 mes_add_queue_pkt.process_va_start = input->process_va_start; 305 mes_add_queue_pkt.process_va_end = input->process_va_end; 306 mes_add_queue_pkt.process_quantum = input->process_quantum; 307 mes_add_queue_pkt.process_context_addr = input->process_context_addr; 308 mes_add_queue_pkt.gang_quantum = input->gang_quantum; 309 mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; 310 mes_add_queue_pkt.inprocess_gang_priority = 311 input->inprocess_gang_priority; 312 mes_add_queue_pkt.gang_global_priority_level = 313 input->gang_global_priority_level; 314 mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; 315 mes_add_queue_pkt.mqd_addr = input->mqd_addr; 316 317 mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr; 318 319 mes_add_queue_pkt.queue_type = 320 convert_to_mes_queue_type(input->queue_type); 321 mes_add_queue_pkt.paging = input->paging; 322 mes_add_queue_pkt.vm_context_cntl = input->vm_cntx_cntl; 323 mes_add_queue_pkt.gws_base = input->gws_base; 324 mes_add_queue_pkt.gws_size = input->gws_size; 325 mes_add_queue_pkt.trap_handler_addr = input->tba_addr; 326 mes_add_queue_pkt.tma_addr = input->tma_addr; 327 mes_add_queue_pkt.trap_en = input->trap_en; 328 mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear; 329 mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; 330 331 /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ 332 mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; 333 mes_add_queue_pkt.gds_size = input->queue_size; 334 335 /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ 336 mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; 337 mes_add_queue_pkt.gds_size = input->queue_size; 338 339 mes_add_queue_pkt.full_sh_mem_config_data = input->sh_mem_config_data; 340 341 return mes_v12_1_submit_pkt_and_poll_completion(mes, 342 xcc_id, AMDGPU_MES_SCHED_PIPE, 343 &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), 344 offsetof(union MESAPI__ADD_QUEUE, api_status)); 345 } 346 347 static int mes_v12_1_remove_hw_queue(struct amdgpu_mes *mes, 348 struct mes_remove_queue_input *input) 349 { 350 union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; 351 int xcc_id = input->xcc_id; 352 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 353 354 if (mes->enable_coop_mode) 355 xcc_id = mes->master_xcc_ids[inst]; 356 357 memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); 358 359 mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 360 mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; 361 mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 362 363 mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; 364 mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; 365 366 return mes_v12_1_submit_pkt_and_poll_completion(mes, 367 xcc_id, AMDGPU_MES_SCHED_PIPE, 368 &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), 369 offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 370 } 371 372 static int mes_v12_1_reset_hw_queue(struct amdgpu_mes *mes, 373 struct mes_reset_queue_input *input) 374 { 375 union MESAPI__RESET mes_reset_queue_pkt; 376 int pipe; 377 378 memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); 379 380 mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 381 mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; 382 mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 383 384 mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; 385 /* mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; */ 386 /*mes_reset_queue_pkt.reset_queue_only = 1;*/ 387 388 if (mes->adev->enable_uni_mes) 389 pipe = AMDGPU_MES_KIQ_PIPE; 390 else 391 pipe = AMDGPU_MES_SCHED_PIPE; 392 393 return mes_v12_1_submit_pkt_and_poll_completion(mes, 394 input->xcc_id, pipe, 395 &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), 396 offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 397 } 398 399 static int mes_v12_1_map_legacy_queue(struct amdgpu_mes *mes, 400 struct mes_map_legacy_queue_input *input) 401 { 402 union MESAPI__ADD_QUEUE mes_add_queue_pkt; 403 int pipe; 404 405 memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); 406 407 mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 408 mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; 409 mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 410 411 mes_add_queue_pkt.pipe_id = input->pipe_id; 412 mes_add_queue_pkt.queue_id = input->queue_id; 413 mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; 414 mes_add_queue_pkt.mqd_addr = input->mqd_addr; 415 mes_add_queue_pkt.wptr_addr = input->wptr_addr; 416 mes_add_queue_pkt.queue_type = 417 convert_to_mes_queue_type(input->queue_type); 418 mes_add_queue_pkt.map_legacy_kq = 1; 419 420 if (mes->adev->enable_uni_mes) 421 pipe = AMDGPU_MES_KIQ_PIPE; 422 else 423 pipe = AMDGPU_MES_SCHED_PIPE; 424 425 return mes_v12_1_submit_pkt_and_poll_completion(mes, 426 input->xcc_id, pipe, 427 &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), 428 offsetof(union MESAPI__ADD_QUEUE, api_status)); 429 } 430 431 static int mes_v12_1_unmap_legacy_queue(struct amdgpu_mes *mes, 432 struct mes_unmap_legacy_queue_input *input) 433 { 434 union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; 435 int pipe; 436 437 memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); 438 439 mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 440 mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; 441 mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 442 443 mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; 444 mes_remove_queue_pkt.gang_context_addr = 0; 445 446 mes_remove_queue_pkt.pipe_id = input->pipe_id; 447 mes_remove_queue_pkt.queue_id = input->queue_id; 448 449 if (input->action == PREEMPT_QUEUES_NO_UNMAP) { 450 mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1; 451 mes_remove_queue_pkt.tf_addr = input->trail_fence_addr; 452 mes_remove_queue_pkt.tf_data = 453 lower_32_bits(input->trail_fence_data); 454 } else { 455 mes_remove_queue_pkt.unmap_legacy_queue = 1; 456 mes_remove_queue_pkt.queue_type = 457 convert_to_mes_queue_type(input->queue_type); 458 } 459 460 if (mes->adev->enable_uni_mes) 461 pipe = AMDGPU_MES_KIQ_PIPE; 462 else 463 pipe = AMDGPU_MES_SCHED_PIPE; 464 465 return mes_v12_1_submit_pkt_and_poll_completion(mes, 466 input->xcc_id, pipe, 467 &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), 468 offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 469 } 470 471 static int mes_v12_1_suspend_gang(struct amdgpu_mes *mes, 472 struct mes_suspend_gang_input *input) 473 { 474 union MESAPI__SUSPEND mes_suspend_gang_pkt; 475 476 memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt)); 477 478 mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; 479 mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND; 480 mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 481 482 mes_suspend_gang_pkt.suspend_all_gangs = input->suspend_all_gangs; 483 mes_suspend_gang_pkt.suspend_all_sdma_gangs = input->suspend_all_sdma_gangs; 484 mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr; 485 mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr; 486 mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value; 487 488 /* Suspend gang is handled by master MES */ 489 return mes_v12_1_submit_pkt_and_poll_completion(mes, input->xcc_id, AMDGPU_MES_SCHED_PIPE, 490 &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt), 491 offsetof(union MESAPI__SUSPEND, api_status)); 492 } 493 494 static int mes_v12_1_resume_gang(struct amdgpu_mes *mes, 495 struct mes_resume_gang_input *input) 496 { 497 union MESAPI__RESUME mes_resume_gang_pkt; 498 499 memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt)); 500 501 mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; 502 mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME; 503 mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 504 505 mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs; 506 mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr; 507 508 /* Resume gang is handled by master MES */ 509 return mes_v12_1_submit_pkt_and_poll_completion(mes, input->xcc_id, AMDGPU_MES_SCHED_PIPE, 510 &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt), 511 offsetof(union MESAPI__RESUME, api_status)); 512 } 513 514 static int mes_v12_1_query_sched_status(struct amdgpu_mes *mes, 515 int pipe, int xcc_id) 516 { 517 union MESAPI__QUERY_MES_STATUS mes_status_pkt; 518 519 memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); 520 521 mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; 522 mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; 523 mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 524 525 return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, 526 &mes_status_pkt, sizeof(mes_status_pkt), 527 offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); 528 } 529 static uint32_t mes_v12_1_get_xcc_from_reg(uint32_t reg_offset) 530 { 531 return ((reg_offset >> 16) & 0x7); 532 } 533 534 static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id, 535 struct RRMT_OPTION *rrmt_opt, 536 uint32_t *out_reg) 537 { 538 uint32_t normalized_reg = soc_v1_0_normalize_xcc_reg_offset(reg); 539 540 if (soc_v1_0_normalize_xcc_reg_range(normalized_reg)) { 541 rrmt_opt->xcd_die_id = mes_v12_1_get_xcc_from_reg(reg); 542 rrmt_opt->mode = (xcc_id == rrmt_opt->xcd_die_id) ? 543 MES_RRMT_MODE_LOCAL_XCD : MES_RRMT_MODE_REMOTE_XCD; 544 } else { 545 rrmt_opt->mode = MES_RRMT_MODE_REMOTE_MID; 546 if (soc_v1_0_mid1_reg_range(reg)) 547 rrmt_opt->mid_die_id = 1; 548 } 549 550 *out_reg = soc_v1_0_normalize_reg_offset(reg); 551 } 552 553 static int mes_v12_1_misc_op(struct amdgpu_mes *mes, 554 struct mes_misc_op_input *input) 555 { 556 struct amdgpu_device *adev = mes->adev; 557 union MESAPI__MISC misc_pkt; 558 int pipe; 559 560 if (mes->adev->enable_uni_mes) 561 pipe = AMDGPU_MES_KIQ_PIPE; 562 else 563 pipe = AMDGPU_MES_SCHED_PIPE; 564 565 memset(&misc_pkt, 0, sizeof(misc_pkt)); 566 567 misc_pkt.header.type = MES_API_TYPE_SCHEDULER; 568 misc_pkt.header.opcode = MES_SCH_API_MISC; 569 misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 570 571 switch (input->op) { 572 case MES_MISC_OP_READ_REG: 573 misc_pkt.opcode = MESAPI_MISC__READ_REG; 574 misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr; 575 mes_v12_1_get_rrmt(input->read_reg.reg_offset, 576 GET_INST(GC, input->xcc_id), 577 &misc_pkt.read_reg.rrmt_opt, 578 &misc_pkt.read_reg.reg_offset); 579 break; 580 case MES_MISC_OP_WRITE_REG: 581 misc_pkt.opcode = MESAPI_MISC__WRITE_REG; 582 misc_pkt.write_reg.reg_value = input->write_reg.reg_value; 583 mes_v12_1_get_rrmt(input->write_reg.reg_offset, 584 GET_INST(GC, input->xcc_id), 585 &misc_pkt.write_reg.rrmt_opt, 586 &misc_pkt.write_reg.reg_offset); 587 break; 588 case MES_MISC_OP_WRM_REG_WAIT: 589 misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; 590 misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM; 591 misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; 592 misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; 593 misc_pkt.wait_reg_mem.reg_offset2 = 0; 594 mes_v12_1_get_rrmt(input->wrm_reg.reg0, 595 GET_INST(GC, input->xcc_id), 596 &misc_pkt.wait_reg_mem.rrmt_opt1, 597 &misc_pkt.wait_reg_mem.reg_offset1); 598 break; 599 case MES_MISC_OP_WRM_REG_WR_WAIT: 600 misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; 601 misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG; 602 misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; 603 misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; 604 mes_v12_1_get_rrmt(input->wrm_reg.reg0, 605 GET_INST(GC, input->xcc_id), 606 &misc_pkt.wait_reg_mem.rrmt_opt1, 607 &misc_pkt.wait_reg_mem.reg_offset1); 608 mes_v12_1_get_rrmt(input->wrm_reg.reg1, 609 GET_INST(GC, input->xcc_id), 610 &misc_pkt.wait_reg_mem.rrmt_opt2, 611 &misc_pkt.wait_reg_mem.reg_offset2); 612 break; 613 case MES_MISC_OP_SET_SHADER_DEBUGGER: 614 pipe = AMDGPU_MES_SCHED_PIPE; 615 misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER; 616 misc_pkt.set_shader_debugger.process_context_addr = 617 input->set_shader_debugger.process_context_addr; 618 misc_pkt.set_shader_debugger.flags.u32all = 619 input->set_shader_debugger.flags.u32all; 620 misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl = 621 input->set_shader_debugger.spi_gdbg_per_vmid_cntl; 622 memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl, 623 input->set_shader_debugger.tcp_watch_cntl, 624 sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); 625 misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; 626 break; 627 case MES_MISC_OP_CHANGE_CONFIG: 628 misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; 629 misc_pkt.change_config.opcode = 630 MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS; 631 misc_pkt.change_config.option.bits.limit_single_process = 632 input->change_config.option.limit_single_process; 633 break; 634 default: 635 DRM_ERROR("unsupported misc op (%d) \n", input->op); 636 return -EINVAL; 637 } 638 639 return mes_v12_1_submit_pkt_and_poll_completion(mes, 640 input->xcc_id, pipe, 641 &misc_pkt, sizeof(misc_pkt), 642 offsetof(union MESAPI__MISC, api_status)); 643 } 644 645 static int mes_v12_1_set_hw_resources_1(struct amdgpu_mes *mes, 646 int pipe, int xcc_id) 647 { 648 union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; 649 int master_xcc_id, inst = MES_PIPE_INST(xcc_id, pipe); 650 651 memset(&mes_set_hw_res_1_pkt, 0, sizeof(mes_set_hw_res_1_pkt)); 652 653 mes_set_hw_res_1_pkt.header.type = MES_API_TYPE_SCHEDULER; 654 mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1; 655 mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 656 mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100; 657 658 /* From version 0x74 above, pipe1 support use shared command buffer 659 to distribute some tasks on individual XCCs*/ 660 if (mes->enable_coop_mode && 661 ((pipe == AMDGPU_MES_SCHED_PIPE) || 662 ((mes->kiq_version & AMDGPU_MES_VERSION_MASK) >= 0x74))) { 663 master_xcc_id = mes->master_xcc_ids[inst]; 664 mes_set_hw_res_1_pkt.mes_coop_mode = 1; 665 mes_set_hw_res_1_pkt.coop_sch_shared_mc_addr = 666 mes->shared_cmd_buf_gpu_addr[master_xcc_id + pipe]; 667 } 668 669 return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, 670 &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), 671 offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); 672 } 673 674 static int mes_v12_1_set_hw_resources(struct amdgpu_mes *mes, 675 int pipe, int xcc_id) 676 { 677 int i, status; 678 struct amdgpu_device *adev = mes->adev; 679 union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt; 680 681 memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); 682 683 mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER; 684 mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; 685 mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 686 687 if (pipe == AMDGPU_MES_SCHED_PIPE) { 688 mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; 689 mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; 690 mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; 691 mes_set_hw_res_pkt.paging_vmid = 0; 692 693 for (i = 0; i < MAX_COMPUTE_PIPES; i++) 694 mes_set_hw_res_pkt.compute_hqd_mask[i] = 695 mes->compute_hqd_mask[i]; 696 697 for (i = 0; i < MAX_GFX_PIPES; i++) 698 mes_set_hw_res_pkt.gfx_hqd_mask[i] = 699 mes->gfx_hqd_mask[i]; 700 701 for (i = 0; i < MAX_SDMA_PIPES; i++) 702 mes_set_hw_res_pkt.sdma_hqd_mask[i] = 703 mes->sdma_hqd_mask[i]; 704 705 for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) 706 mes_set_hw_res_pkt.aggregated_doorbells[i] = 707 mes->aggregated_doorbells[i]; 708 } 709 710 mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = 711 mes->sch_ctx_gpu_addr[pipe]; 712 mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = 713 mes->query_status_fence_gpu_addr[pipe]; 714 715 for (i = 0; i < 5; i++) { 716 mes_set_hw_res_pkt.gc_base[i] = 717 adev->reg_offset[GC_HWIP][0][i]; 718 mes_set_hw_res_pkt.mmhub_base[i] = 719 adev->reg_offset[MMHUB_HWIP][0][i]; 720 mes_set_hw_res_pkt.osssys_base[i] = 721 adev->reg_offset[OSSSYS_HWIP][0][i]; 722 } 723 724 mes_set_hw_res_pkt.disable_reset = 1; 725 mes_set_hw_res_pkt.disable_mes_log = 1; 726 mes_set_hw_res_pkt.use_different_vmid_compute = 1; 727 mes_set_hw_res_pkt.enable_reg_active_poll = 1; 728 mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; 729 730 /* 731 * Keep oversubscribe timer for sdma . When we have unmapped doorbell 732 * handling support, other queue will not use the oversubscribe timer. 733 * handling mode - 0: disabled; 1: basic version; 2: basic+ version 734 */ 735 mes_set_hw_res_pkt.oversubscription_timer = 50; 736 mes_set_hw_res_pkt.unmapped_doorbell_handling = 1; 737 738 if (amdgpu_mes_log_enable) { 739 mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; 740 mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = 741 mes->event_log_gpu_addr + MES_PIPE_INST(xcc_id, pipe) * AMDGPU_MES_LOG_BUFFER_SIZE; 742 } 743 744 if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) 745 mes_set_hw_res_pkt.limit_single_process = 1; 746 747 status = mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, 748 &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), 749 offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); 750 751 /* get MES scheduler versions */ 752 mutex_lock(&adev->srbm_mutex); 753 soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); 754 755 if (pipe == AMDGPU_MES_SCHED_PIPE) 756 adev->mes.sched_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO); 757 else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) 758 adev->mes.kiq_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO); 759 760 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 761 mutex_unlock(&adev->srbm_mutex); 762 763 return status; 764 } 765 766 static void mes_v12_1_init_aggregated_doorbell(struct amdgpu_mes *mes, 767 int xcc_id) 768 { 769 struct amdgpu_device *adev = mes->adev; 770 uint32_t data; 771 772 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1); 773 data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK | 774 CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK | 775 CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK); 776 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] << 777 CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT; 778 data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT; 779 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1, data); 780 781 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2); 782 data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK | 783 CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK | 784 CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK); 785 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] << 786 CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT; 787 data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT; 788 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2, data); 789 790 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3); 791 data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK | 792 CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK | 793 CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK); 794 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] << 795 CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT; 796 data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT; 797 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3, data); 798 799 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4); 800 data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK | 801 CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK | 802 CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK); 803 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] << 804 CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT; 805 data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT; 806 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4, data); 807 808 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5); 809 data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK | 810 CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK | 811 CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK); 812 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] << 813 CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT; 814 data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT; 815 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5, data); 816 817 data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT; 818 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_GFX_CONTROL, data); 819 } 820 821 822 static void mes_v12_1_enable_unmapped_doorbell_handling( 823 struct amdgpu_mes *mes, bool enable, int xcc_id) 824 { 825 struct amdgpu_device *adev = mes->adev; 826 uint32_t data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL); 827 828 /* 829 * The default PROC_LSB settng is 0xc which means doorbell 830 * addr[16:12] gives the doorbell page number. For kfd, each 831 * process will use 2 pages of doorbell, we need to change the 832 * setting to 0xd 833 */ 834 data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK; 835 data |= 0xd << CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT; 836 837 data |= (enable ? 1 : 0) << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT; 838 839 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL, data); 840 } 841 842 #if 0 843 static int mes_v12_1_reset_legacy_queue(struct amdgpu_mes *mes, 844 struct mes_reset_legacy_queue_input *input) 845 { 846 union MESAPI__RESET mes_reset_queue_pkt; 847 int pipe; 848 849 memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); 850 851 mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 852 mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; 853 mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 854 855 mes_reset_queue_pkt.queue_type = 856 convert_to_mes_queue_type(input->queue_type); 857 858 if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { 859 mes_reset_queue_pkt.reset_legacy_gfx = 1; 860 mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; 861 mes_reset_queue_pkt.queue_id_lp = input->queue_id; 862 mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr; 863 mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset; 864 mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr; 865 mes_reset_queue_pkt.vmid_id_lp = input->vmid; 866 } else { 867 mes_reset_queue_pkt.reset_queue_only = 1; 868 mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; 869 } 870 871 if (mes->adev->enable_uni_mes) 872 pipe = AMDGPU_MES_KIQ_PIPE; 873 else 874 pipe = AMDGPU_MES_SCHED_PIPE; 875 876 return mes_v12_1_submit_pkt_and_poll_completion(mes, 877 input->xcc_id, pipe, 878 &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), 879 offsetof(union MESAPI__RESET, api_status)); 880 } 881 #endif 882 883 static int mes_v12_1_detect_and_reset_hung_queues(struct amdgpu_mes *mes, 884 struct mes_detect_and_reset_queue_input *input) 885 { 886 union MESAPI__RESET mes_reset_queue_pkt; 887 888 memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); 889 890 mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 891 mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; 892 mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 893 894 mes_reset_queue_pkt.queue_type = 895 convert_to_mes_queue_type(input->queue_type); 896 mes_reset_queue_pkt.doorbell_offset_addr = 897 mes->hung_queue_db_array_gpu_addr[0]; 898 899 if (input->detect_only) 900 mes_reset_queue_pkt.hang_detect_only = 1; 901 else 902 mes_reset_queue_pkt.hang_detect_then_reset = 1; 903 904 return mes_v12_1_submit_pkt_and_poll_completion(mes, 905 input->xcc_id, AMDGPU_MES_SCHED_PIPE, 906 &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), 907 offsetof(union MESAPI__RESET, api_status)); 908 } 909 910 static int mes_v12_inv_tlb_convert_hub_id(uint8_t id) 911 { 912 /* 913 * MES doesn't support invalidate gc_hub on slave xcc individually 914 * master xcc will invalidate all gc_hub for the partition 915 */ 916 if (AMDGPU_IS_GFXHUB(id)) 917 return 0; 918 else if (AMDGPU_IS_MMHUB0(id)) 919 return 1; 920 else if (AMDGPU_IS_MMHUB1(id)) 921 return 2; 922 return -EINVAL; 923 924 } 925 926 static int mes_v12_1_inv_tlbs_pasid(struct amdgpu_mes *mes, 927 struct mes_inv_tlbs_pasid_input *input) 928 { 929 union MESAPI__INV_TLBS mes_inv_tlbs; 930 int xcc_id = input->xcc_id; 931 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 932 int ret; 933 934 if (mes->enable_coop_mode) 935 xcc_id = mes->master_xcc_ids[inst]; 936 937 memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs)); 938 939 mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER; 940 mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS; 941 mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 942 943 mes_inv_tlbs.invalidate_tlbs.inv_sel = 0; 944 mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type; 945 mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid; 946 947 /*convert amdgpu_mes_hub_id to mes expected hub_id */ 948 ret = mes_v12_inv_tlb_convert_hub_id(input->hub_id); 949 if (ret < 0) 950 return -EINVAL; 951 mes_inv_tlbs.invalidate_tlbs.hub_id = ret; 952 return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, AMDGPU_MES_KIQ_PIPE, 953 &mes_inv_tlbs, sizeof(mes_inv_tlbs), 954 offsetof(union MESAPI__INV_TLBS, api_status)); 955 956 } 957 958 static const struct amdgpu_mes_funcs mes_v12_1_funcs = { 959 .add_hw_queue = mes_v12_1_add_hw_queue, 960 .remove_hw_queue = mes_v12_1_remove_hw_queue, 961 .map_legacy_queue = mes_v12_1_map_legacy_queue, 962 .unmap_legacy_queue = mes_v12_1_unmap_legacy_queue, 963 .suspend_gang = mes_v12_1_suspend_gang, 964 .resume_gang = mes_v12_1_resume_gang, 965 .misc_op = mes_v12_1_misc_op, 966 .reset_hw_queue = mes_v12_1_reset_hw_queue, 967 .detect_and_reset_hung_queues = mes_v12_1_detect_and_reset_hung_queues, 968 .invalidate_tlbs_pasid = mes_v12_1_inv_tlbs_pasid, 969 }; 970 971 static int mes_v12_1_allocate_ucode_buffer(struct amdgpu_device *adev, 972 enum amdgpu_mes_pipe pipe, 973 int xcc_id) 974 { 975 int r, inst = MES_PIPE_INST(xcc_id, pipe); 976 const struct mes_firmware_header_v1_0 *mes_hdr; 977 const __le32 *fw_data; 978 unsigned fw_size; 979 980 mes_hdr = (const struct mes_firmware_header_v1_0 *) 981 adev->mes.fw[pipe]->data; 982 983 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 984 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); 985 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); 986 987 r = amdgpu_bo_create_reserved(adev, fw_size, 988 PAGE_SIZE, 989 AMDGPU_GEM_DOMAIN_VRAM, 990 &adev->mes.ucode_fw_obj[inst], 991 &adev->mes.ucode_fw_gpu_addr[inst], 992 (void **)&adev->mes.ucode_fw_ptr[inst]); 993 if (r) { 994 dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r); 995 return r; 996 } 997 998 memcpy(adev->mes.ucode_fw_ptr[inst], fw_data, fw_size); 999 1000 amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[inst]); 1001 amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[inst]); 1002 1003 return 0; 1004 } 1005 1006 static int mes_v12_1_allocate_ucode_data_buffer(struct amdgpu_device *adev, 1007 enum amdgpu_mes_pipe pipe, 1008 int xcc_id) 1009 { 1010 int r, inst = MES_PIPE_INST(xcc_id, pipe); 1011 const struct mes_firmware_header_v1_0 *mes_hdr; 1012 const __le32 *fw_data; 1013 unsigned fw_size; 1014 1015 mes_hdr = (const struct mes_firmware_header_v1_0 *) 1016 adev->mes.fw[pipe]->data; 1017 1018 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1019 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); 1020 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); 1021 1022 r = amdgpu_bo_create_reserved(adev, fw_size, 1023 64 * 1024, 1024 AMDGPU_GEM_DOMAIN_VRAM, 1025 &adev->mes.data_fw_obj[inst], 1026 &adev->mes.data_fw_gpu_addr[inst], 1027 (void **)&adev->mes.data_fw_ptr[inst]); 1028 if (r) { 1029 dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r); 1030 return r; 1031 } 1032 1033 memcpy(adev->mes.data_fw_ptr[inst], fw_data, fw_size); 1034 1035 amdgpu_bo_kunmap(adev->mes.data_fw_obj[inst]); 1036 amdgpu_bo_unreserve(adev->mes.data_fw_obj[inst]); 1037 1038 return 0; 1039 } 1040 1041 static void mes_v12_1_free_ucode_buffers(struct amdgpu_device *adev, 1042 enum amdgpu_mes_pipe pipe, 1043 int xcc_id) 1044 { 1045 int inst = MES_PIPE_INST(xcc_id, pipe); 1046 1047 amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[inst], 1048 &adev->mes.data_fw_gpu_addr[inst], 1049 (void **)&adev->mes.data_fw_ptr[inst]); 1050 1051 amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[inst], 1052 &adev->mes.ucode_fw_gpu_addr[inst], 1053 (void **)&adev->mes.ucode_fw_ptr[inst]); 1054 } 1055 1056 static void mes_v12_1_enable(struct amdgpu_device *adev, 1057 bool enable, int xcc_id) 1058 { 1059 uint64_t ucode_addr; 1060 uint32_t pipe, data = 0; 1061 1062 if (enable) { 1063 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL); 1064 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); 1065 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); 1066 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); 1067 1068 mutex_lock(&adev->srbm_mutex); 1069 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1070 soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, 1071 GET_INST(GC, xcc_id)); 1072 1073 ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; 1074 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 1075 regCP_MES_PRGRM_CNTR_START, 1076 lower_32_bits(ucode_addr)); 1077 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 1078 regCP_MES_PRGRM_CNTR_START_HI, 1079 upper_32_bits(ucode_addr)); 1080 } 1081 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1082 mutex_unlock(&adev->srbm_mutex); 1083 1084 /* unhalt MES and activate pipe0 */ 1085 data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); 1086 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); 1087 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); 1088 1089 if (amdgpu_emu_mode) 1090 msleep(500); 1091 else if (adev->enable_uni_mes) 1092 udelay(500); 1093 else 1094 udelay(50); 1095 } else { 1096 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL); 1097 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0); 1098 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0); 1099 data = REG_SET_FIELD(data, CP_MES_CNTL, 1100 MES_INVALIDATE_ICACHE, 1); 1101 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); 1102 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); 1103 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); 1104 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); 1105 } 1106 } 1107 1108 static void mes_v12_1_set_ucode_start_addr(struct amdgpu_device *adev, 1109 int xcc_id) 1110 { 1111 uint64_t ucode_addr; 1112 int pipe; 1113 1114 mes_v12_1_enable(adev, false, xcc_id); 1115 1116 mutex_lock(&adev->srbm_mutex); 1117 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1118 /* me=3, queue=0 */ 1119 soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); 1120 1121 /* set ucode start address */ 1122 ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; 1123 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START, 1124 lower_32_bits(ucode_addr)); 1125 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START_HI, 1126 upper_32_bits(ucode_addr)); 1127 1128 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1129 } 1130 mutex_unlock(&adev->srbm_mutex); 1131 } 1132 1133 /* This function is for backdoor MES firmware */ 1134 static int mes_v12_1_load_microcode(struct amdgpu_device *adev, 1135 enum amdgpu_mes_pipe pipe, 1136 bool prime_icache, int xcc_id) 1137 { 1138 int r, inst = MES_PIPE_INST(xcc_id, pipe); 1139 uint32_t data; 1140 1141 mes_v12_1_enable(adev, false, xcc_id); 1142 1143 if (!adev->mes.fw[pipe]) 1144 return -EINVAL; 1145 1146 r = mes_v12_1_allocate_ucode_buffer(adev, pipe, xcc_id); 1147 if (r) 1148 return r; 1149 1150 r = mes_v12_1_allocate_ucode_data_buffer(adev, pipe, xcc_id); 1151 if (r) { 1152 mes_v12_1_free_ucode_buffers(adev, pipe, xcc_id); 1153 return r; 1154 } 1155 1156 mutex_lock(&adev->srbm_mutex); 1157 /* me=3, pipe=0, queue=0 */ 1158 soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); 1159 1160 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_CNTL, 0); 1161 1162 /* set ucode fimrware address */ 1163 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_LO, 1164 lower_32_bits(adev->mes.ucode_fw_gpu_addr[inst])); 1165 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_HI, 1166 upper_32_bits(adev->mes.ucode_fw_gpu_addr[inst])); 1167 1168 /* set ucode instruction cache boundary to 2M-1 */ 1169 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MIBOUND_LO, 0x1FFFFF); 1170 1171 /* set ucode data firmware address */ 1172 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_LO, 1173 lower_32_bits(adev->mes.data_fw_gpu_addr[inst])); 1174 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_HI, 1175 upper_32_bits(adev->mes.data_fw_gpu_addr[inst])); 1176 1177 /* Set data cache boundary CP_MES_MDBOUND_LO */ 1178 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBOUND_LO, 0x7FFFF); 1179 1180 if (prime_icache) { 1181 /* invalidate ICACHE */ 1182 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL); 1183 data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0); 1184 data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1); 1185 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data); 1186 1187 /* prime the ICACHE. */ 1188 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL); 1189 data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1); 1190 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data); 1191 } 1192 1193 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1194 mutex_unlock(&adev->srbm_mutex); 1195 1196 return 0; 1197 } 1198 1199 static int mes_v12_1_allocate_eop_buf(struct amdgpu_device *adev, 1200 enum amdgpu_mes_pipe pipe, 1201 int xcc_id) 1202 { 1203 int r, inst = MES_PIPE_INST(xcc_id, pipe); 1204 u32 *eop; 1205 1206 r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE, 1207 AMDGPU_GEM_DOMAIN_GTT, 1208 &adev->mes.eop_gpu_obj[inst], 1209 &adev->mes.eop_gpu_addr[inst], 1210 (void **)&eop); 1211 if (r) { 1212 dev_warn(adev->dev, "(%d) create EOP bo failed\n", r); 1213 return r; 1214 } 1215 1216 memset(eop, 0, 1217 adev->mes.eop_gpu_obj[inst]->tbo.base.size); 1218 1219 amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[inst]); 1220 amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[inst]); 1221 1222 return 0; 1223 } 1224 1225 static int mes_v12_1_allocate_shared_cmd_buf(struct amdgpu_device *adev, 1226 enum amdgpu_mes_pipe pipe, 1227 int xcc_id) 1228 { 1229 int r, inst = MES_PIPE_INST(xcc_id, pipe); 1230 1231 r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, 1232 AMDGPU_GEM_DOMAIN_VRAM, 1233 &adev->mes.shared_cmd_buf_obj[inst], 1234 &adev->mes.shared_cmd_buf_gpu_addr[inst], 1235 NULL); 1236 if (r) { 1237 dev_err(adev->dev, 1238 "(%d) failed to create shared cmd buf bo\n", r); 1239 return r; 1240 } 1241 1242 return 0; 1243 } 1244 1245 static int mes_v12_1_mqd_init(struct amdgpu_ring *ring) 1246 { 1247 struct v12_1_mes_mqd *mqd = ring->mqd_ptr; 1248 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 1249 uint32_t tmp; 1250 1251 mqd->header = 0xC0310800; 1252 mqd->compute_pipelinestat_enable = 0x00000001; 1253 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 1254 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 1255 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 1256 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 1257 mqd->compute_misc_reserved = 0x00000007; 1258 1259 eop_base_addr = ring->eop_gpu_addr >> 8; 1260 1261 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 1262 tmp = regCP_HQD_EOP_CONTROL_DEFAULT; 1263 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 1264 (order_base_2(MES_EOP_SIZE / 4) - 1)); 1265 1266 mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr); 1267 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 1268 mqd->cp_hqd_eop_control = tmp; 1269 1270 /* disable the queue if it's active */ 1271 ring->wptr = 0; 1272 mqd->cp_hqd_pq_rptr = 0; 1273 mqd->cp_hqd_pq_wptr_lo = 0; 1274 mqd->cp_hqd_pq_wptr_hi = 0; 1275 1276 /* set the pointer to the MQD */ 1277 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 1278 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 1279 1280 /* set MQD vmid to 0 */ 1281 tmp = regCP_MQD_CONTROL_DEFAULT; 1282 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 1283 mqd->cp_mqd_control = tmp; 1284 1285 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 1286 hqd_gpu_addr = ring->gpu_addr >> 8; 1287 mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr); 1288 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 1289 1290 /* set the wb address whether it's enabled or not */ 1291 wb_gpu_addr = ring->rptr_gpu_addr; 1292 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 1293 mqd->cp_hqd_pq_rptr_report_addr_hi = 1294 upper_32_bits(wb_gpu_addr) & 0xffff; 1295 1296 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 1297 wb_gpu_addr = ring->wptr_gpu_addr; 1298 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8; 1299 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 1300 1301 /* set up the HQD, this is similar to CP_RB0_CNTL */ 1302 tmp = regCP_HQD_PQ_CONTROL_DEFAULT; 1303 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 1304 (order_base_2(ring->ring_size / 4) - 1)); 1305 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 1306 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 1307 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); 1308 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); 1309 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 1310 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 1311 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1); 1312 mqd->cp_hqd_pq_control = tmp; 1313 1314 /* enable doorbell */ 1315 tmp = 0; 1316 if (ring->use_doorbell) { 1317 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1318 DOORBELL_OFFSET, ring->doorbell_index); 1319 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1320 DOORBELL_EN, 1); 1321 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1322 DOORBELL_SOURCE, 0); 1323 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1324 DOORBELL_HIT, 0); 1325 } else { 1326 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1327 DOORBELL_EN, 0); 1328 } 1329 mqd->cp_hqd_pq_doorbell_control = tmp; 1330 1331 mqd->cp_hqd_vmid = 0; 1332 /* activate the queue */ 1333 mqd->cp_hqd_active = 1; 1334 1335 tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; 1336 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, 1337 PRELOAD_SIZE, 0x63); 1338 mqd->cp_hqd_persistent_state = tmp; 1339 1340 mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT; 1341 mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT; 1342 mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT; 1343 1344 /* 1345 * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped 1346 * doorbell handling. This is a reserved CP internal register can 1347 * not be accesss by others 1348 */ 1349 mqd->cp_hqd_gfx_control = BIT(15); 1350 1351 return 0; 1352 } 1353 1354 static void mes_v12_1_queue_init_register(struct amdgpu_ring *ring, 1355 int xcc_id) 1356 { 1357 struct v12_1_mes_mqd *mqd = ring->mqd_ptr; 1358 struct amdgpu_device *adev = ring->adev; 1359 uint32_t data = 0; 1360 1361 mutex_lock(&adev->srbm_mutex); 1362 soc_v1_0_grbm_select(adev, 3, ring->pipe, 0, 0, GET_INST(GC, xcc_id)); 1363 1364 /* set CP_HQD_VMID.VMID = 0. */ 1365 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID); 1366 data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0); 1367 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, data); 1368 1369 /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */ 1370 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL); 1371 data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, 1372 DOORBELL_EN, 0); 1373 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data); 1374 1375 /* set CP_MQD_BASE_ADDR/HI with the MQD base address */ 1376 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); 1377 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); 1378 1379 /* set CP_MQD_CONTROL.VMID=0 */ 1380 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL); 1381 data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0); 1382 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL, 0); 1383 1384 /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */ 1385 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); 1386 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); 1387 1388 /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */ 1389 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR, 1390 mqd->cp_hqd_pq_rptr_report_addr_lo); 1391 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 1392 mqd->cp_hqd_pq_rptr_report_addr_hi); 1393 1394 /* set CP_HQD_PQ_CONTROL */ 1395 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); 1396 1397 /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */ 1398 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR, 1399 mqd->cp_hqd_pq_wptr_poll_addr_lo); 1400 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 1401 mqd->cp_hqd_pq_wptr_poll_addr_hi); 1402 1403 /* set CP_HQD_PQ_DOORBELL_CONTROL */ 1404 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 1405 mqd->cp_hqd_pq_doorbell_control); 1406 1407 /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */ 1408 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); 1409 1410 /* set CP_HQD_ACTIVE.ACTIVE=1 */ 1411 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, mqd->cp_hqd_active); 1412 1413 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1414 mutex_unlock(&adev->srbm_mutex); 1415 } 1416 1417 static int mes_v12_1_kiq_enable_queue(struct amdgpu_device *adev, int xcc_id) 1418 { 1419 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 1420 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[xcc_id].ring; 1421 int r, inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 1422 1423 if (!kiq->pmf || !kiq->pmf->kiq_map_queues) 1424 return -EINVAL; 1425 1426 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); 1427 if (r) { 1428 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 1429 return r; 1430 } 1431 1432 kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[inst]); 1433 1434 r = amdgpu_ring_test_ring(kiq_ring); 1435 if (r) { 1436 DRM_ERROR("kfq enable failed\n"); 1437 kiq_ring->sched.ready = false; 1438 } 1439 return r; 1440 } 1441 1442 static int mes_v12_1_queue_init(struct amdgpu_device *adev, 1443 enum amdgpu_mes_pipe pipe, 1444 int xcc_id) 1445 { 1446 struct amdgpu_ring *ring; 1447 int r; 1448 1449 if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) 1450 ring = &adev->gfx.kiq[xcc_id].ring; 1451 else 1452 ring = &adev->mes.ring[MES_PIPE_INST(xcc_id, pipe)]; 1453 1454 if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) && 1455 (amdgpu_in_reset(adev) || adev->in_suspend)) { 1456 *(ring->wptr_cpu_addr) = 0; 1457 *(ring->rptr_cpu_addr) = 0; 1458 amdgpu_ring_clear_ring(ring); 1459 } 1460 1461 r = mes_v12_1_mqd_init(ring); 1462 if (r) 1463 return r; 1464 1465 if (pipe == AMDGPU_MES_SCHED_PIPE) { 1466 if (adev->enable_uni_mes) 1467 r = amdgpu_mes_map_legacy_queue(adev, ring, xcc_id); 1468 else 1469 r = mes_v12_1_kiq_enable_queue(adev, xcc_id); 1470 if (r) 1471 return r; 1472 } else { 1473 mes_v12_1_queue_init_register(ring, xcc_id); 1474 } 1475 1476 return 0; 1477 } 1478 1479 static int mes_v12_1_ring_init(struct amdgpu_device *adev, 1480 int xcc_id, int pipe) 1481 { 1482 struct amdgpu_ring *ring; 1483 int inst = MES_PIPE_INST(xcc_id, pipe); 1484 1485 ring = &adev->mes.ring[inst]; 1486 1487 ring->funcs = &mes_v12_1_ring_funcs; 1488 1489 ring->me = 3; 1490 ring->pipe = pipe; 1491 ring->queue = 0; 1492 ring->xcc_id = xcc_id; 1493 ring->vm_hub = AMDGPU_GFXHUB(xcc_id); 1494 1495 ring->ring_obj = NULL; 1496 ring->use_doorbell = true; 1497 ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst]; 1498 ring->no_scheduler = true; 1499 snprintf(ring->name, sizeof(ring->name), "mes_%hhu.%hhu.%hhu.%hhu", 1500 (unsigned char)xcc_id, (unsigned char)ring->me, 1501 (unsigned char)ring->pipe, (unsigned char)ring->queue); 1502 1503 if (pipe == AMDGPU_MES_SCHED_PIPE) 1504 ring->doorbell_index = 1505 (adev->doorbell_index.mes_ring0 + 1506 xcc_id * adev->doorbell_index.xcc_doorbell_range) 1507 << 1; 1508 else 1509 ring->doorbell_index = 1510 (adev->doorbell_index.mes_ring1 + 1511 xcc_id * adev->doorbell_index.xcc_doorbell_range) 1512 << 1; 1513 1514 return amdgpu_ring_init(adev, ring, 1024, NULL, 0, 1515 AMDGPU_RING_PRIO_DEFAULT, NULL); 1516 } 1517 1518 static int mes_v12_1_kiq_ring_init(struct amdgpu_device *adev, int xcc_id) 1519 { 1520 struct amdgpu_ring *ring; 1521 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE); 1522 1523 spin_lock_init(&adev->gfx.kiq[xcc_id].ring_lock); 1524 1525 ring = &adev->gfx.kiq[xcc_id].ring; 1526 1527 ring->me = 3; 1528 ring->pipe = 1; 1529 ring->queue = 0; 1530 ring->xcc_id = xcc_id; 1531 ring->vm_hub = AMDGPU_GFXHUB(xcc_id); 1532 1533 ring->adev = NULL; 1534 ring->ring_obj = NULL; 1535 ring->use_doorbell = true; 1536 ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst]; 1537 ring->no_scheduler = true; 1538 ring->doorbell_index = 1539 (adev->doorbell_index.mes_ring1 + 1540 xcc_id * adev->doorbell_index.xcc_doorbell_range) 1541 << 1; 1542 1543 snprintf(ring->name, sizeof(ring->name), "mes_kiq_%hhu.%hhu.%hhu.%hhu", 1544 (unsigned char)xcc_id, (unsigned char)ring->me, 1545 (unsigned char)ring->pipe, (unsigned char)ring->queue); 1546 1547 return amdgpu_ring_init(adev, ring, 1024, NULL, 0, 1548 AMDGPU_RING_PRIO_DEFAULT, NULL); 1549 } 1550 1551 static int mes_v12_1_mqd_sw_init(struct amdgpu_device *adev, 1552 enum amdgpu_mes_pipe pipe, 1553 int xcc_id) 1554 { 1555 int r, mqd_size = sizeof(struct v12_1_mes_mqd); 1556 struct amdgpu_ring *ring; 1557 int inst = MES_PIPE_INST(xcc_id, pipe); 1558 1559 if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) 1560 ring = &adev->gfx.kiq[xcc_id].ring; 1561 else 1562 ring = &adev->mes.ring[inst]; 1563 1564 if (ring->mqd_obj) 1565 return 0; 1566 1567 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, 1568 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, 1569 &ring->mqd_gpu_addr, &ring->mqd_ptr); 1570 if (r) { 1571 dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); 1572 return r; 1573 } 1574 1575 memset(ring->mqd_ptr, 0, mqd_size); 1576 1577 /* prepare MQD backup */ 1578 adev->mes.mqd_backup[inst] = kmalloc(mqd_size, GFP_KERNEL); 1579 if (!adev->mes.mqd_backup[inst]) 1580 dev_warn(adev->dev, 1581 "no memory to create MQD backup for ring %s\n", 1582 ring->name); 1583 1584 return 0; 1585 } 1586 1587 static int mes_v12_1_sw_init(struct amdgpu_ip_block *ip_block) 1588 { 1589 struct amdgpu_device *adev = ip_block->adev; 1590 int pipe, r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1591 1592 adev->mes.funcs = &mes_v12_1_funcs; 1593 adev->mes.kiq_hw_init = &mes_v12_1_kiq_hw_init; 1594 adev->mes.kiq_hw_fini = &mes_v12_1_kiq_hw_fini; 1595 adev->mes.enable_legacy_queue_map = true; 1596 1597 adev->mes.event_log_size = 1598 adev->enable_uni_mes ? (AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE * num_xcc) : AMDGPU_MES_LOG_BUFFER_SIZE; 1599 1600 r = amdgpu_mes_init(adev); 1601 if (r) 1602 return r; 1603 1604 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1605 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1606 r = mes_v12_1_allocate_eop_buf(adev, pipe, xcc_id); 1607 if (r) 1608 return r; 1609 1610 r = mes_v12_1_mqd_sw_init(adev, pipe, xcc_id); 1611 if (r) 1612 return r; 1613 1614 if (!adev->enable_uni_mes && pipe == 1615 AMDGPU_MES_KIQ_PIPE) 1616 r = mes_v12_1_kiq_ring_init(adev, xcc_id); 1617 else 1618 r = mes_v12_1_ring_init(adev, xcc_id, pipe); 1619 if (r) 1620 return r; 1621 1622 if (adev->enable_uni_mes && num_xcc > 1) { 1623 r = mes_v12_1_allocate_shared_cmd_buf(adev, 1624 pipe, xcc_id); 1625 if (r) 1626 return r; 1627 } 1628 } 1629 } 1630 1631 return 0; 1632 } 1633 1634 static int mes_v12_1_sw_fini(struct amdgpu_ip_block *ip_block) 1635 { 1636 struct amdgpu_device *adev = ip_block->adev; 1637 int pipe, inst, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1638 1639 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1640 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1641 inst = MES_PIPE_INST(xcc_id, pipe); 1642 1643 amdgpu_bo_free_kernel(&adev->mes.shared_cmd_buf_obj[inst], 1644 &adev->mes.shared_cmd_buf_gpu_addr[inst], 1645 NULL); 1646 1647 kfree(adev->mes.mqd_backup[inst]); 1648 1649 amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[inst], 1650 &adev->mes.eop_gpu_addr[inst], 1651 NULL); 1652 1653 if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) { 1654 amdgpu_bo_free_kernel(&adev->mes.ring[inst].mqd_obj, 1655 &adev->mes.ring[inst].mqd_gpu_addr, 1656 &adev->mes.ring[inst].mqd_ptr); 1657 amdgpu_ring_fini(&adev->mes.ring[inst]); 1658 } 1659 } 1660 } 1661 1662 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) 1663 amdgpu_ucode_release(&adev->mes.fw[pipe]); 1664 1665 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1666 if (!adev->enable_uni_mes) { 1667 amdgpu_bo_free_kernel(&adev->gfx.kiq[xcc_id].ring.mqd_obj, 1668 &adev->gfx.kiq[xcc_id].ring.mqd_gpu_addr, 1669 &adev->gfx.kiq[xcc_id].ring.mqd_ptr); 1670 amdgpu_ring_fini(&adev->gfx.kiq[xcc_id].ring); 1671 } 1672 1673 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1674 mes_v12_1_free_ucode_buffers(adev, 1675 AMDGPU_MES_KIQ_PIPE, xcc_id); 1676 mes_v12_1_free_ucode_buffers(adev, 1677 AMDGPU_MES_SCHED_PIPE, xcc_id); 1678 } 1679 } 1680 1681 amdgpu_mes_fini(adev); 1682 return 0; 1683 } 1684 1685 static void mes_v12_1_kiq_dequeue_sched(struct amdgpu_device *adev, 1686 int xcc_id) 1687 { 1688 uint32_t data; 1689 int i; 1690 1691 mutex_lock(&adev->srbm_mutex); 1692 soc_v1_0_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0, 1693 GET_INST(GC, xcc_id)); 1694 1695 /* disable the queue if it's active */ 1696 if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) { 1697 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1); 1698 for (i = 0; i < adev->usec_timeout; i++) { 1699 if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) 1700 break; 1701 udelay(1); 1702 } 1703 } 1704 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL); 1705 data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, 1706 DOORBELL_EN, 0); 1707 data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, 1708 DOORBELL_HIT, 1); 1709 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data); 1710 1711 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0); 1712 1713 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0); 1714 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0); 1715 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0); 1716 1717 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1718 mutex_unlock(&adev->srbm_mutex); 1719 1720 adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = false; 1721 } 1722 1723 static void mes_v12_1_kiq_setting(struct amdgpu_ring *ring, int xcc_id) 1724 { 1725 uint32_t tmp; 1726 struct amdgpu_device *adev = ring->adev; 1727 1728 /* tell RLC which is KIQ queue */ 1729 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS); 1730 tmp &= 0xffffff00; 1731 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 1732 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); 1733 tmp |= 0x80; 1734 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); 1735 } 1736 1737 static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id) 1738 { 1739 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE); 1740 int r = 0; 1741 struct amdgpu_ip_block *ip_block; 1742 1743 if (adev->enable_uni_mes) 1744 mes_v12_1_kiq_setting(&adev->mes.ring[inst], xcc_id); 1745 else 1746 mes_v12_1_kiq_setting(&adev->gfx.kiq[xcc_id].ring, xcc_id); 1747 1748 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1749 1750 r = mes_v12_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, 1751 false, xcc_id); 1752 if (r) { 1753 DRM_ERROR("failed to load MES fw, r=%d\n", r); 1754 return r; 1755 } 1756 1757 r = mes_v12_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, 1758 true, xcc_id); 1759 if (r) { 1760 DRM_ERROR("failed to load MES kiq fw, r=%d\n", r); 1761 return r; 1762 } 1763 1764 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1765 1766 } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1767 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1768 1769 mes_v12_1_enable(adev, true, xcc_id); 1770 1771 ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES); 1772 if (unlikely(!ip_block)) { 1773 dev_err(adev->dev, "Failed to get MES handle\n"); 1774 return -EINVAL; 1775 } 1776 1777 r = mes_v12_1_queue_init(adev, AMDGPU_MES_KIQ_PIPE, xcc_id); 1778 if (r) 1779 goto failure; 1780 1781 if (adev->enable_uni_mes) { 1782 r = mes_v12_1_setup_coop_mode(adev, xcc_id); 1783 if (r) 1784 goto failure; 1785 1786 r = mes_v12_1_set_hw_resources(&adev->mes, 1787 AMDGPU_MES_KIQ_PIPE, xcc_id); 1788 if (r) 1789 goto failure; 1790 1791 mes_v12_1_set_hw_resources_1(&adev->mes, 1792 AMDGPU_MES_KIQ_PIPE, xcc_id); 1793 } 1794 1795 if (adev->mes.enable_legacy_queue_map) { 1796 r = mes_v12_1_xcc_hw_init(ip_block, xcc_id); 1797 if (r) 1798 goto failure; 1799 } 1800 1801 return r; 1802 1803 failure: 1804 mes_v12_1_hw_fini(ip_block); 1805 return r; 1806 } 1807 1808 static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id) 1809 { 1810 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 1811 1812 if (adev->mes.ring[inst].sched.ready) { 1813 if (adev->enable_uni_mes) 1814 amdgpu_mes_unmap_legacy_queue(adev, 1815 &adev->mes.ring[inst], 1816 RESET_QUEUES, 0, 0, xcc_id); 1817 else 1818 mes_v12_1_kiq_dequeue_sched(adev, xcc_id); 1819 1820 adev->mes.ring[inst].sched.ready = false; 1821 } 1822 1823 mes_v12_1_enable(adev, false, xcc_id); 1824 1825 return 0; 1826 } 1827 1828 static int mes_v12_1_setup_coop_mode(struct amdgpu_device *adev, int xcc_id) 1829 { 1830 u32 num_xcc_per_xcp, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1831 int r = 0; 1832 1833 if (num_xcc == 1) 1834 return r; 1835 1836 if (adev->gfx.funcs && 1837 adev->gfx.funcs->get_xccs_per_xcp) 1838 num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev); 1839 else 1840 return -EINVAL; 1841 1842 switch (adev->xcp_mgr->mode) { 1843 case AMDGPU_SPX_PARTITION_MODE: 1844 adev->mes.enable_coop_mode = 1; 1845 adev->mes.master_xcc_ids[xcc_id] = 0; 1846 break; 1847 case AMDGPU_DPX_PARTITION_MODE: 1848 adev->mes.enable_coop_mode = 1; 1849 adev->mes.master_xcc_ids[xcc_id] = 1850 (xcc_id/num_xcc_per_xcp) * (num_xcc / 2); 1851 break; 1852 case AMDGPU_QPX_PARTITION_MODE: 1853 adev->mes.enable_coop_mode = 1; 1854 adev->mes.master_xcc_ids[xcc_id] = 1855 (xcc_id/num_xcc_per_xcp) * (num_xcc / 4); 1856 break; 1857 case AMDGPU_CPX_PARTITION_MODE: 1858 adev->mes.enable_coop_mode = 0; 1859 break; 1860 default: 1861 r = -EINVAL; 1862 break; 1863 } 1864 return r; 1865 } 1866 1867 static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id) 1868 { 1869 int r; 1870 struct amdgpu_device *adev = ip_block->adev; 1871 1872 if (adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready) 1873 goto out; 1874 1875 if (!adev->enable_mes_kiq) { 1876 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1877 r = mes_v12_1_load_microcode(adev, 1878 AMDGPU_MES_SCHED_PIPE, true, xcc_id); 1879 if (r) { 1880 DRM_ERROR("failed to MES fw, r=%d\n", r); 1881 return r; 1882 } 1883 1884 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1885 1886 } else if (adev->firmware.load_type == 1887 AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1888 1889 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1890 } 1891 1892 mes_v12_1_enable(adev, true, xcc_id); 1893 } 1894 1895 /* Enable the MES to handle doorbell ring on unmapped queue */ 1896 mes_v12_1_enable_unmapped_doorbell_handling(&adev->mes, true, xcc_id); 1897 1898 r = mes_v12_1_queue_init(adev, AMDGPU_MES_SCHED_PIPE, xcc_id); 1899 if (r) 1900 goto failure; 1901 1902 r = mes_v12_1_set_hw_resources(&adev->mes, 1903 AMDGPU_MES_SCHED_PIPE, xcc_id); 1904 if (r) 1905 goto failure; 1906 1907 if (adev->enable_uni_mes) { 1908 mes_v12_1_set_hw_resources_1(&adev->mes, 1909 AMDGPU_MES_SCHED_PIPE, xcc_id); 1910 } 1911 mes_v12_1_init_aggregated_doorbell(&adev->mes, xcc_id); 1912 1913 r = mes_v12_1_query_sched_status(&adev->mes, 1914 AMDGPU_MES_SCHED_PIPE, xcc_id); 1915 if (r) { 1916 DRM_ERROR("MES is busy\n"); 1917 goto failure; 1918 } 1919 1920 amdgpu_mes_validate_fw_version(adev); 1921 out: 1922 /* 1923 * Disable KIQ ring usage from the driver once MES is enabled. 1924 * MES uses KIQ ring exclusively so driver cannot access KIQ ring 1925 * with MES enabled. 1926 */ 1927 adev->gfx.kiq[xcc_id].ring.sched.ready = false; 1928 adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = true; 1929 1930 return 0; 1931 1932 failure: 1933 mes_v12_1_hw_fini(ip_block); 1934 return r; 1935 } 1936 1937 static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block) 1938 { 1939 struct amdgpu_device *adev = ip_block->adev; 1940 int r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1941 1942 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1943 r = mes_v12_1_xcc_hw_init(ip_block, xcc_id); 1944 if (r) 1945 return r; 1946 } 1947 1948 return 0; 1949 } 1950 1951 static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block) 1952 { 1953 return 0; 1954 } 1955 1956 static int mes_v12_1_suspend(struct amdgpu_ip_block *ip_block) 1957 { 1958 return mes_v12_1_hw_fini(ip_block); 1959 } 1960 1961 static int mes_v12_1_resume(struct amdgpu_ip_block *ip_block) 1962 { 1963 return mes_v12_1_hw_init(ip_block); 1964 } 1965 1966 static int mes_v12_1_early_init(struct amdgpu_ip_block *ip_block) 1967 { 1968 struct amdgpu_device *adev = ip_block->adev; 1969 int pipe, r; 1970 1971 adev->mes.hung_queue_db_array_size = MES12_HUNG_DB_OFFSET_ARRAY_SIZE; 1972 adev->mes.hung_queue_hqd_info_offset = MES12_HUNG_HQD_INFO_OFFSET; 1973 1974 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1975 r = amdgpu_mes_init_microcode(adev, pipe); 1976 if (r) 1977 return r; 1978 } 1979 1980 return 0; 1981 } 1982 1983 static int mes_v12_1_late_init(struct amdgpu_ip_block *ip_block) 1984 { 1985 struct amdgpu_device *adev = ip_block->adev; 1986 int xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1987 1988 /* TODO: remove it if issue fixed. */ 1989 if (adev->mes.enable_coop_mode) 1990 return 0; 1991 1992 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1993 /* for COOP mode, only test master xcc. */ 1994 if (adev->mes.enable_coop_mode && 1995 adev->mes.master_xcc_ids[xcc_id] != xcc_id) 1996 continue; 1997 1998 mes_v12_1_self_test(adev, xcc_id); 1999 } 2000 2001 return 0; 2002 } 2003 2004 static const struct amd_ip_funcs mes_v12_1_ip_funcs = { 2005 .name = "mes_v12_1", 2006 .early_init = mes_v12_1_early_init, 2007 .late_init = mes_v12_1_late_init, 2008 .sw_init = mes_v12_1_sw_init, 2009 .sw_fini = mes_v12_1_sw_fini, 2010 .hw_init = mes_v12_1_hw_init, 2011 .hw_fini = mes_v12_1_hw_fini, 2012 .suspend = mes_v12_1_suspend, 2013 .resume = mes_v12_1_resume, 2014 }; 2015 2016 const struct amdgpu_ip_block_version mes_v12_1_ip_block = { 2017 .type = AMD_IP_BLOCK_TYPE_MES, 2018 .major = 12, 2019 .minor = 1, 2020 .rev = 0, 2021 .funcs = &mes_v12_1_ip_funcs, 2022 }; 2023 2024 static int mes_v12_1_alloc_test_buf(struct amdgpu_device *adev, 2025 struct amdgpu_bo **bo, uint64_t *addr, 2026 void **ptr, int size) 2027 { 2028 amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 2029 bo, addr, ptr); 2030 if (!*bo) { 2031 dev_err(adev->dev, "failed to allocate test buffer bo\n"); 2032 return -ENOMEM; 2033 } 2034 memset(*ptr, 0, size); 2035 return 0; 2036 } 2037 2038 static int mes_v12_1_map_test_bo(struct amdgpu_device *adev, 2039 struct amdgpu_bo *bo, struct amdgpu_vm *vm, 2040 struct amdgpu_bo_va **bo_va, u64 va, int size) 2041 { 2042 struct amdgpu_sync sync; 2043 int r; 2044 2045 r = amdgpu_map_static_csa(adev, vm, bo, bo_va, va, size); 2046 if (r) 2047 return r; 2048 2049 amdgpu_sync_create(&sync); 2050 2051 r = amdgpu_vm_bo_update(adev, *bo_va, false); 2052 if (r) { 2053 dev_err(adev->dev, "failed to do vm_bo_update on meta data\n"); 2054 goto error; 2055 } 2056 amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update, GFP_KERNEL); 2057 2058 r = amdgpu_vm_update_pdes(adev, vm, false); 2059 if (r) { 2060 dev_err(adev->dev, "failed to update pdes on meta data\n"); 2061 goto error; 2062 } 2063 amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL); 2064 amdgpu_sync_wait(&sync, false); 2065 2066 error: 2067 amdgpu_sync_free(&sync); 2068 return r; 2069 } 2070 2071 static int mes_v12_1_test_ring(struct amdgpu_device *adev, int xcc_id, 2072 u32 *queue_ptr, u64 fence_gpu_addr, 2073 void *fence_cpu_ptr, void *wptr_cpu_addr, 2074 u64 doorbell_idx, int queue_type) 2075 { 2076 volatile uint32_t *cpu_ptr = fence_cpu_ptr; 2077 int num_xcc = NUM_XCC(adev->gfx.xcc_mask); 2078 int sdma_ring_align = 0x10, compute_ring_align = 0x100; 2079 uint32_t tmp, xcc_offset; 2080 int r = 0, i, j, wptr = 0; 2081 2082 if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { 2083 if (!adev->mes.enable_coop_mode) { 2084 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 2085 regSCRATCH_REG0, 0xCAFEDEAD); 2086 } else { 2087 for (i = 0; i < num_xcc; i++) { 2088 if (adev->mes.master_xcc_ids[i] == xcc_id) 2089 WREG32_SOC15(GC, GET_INST(GC, i), 2090 regSCRATCH_REG0, 0xCAFEDEAD); 2091 } 2092 } 2093 2094 xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); 2095 queue_ptr[wptr++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 2096 queue_ptr[wptr++] = xcc_offset - PACKET3_SET_UCONFIG_REG_START; 2097 queue_ptr[wptr++] = 0xDEADBEEF; 2098 2099 for (i = wptr; i < compute_ring_align; i++) 2100 queue_ptr[wptr++] = PACKET3(PACKET3_NOP, 0x3FFF); 2101 2102 } else if (queue_type == AMDGPU_RING_TYPE_SDMA) { 2103 *cpu_ptr = 0xCAFEDEAD; 2104 2105 queue_ptr[wptr++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | 2106 SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); 2107 queue_ptr[wptr++] = lower_32_bits(fence_gpu_addr); 2108 queue_ptr[wptr++] = upper_32_bits(fence_gpu_addr); 2109 queue_ptr[wptr++] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0); 2110 queue_ptr[wptr++] = 0xDEADBEEF; 2111 2112 for (i = wptr; i < sdma_ring_align; i++) 2113 queue_ptr[wptr++] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); 2114 2115 wptr <<= 2; 2116 } 2117 2118 atomic64_set((atomic64_t *)wptr_cpu_addr, wptr); 2119 WDOORBELL64(doorbell_idx, wptr); 2120 2121 for (i = 0; i < adev->usec_timeout; i++) { 2122 if (queue_type == AMDGPU_RING_TYPE_SDMA) { 2123 tmp = le32_to_cpu(*cpu_ptr); 2124 } else { 2125 if (!adev->mes.enable_coop_mode) { 2126 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), 2127 regSCRATCH_REG0); 2128 } else { 2129 for (j = 0; j < num_xcc; j++) { 2130 if (xcc_id != adev->mes.master_xcc_ids[j]) 2131 continue; 2132 2133 tmp = RREG32_SOC15(GC, GET_INST(GC, j), 2134 regSCRATCH_REG0); 2135 if (tmp != 0xDEADBEEF) 2136 break; 2137 } 2138 } 2139 } 2140 2141 if (tmp == 0xDEADBEEF) 2142 break; 2143 2144 if (amdgpu_emu_mode == 1) 2145 msleep(1); 2146 else 2147 udelay(1); 2148 } 2149 2150 if (i >= adev->usec_timeout) { 2151 dev_err(adev->dev, "xcc%d: mes self test (%s) failed\n", xcc_id, 2152 queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute"); 2153 2154 while (halt_if_hws_hang) 2155 schedule(); 2156 2157 r = -ETIMEDOUT; 2158 } else { 2159 dev_info(adev->dev, "xcc%d: mes self test (%s) pass\n", xcc_id, 2160 queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute"); 2161 } 2162 2163 return r; 2164 } 2165 2166 #define USER_CTX_SIZE (PAGE_SIZE * 2) 2167 #define USER_CTX_VA AMDGPU_VA_RESERVED_BOTTOM 2168 #define RING_OFFSET(addr) ((addr)) 2169 #define EOP_OFFSET(addr) ((addr) + PAGE_SIZE) 2170 #define WPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64)) 2171 #define RPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 2) 2172 #define FENCE_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 3) 2173 2174 static int mes_v12_1_test_queue(struct amdgpu_device *adev, int xcc_id, 2175 int pasid, struct amdgpu_vm *vm, u64 meta_gpu_addr, 2176 u64 queue_gpu_addr, void *ctx_ptr, int queue_type) 2177 { 2178 struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; 2179 struct amdgpu_mqd *mqd_mgr = &adev->mqds[queue_type]; 2180 struct amdgpu_mqd_prop mqd_prop = {0}; 2181 struct mes_add_queue_input add_queue = {0}; 2182 struct mes_remove_queue_input remove_queue = {0}; 2183 struct amdgpu_bo *mqd_bo = NULL; 2184 int num_xcc = NUM_XCC(adev->gfx.xcc_mask); 2185 int i, r, off, mqd_size, mqd_count = 1; 2186 void *mqd_ptr = NULL; 2187 u64 mqd_gpu_addr, doorbell_idx; 2188 2189 /* extra one page size padding for mes fw */ 2190 mqd_size = mqd_mgr->mqd_size + PAGE_SIZE; 2191 2192 if (queue_type == AMDGPU_RING_TYPE_SDMA) { 2193 doorbell_idx = adev->mes.db_start_dw_offset + \ 2194 adev->doorbell_index.sdma_engine[0]; 2195 } else { 2196 doorbell_idx = adev->mes.db_start_dw_offset + \ 2197 adev->doorbell_index.userqueue_start; 2198 } 2199 2200 if (adev->mes.enable_coop_mode && 2201 queue_type == AMDGPU_RING_TYPE_COMPUTE) { 2202 for (i = 0, mqd_count = 0; i < num_xcc; i++) { 2203 if (adev->mes.master_xcc_ids[i] == xcc_id) 2204 mqd_count++; 2205 } 2206 mqd_size *= mqd_count; 2207 } 2208 2209 r = mes_v12_1_alloc_test_buf(adev, &mqd_bo, &mqd_gpu_addr, 2210 &mqd_ptr, mqd_size * mqd_count); 2211 if (r < 0) 2212 return r; 2213 2214 mqd_prop.mqd_gpu_addr = mqd_gpu_addr; 2215 mqd_prop.hqd_base_gpu_addr = RING_OFFSET(USER_CTX_VA); 2216 mqd_prop.eop_gpu_addr = EOP_OFFSET(USER_CTX_VA); 2217 mqd_prop.wptr_gpu_addr = WPTR_OFFSET(USER_CTX_VA); 2218 mqd_prop.rptr_gpu_addr = RPTR_OFFSET(USER_CTX_VA); 2219 mqd_prop.doorbell_index = doorbell_idx; 2220 mqd_prop.queue_size = PAGE_SIZE; 2221 mqd_prop.mqd_stride_size = mqd_size; 2222 mqd_prop.use_doorbell = true; 2223 mqd_prop.hqd_active = false; 2224 2225 mqd_mgr->init_mqd(adev, mqd_ptr, &mqd_prop); 2226 if (mqd_count > 1) { 2227 for (i = 1; i < mqd_count; i++) { 2228 off = mqd_size * i; 2229 mqd_prop.mqd_gpu_addr = mqd_gpu_addr + off; 2230 mqd_mgr->init_mqd(adev, (char *)mqd_ptr + off, 2231 &mqd_prop); 2232 } 2233 } 2234 2235 add_queue.xcc_id = xcc_id; 2236 add_queue.process_id = pasid; 2237 add_queue.page_table_base_addr = adev->vm_manager.vram_base_offset + 2238 amdgpu_bo_gpu_offset(vm->root.bo) - adev->gmc.vram_start; 2239 add_queue.process_va_start = 0; 2240 add_queue.process_va_end = adev->vm_manager.max_pfn - 1; 2241 add_queue.process_context_addr = meta_gpu_addr; 2242 add_queue.gang_context_addr = meta_gpu_addr + AMDGPU_MES_PROC_CTX_SIZE; 2243 add_queue.doorbell_offset = doorbell_idx; 2244 add_queue.mqd_addr = mqd_gpu_addr; 2245 add_queue.wptr_addr = mqd_prop.wptr_gpu_addr; 2246 add_queue.wptr_mc_addr = WPTR_OFFSET(queue_gpu_addr); 2247 add_queue.queue_type = queue_type; 2248 add_queue.vm_cntx_cntl = hub->vm_cntx_cntl; 2249 2250 r = mes_v12_1_add_hw_queue(&adev->mes, &add_queue); 2251 if (r) 2252 goto error; 2253 2254 mes_v12_1_test_ring(adev, xcc_id, (u32 *)RING_OFFSET((char *)ctx_ptr), 2255 FENCE_OFFSET(USER_CTX_VA), 2256 FENCE_OFFSET((char *)ctx_ptr), 2257 WPTR_OFFSET((char *)ctx_ptr), 2258 doorbell_idx, queue_type); 2259 2260 remove_queue.xcc_id = xcc_id; 2261 remove_queue.doorbell_offset = doorbell_idx; 2262 remove_queue.gang_context_addr = add_queue.gang_context_addr; 2263 r = mes_v12_1_remove_hw_queue(&adev->mes, &remove_queue); 2264 2265 error: 2266 amdgpu_bo_free_kernel(&mqd_bo, &mqd_gpu_addr, &mqd_ptr); 2267 return r; 2268 } 2269 2270 static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id) 2271 { 2272 int queue_types[] = { AMDGPU_RING_TYPE_COMPUTE, 2273 /* AMDGPU_RING_TYPE_SDMA */ }; 2274 struct amdgpu_bo_va *bo_va = NULL; 2275 struct amdgpu_vm *vm = NULL; 2276 struct amdgpu_bo *meta_bo = NULL, *ctx_bo = NULL; 2277 void *meta_ptr = NULL, *ctx_ptr = NULL; 2278 u64 meta_gpu_addr, ctx_gpu_addr; 2279 int size, i, r, pasid; 2280 2281 pasid = amdgpu_pasid_alloc(16); 2282 if (pasid < 0) 2283 pasid = 0; 2284 2285 size = AMDGPU_MES_PROC_CTX_SIZE + AMDGPU_MES_GANG_CTX_SIZE; 2286 r = mes_v12_1_alloc_test_buf(adev, &meta_bo, &meta_gpu_addr, 2287 &meta_ptr, size); 2288 if (r < 0) 2289 goto err2; 2290 2291 r = mes_v12_1_alloc_test_buf(adev, &ctx_bo, &ctx_gpu_addr, 2292 &ctx_ptr, USER_CTX_SIZE); 2293 if (r < 0) 2294 goto err2; 2295 2296 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 2297 if (!vm) { 2298 r = -ENOMEM; 2299 goto err2; 2300 } 2301 2302 r = amdgpu_vm_init(adev, vm, -1, pasid); 2303 if (r) 2304 goto err1; 2305 2306 r = mes_v12_1_map_test_bo(adev, ctx_bo, vm, &bo_va, 2307 USER_CTX_VA, USER_CTX_SIZE); 2308 if (r) 2309 goto err0; 2310 2311 for (i = 0; i < ARRAY_SIZE(queue_types); i++) { 2312 memset(ctx_ptr, 0, USER_CTX_SIZE); 2313 2314 r = mes_v12_1_test_queue(adev, xcc_id, pasid, vm, meta_gpu_addr, 2315 ctx_gpu_addr, ctx_ptr, queue_types[i]); 2316 if (r) 2317 break; 2318 } 2319 2320 amdgpu_unmap_static_csa(adev, vm, ctx_bo, bo_va, USER_CTX_VA); 2321 err0: 2322 amdgpu_vm_fini(adev, vm); 2323 err1: 2324 kfree(vm); 2325 err2: 2326 amdgpu_bo_free_kernel(&meta_bo, &meta_gpu_addr, &meta_ptr); 2327 amdgpu_bo_free_kernel(&ctx_bo, &ctx_gpu_addr, &ctx_ptr); 2328 amdgpu_pasid_free(pasid); 2329 return r; 2330 } 2331 2332