1 /* 2 * Copyright 2025 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <linux/firmware.h> 25 #include <linux/module.h> 26 #include "amdgpu.h" 27 #include "soc15_common.h" 28 #include "soc_v1_0.h" 29 #include "gc/gc_12_1_0_offset.h" 30 #include "gc/gc_12_1_0_sh_mask.h" 31 #include "gc/gc_11_0_0_default.h" 32 #include "v12_structs.h" 33 #include "mes_v12_api_def.h" 34 #include "gfx_v12_1_pkt.h" 35 #include "sdma_v7_1_0_pkt_open.h" 36 37 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin"); 38 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin"); 39 MODULE_FIRMWARE("amdgpu/gc_12_1_0_uni_mes.bin"); 40 41 static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block); 42 static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id); 43 static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block); 44 static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id); 45 static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id); 46 static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id); 47 static int mes_v12_1_setup_coop_mode(struct amdgpu_device *adev, int xcc_id); 48 49 #define MES_EOP_SIZE 2048 50 #define MES12_HUNG_DB_OFFSET_ARRAY_SIZE 8 /* [0:3] = db offset [4:7] hqd info */ 51 #define MES12_HUNG_HQD_INFO_OFFSET 4 52 53 #define regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT 0x100000 54 #define XCC_MID_MASK 0x41000000 55 56 static void mes_v12_1_ring_set_wptr(struct amdgpu_ring *ring) 57 { 58 struct amdgpu_device *adev = ring->adev; 59 60 if (ring->use_doorbell) { 61 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 62 ring->wptr); 63 WDOORBELL64(ring->doorbell_index, ring->wptr); 64 } else { 65 BUG(); 66 } 67 } 68 69 static u64 mes_v12_1_ring_get_rptr(struct amdgpu_ring *ring) 70 { 71 return *ring->rptr_cpu_addr; 72 } 73 74 static u64 mes_v12_1_ring_get_wptr(struct amdgpu_ring *ring) 75 { 76 u64 wptr; 77 78 if (ring->use_doorbell) 79 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 80 else 81 BUG(); 82 return wptr; 83 } 84 85 static const struct amdgpu_ring_funcs mes_v12_1_ring_funcs = { 86 .type = AMDGPU_RING_TYPE_MES, 87 .align_mask = 1, 88 .nop = 0, 89 .support_64bit_ptrs = true, 90 .get_rptr = mes_v12_1_ring_get_rptr, 91 .get_wptr = mes_v12_1_ring_get_wptr, 92 .set_wptr = mes_v12_1_ring_set_wptr, 93 .insert_nop = amdgpu_ring_insert_nop, 94 }; 95 96 static const char *mes_v12_1_opcodes[] = { 97 "SET_HW_RSRC", 98 "SET_SCHEDULING_CONFIG", 99 "ADD_QUEUE", 100 "REMOVE_QUEUE", 101 "PERFORM_YIELD", 102 "SET_GANG_PRIORITY_LEVEL", 103 "SUSPEND", 104 "RESUME", 105 "RESET", 106 "SET_LOG_BUFFER", 107 "CHANGE_GANG_PRORITY", 108 "QUERY_SCHEDULER_STATUS", 109 "unused", 110 "SET_DEBUG_VMID", 111 "MISC", 112 "UPDATE_ROOT_PAGE_TABLE", 113 "AMD_LOG", 114 "SET_SE_MODE", 115 "SET_GANG_SUBMIT", 116 "SET_HW_RSRC_1", 117 "INVALIDATE_TLBS", 118 }; 119 120 static const char *mes_v12_1_misc_opcodes[] = { 121 "WRITE_REG", 122 "INV_GART", 123 "QUERY_STATUS", 124 "READ_REG", 125 "WAIT_REG_MEM", 126 "SET_SHADER_DEBUGGER", 127 "NOTIFY_WORK_ON_UNMAPPED_QUEUE", 128 "NOTIFY_TO_UNMAP_PROCESSES", 129 }; 130 131 static const char *mes_v12_1_get_op_string(union MESAPI__MISC *x_pkt) 132 { 133 const char *op_str = NULL; 134 135 if (x_pkt->header.opcode < ARRAY_SIZE(mes_v12_1_opcodes)) 136 op_str = mes_v12_1_opcodes[x_pkt->header.opcode]; 137 138 return op_str; 139 } 140 141 static const char *mes_v12_1_get_misc_op_string(union MESAPI__MISC *x_pkt) 142 { 143 const char *op_str = NULL; 144 145 if ((x_pkt->header.opcode == MES_SCH_API_MISC) && 146 (x_pkt->opcode < ARRAY_SIZE(mes_v12_1_misc_opcodes))) 147 op_str = mes_v12_1_misc_opcodes[x_pkt->opcode]; 148 149 return op_str; 150 } 151 152 static int mes_v12_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, 153 int xcc_id, int pipe, void *pkt, 154 int size, int api_status_off) 155 { 156 union MESAPI__QUERY_MES_STATUS mes_status_pkt; 157 signed long timeout = 2100000; /* 2100 ms */ 158 struct amdgpu_device *adev = mes->adev; 159 struct amdgpu_ring *ring = &mes->ring[MES_PIPE_INST(xcc_id, pipe)]; 160 spinlock_t *ring_lock = &mes->ring_lock[MES_PIPE_INST(xcc_id, pipe)]; 161 struct MES_API_STATUS *api_status; 162 union MESAPI__MISC *x_pkt = pkt; 163 const char *op_str, *misc_op_str; 164 unsigned long flags; 165 u64 status_gpu_addr; 166 u32 seq, status_offset; 167 u64 *status_ptr; 168 signed long r; 169 int ret; 170 171 if (x_pkt->header.opcode >= MES_SCH_API_MAX) 172 return -EINVAL; 173 174 if (amdgpu_emu_mode) { 175 timeout *= 1000; 176 } else if (amdgpu_sriov_vf(adev)) { 177 /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */ 178 timeout = 15 * 600 * 1000; 179 } 180 181 ret = amdgpu_device_wb_get(adev, &status_offset); 182 if (ret) 183 return ret; 184 185 status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4); 186 status_ptr = (u64 *)&adev->wb.wb[status_offset]; 187 *status_ptr = 0; 188 189 spin_lock_irqsave(ring_lock, flags); 190 r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); 191 if (r) 192 goto error_unlock_free; 193 194 seq = ++ring->fence_drv.sync_seq; 195 r = amdgpu_fence_wait_polling(ring, 196 seq - ring->fence_drv.num_fences_mask, 197 timeout); 198 if (r < 1) 199 goto error_undo; 200 201 api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); 202 api_status->api_completion_fence_addr = status_gpu_addr; 203 api_status->api_completion_fence_value = 1; 204 205 amdgpu_ring_write_multiple(ring, pkt, size / 4); 206 207 memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); 208 mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; 209 mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; 210 mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 211 mes_status_pkt.api_status.api_completion_fence_addr = 212 ring->fence_drv.gpu_addr; 213 mes_status_pkt.api_status.api_completion_fence_value = seq; 214 215 amdgpu_ring_write_multiple(ring, &mes_status_pkt, 216 sizeof(mes_status_pkt) / 4); 217 218 amdgpu_ring_commit(ring); 219 spin_unlock_irqrestore(ring_lock, flags); 220 221 op_str = mes_v12_1_get_op_string(x_pkt); 222 misc_op_str = mes_v12_1_get_misc_op_string(x_pkt); 223 224 if (misc_op_str) 225 dev_dbg(adev->dev, "MES(%d, %d) msg=%s (%s) was emitted\n", 226 xcc_id, pipe, op_str, misc_op_str); 227 else if (op_str) 228 dev_dbg(adev->dev, "MES(%d, %d) msg=%s was emitted\n", 229 xcc_id, pipe, op_str); 230 else 231 dev_dbg(adev->dev, "MES(%d, %d) msg=%d was emitted\n", 232 xcc_id, pipe, x_pkt->header.opcode); 233 234 r = amdgpu_fence_wait_polling(ring, seq, timeout); 235 if (r < 1 || !lower_32_bits(*status_ptr)) { 236 if (misc_op_str) 237 dev_err(adev->dev, 238 "MES(%d, %d) failed to respond to msg=%s (%s)\n", 239 xcc_id, pipe, op_str, misc_op_str); 240 else if (op_str) 241 dev_err(adev->dev, 242 "MES(%d, %d) failed to respond to msg=%s\n", 243 xcc_id, pipe, op_str); 244 else 245 dev_err(adev->dev, 246 "MES(%d, %d) failed to respond to msg=%d\n", 247 xcc_id, pipe, x_pkt->header.opcode); 248 249 while (halt_if_hws_hang) 250 schedule(); 251 252 r = -ETIMEDOUT; 253 goto error_wb_free; 254 } 255 256 amdgpu_device_wb_free(adev, status_offset); 257 return 0; 258 259 error_undo: 260 dev_err(adev->dev, "MES(%d, %d) ring buffer is full.\n", xcc_id, pipe); 261 amdgpu_ring_undo(ring); 262 263 error_unlock_free: 264 spin_unlock_irqrestore(ring_lock, flags); 265 266 error_wb_free: 267 amdgpu_device_wb_free(adev, status_offset); 268 return r; 269 } 270 271 static int convert_to_mes_queue_type(int queue_type) 272 { 273 if (queue_type == AMDGPU_RING_TYPE_GFX) 274 return MES_QUEUE_TYPE_GFX; 275 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) 276 return MES_QUEUE_TYPE_COMPUTE; 277 else if (queue_type == AMDGPU_RING_TYPE_SDMA) 278 return MES_QUEUE_TYPE_SDMA; 279 else if (queue_type == AMDGPU_RING_TYPE_MES) 280 return MES_QUEUE_TYPE_SCHQ; 281 else 282 BUG(); 283 return -1; 284 } 285 286 static int mes_v12_1_add_hw_queue(struct amdgpu_mes *mes, 287 struct mes_add_queue_input *input) 288 { 289 union MESAPI__ADD_QUEUE mes_add_queue_pkt; 290 int xcc_id = input->xcc_id; 291 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 292 293 if (mes->enable_coop_mode) 294 xcc_id = mes->master_xcc_ids[inst]; 295 296 memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); 297 298 mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 299 mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; 300 mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 301 302 mes_add_queue_pkt.process_id = input->process_id; 303 mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr; 304 mes_add_queue_pkt.process_va_start = input->process_va_start; 305 mes_add_queue_pkt.process_va_end = input->process_va_end; 306 mes_add_queue_pkt.process_quantum = input->process_quantum; 307 mes_add_queue_pkt.process_context_addr = input->process_context_addr; 308 mes_add_queue_pkt.gang_quantum = input->gang_quantum; 309 mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; 310 mes_add_queue_pkt.inprocess_gang_priority = 311 input->inprocess_gang_priority; 312 mes_add_queue_pkt.gang_global_priority_level = 313 input->gang_global_priority_level; 314 mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; 315 mes_add_queue_pkt.mqd_addr = input->mqd_addr; 316 317 mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr; 318 319 mes_add_queue_pkt.queue_type = 320 convert_to_mes_queue_type(input->queue_type); 321 mes_add_queue_pkt.paging = input->paging; 322 mes_add_queue_pkt.vm_context_cntl = input->vm_cntx_cntl; 323 mes_add_queue_pkt.gws_base = input->gws_base; 324 mes_add_queue_pkt.gws_size = input->gws_size; 325 mes_add_queue_pkt.trap_handler_addr = input->tba_addr; 326 mes_add_queue_pkt.tma_addr = input->tma_addr; 327 mes_add_queue_pkt.trap_en = input->trap_en; 328 mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear; 329 mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; 330 331 /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ 332 mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; 333 mes_add_queue_pkt.gds_size = input->queue_size; 334 335 /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ 336 mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; 337 mes_add_queue_pkt.gds_size = input->queue_size; 338 339 mes_add_queue_pkt.full_sh_mem_config_data = input->sh_mem_config_data; 340 341 return mes_v12_1_submit_pkt_and_poll_completion(mes, 342 xcc_id, AMDGPU_MES_SCHED_PIPE, 343 &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), 344 offsetof(union MESAPI__ADD_QUEUE, api_status)); 345 } 346 347 static int mes_v12_1_remove_hw_queue(struct amdgpu_mes *mes, 348 struct mes_remove_queue_input *input) 349 { 350 union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; 351 int xcc_id = input->xcc_id; 352 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 353 354 if (mes->enable_coop_mode) 355 xcc_id = mes->master_xcc_ids[inst]; 356 357 memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); 358 359 mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 360 mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; 361 mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 362 363 mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; 364 mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; 365 366 return mes_v12_1_submit_pkt_and_poll_completion(mes, 367 xcc_id, AMDGPU_MES_SCHED_PIPE, 368 &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), 369 offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 370 } 371 372 static int mes_v12_1_reset_hw_queue(struct amdgpu_mes *mes, 373 struct mes_reset_queue_input *input) 374 { 375 union MESAPI__RESET mes_reset_queue_pkt; 376 int pipe; 377 378 memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); 379 380 mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 381 mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; 382 mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 383 384 mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; 385 /* mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; */ 386 /*mes_reset_queue_pkt.reset_queue_only = 1;*/ 387 388 if (mes->adev->enable_uni_mes) 389 pipe = AMDGPU_MES_KIQ_PIPE; 390 else 391 pipe = AMDGPU_MES_SCHED_PIPE; 392 393 return mes_v12_1_submit_pkt_and_poll_completion(mes, 394 input->xcc_id, pipe, 395 &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), 396 offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 397 } 398 399 static int mes_v12_1_map_legacy_queue(struct amdgpu_mes *mes, 400 struct mes_map_legacy_queue_input *input) 401 { 402 union MESAPI__ADD_QUEUE mes_add_queue_pkt; 403 int pipe; 404 405 memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); 406 407 mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 408 mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; 409 mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 410 411 mes_add_queue_pkt.pipe_id = input->pipe_id; 412 mes_add_queue_pkt.queue_id = input->queue_id; 413 mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; 414 mes_add_queue_pkt.mqd_addr = input->mqd_addr; 415 mes_add_queue_pkt.wptr_addr = input->wptr_addr; 416 mes_add_queue_pkt.queue_type = 417 convert_to_mes_queue_type(input->queue_type); 418 mes_add_queue_pkt.map_legacy_kq = 1; 419 420 if (mes->adev->enable_uni_mes) 421 pipe = AMDGPU_MES_KIQ_PIPE; 422 else 423 pipe = AMDGPU_MES_SCHED_PIPE; 424 425 return mes_v12_1_submit_pkt_and_poll_completion(mes, 426 input->xcc_id, pipe, 427 &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), 428 offsetof(union MESAPI__ADD_QUEUE, api_status)); 429 } 430 431 static int mes_v12_1_unmap_legacy_queue(struct amdgpu_mes *mes, 432 struct mes_unmap_legacy_queue_input *input) 433 { 434 union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; 435 int pipe; 436 437 memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); 438 439 mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 440 mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; 441 mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 442 443 mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; 444 mes_remove_queue_pkt.gang_context_addr = 0; 445 446 mes_remove_queue_pkt.pipe_id = input->pipe_id; 447 mes_remove_queue_pkt.queue_id = input->queue_id; 448 449 if (input->action == PREEMPT_QUEUES_NO_UNMAP) { 450 mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1; 451 mes_remove_queue_pkt.tf_addr = input->trail_fence_addr; 452 mes_remove_queue_pkt.tf_data = 453 lower_32_bits(input->trail_fence_data); 454 } else { 455 mes_remove_queue_pkt.unmap_legacy_queue = 1; 456 mes_remove_queue_pkt.queue_type = 457 convert_to_mes_queue_type(input->queue_type); 458 } 459 460 if (mes->adev->enable_uni_mes) 461 pipe = AMDGPU_MES_KIQ_PIPE; 462 else 463 pipe = AMDGPU_MES_SCHED_PIPE; 464 465 return mes_v12_1_submit_pkt_and_poll_completion(mes, 466 input->xcc_id, pipe, 467 &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), 468 offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 469 } 470 471 static int mes_v12_1_suspend_gang(struct amdgpu_mes *mes, 472 struct mes_suspend_gang_input *input) 473 { 474 union MESAPI__SUSPEND mes_suspend_gang_pkt; 475 476 memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt)); 477 478 mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; 479 mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND; 480 mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 481 482 mes_suspend_gang_pkt.suspend_all_gangs = input->suspend_all_gangs; 483 mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr; 484 mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr; 485 mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value; 486 487 /* Suspend gang is handled by master MES */ 488 return mes_v12_1_submit_pkt_and_poll_completion(mes, input->xcc_id, AMDGPU_MES_SCHED_PIPE, 489 &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt), 490 offsetof(union MESAPI__SUSPEND, api_status)); 491 } 492 493 static int mes_v12_1_resume_gang(struct amdgpu_mes *mes, 494 struct mes_resume_gang_input *input) 495 { 496 union MESAPI__RESUME mes_resume_gang_pkt; 497 498 memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt)); 499 500 mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; 501 mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME; 502 mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 503 504 mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs; 505 mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr; 506 507 /* Resume gang is handled by master MES */ 508 return mes_v12_1_submit_pkt_and_poll_completion(mes, input->xcc_id, AMDGPU_MES_SCHED_PIPE, 509 &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt), 510 offsetof(union MESAPI__RESUME, api_status)); 511 } 512 513 static int mes_v12_1_query_sched_status(struct amdgpu_mes *mes, 514 int pipe, int xcc_id) 515 { 516 union MESAPI__QUERY_MES_STATUS mes_status_pkt; 517 518 memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); 519 520 mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; 521 mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; 522 mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 523 524 return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, 525 &mes_status_pkt, sizeof(mes_status_pkt), 526 offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); 527 } 528 static uint32_t mes_v12_1_get_xcc_from_reg(uint32_t reg_offset) 529 { 530 return ((reg_offset >> 16) & 0x7); 531 } 532 533 static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id, 534 struct RRMT_OPTION *rrmt_opt, 535 uint32_t *out_reg) 536 { 537 uint32_t normalized_reg = soc_v1_0_normalize_xcc_reg_offset(reg); 538 539 if (soc_v1_0_normalize_xcc_reg_range(normalized_reg)) { 540 rrmt_opt->xcd_die_id = mes_v12_1_get_xcc_from_reg(reg); 541 rrmt_opt->mode = (xcc_id == rrmt_opt->xcd_die_id) ? 542 MES_RRMT_MODE_LOCAL_XCD : MES_RRMT_MODE_REMOTE_XCD; 543 } else { 544 rrmt_opt->mode = MES_RRMT_MODE_REMOTE_MID; 545 if (soc_v1_0_mid1_reg_range(reg)) 546 rrmt_opt->mid_die_id = 1; 547 } 548 549 *out_reg = soc_v1_0_normalize_reg_offset(reg); 550 } 551 552 static int mes_v12_1_misc_op(struct amdgpu_mes *mes, 553 struct mes_misc_op_input *input) 554 { 555 struct amdgpu_device *adev = mes->adev; 556 union MESAPI__MISC misc_pkt; 557 int pipe; 558 559 if (mes->adev->enable_uni_mes) 560 pipe = AMDGPU_MES_KIQ_PIPE; 561 else 562 pipe = AMDGPU_MES_SCHED_PIPE; 563 564 memset(&misc_pkt, 0, sizeof(misc_pkt)); 565 566 misc_pkt.header.type = MES_API_TYPE_SCHEDULER; 567 misc_pkt.header.opcode = MES_SCH_API_MISC; 568 misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 569 570 switch (input->op) { 571 case MES_MISC_OP_READ_REG: 572 misc_pkt.opcode = MESAPI_MISC__READ_REG; 573 misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr; 574 mes_v12_1_get_rrmt(input->read_reg.reg_offset, 575 GET_INST(GC, input->xcc_id), 576 &misc_pkt.read_reg.rrmt_opt, 577 &misc_pkt.read_reg.reg_offset); 578 break; 579 case MES_MISC_OP_WRITE_REG: 580 misc_pkt.opcode = MESAPI_MISC__WRITE_REG; 581 misc_pkt.write_reg.reg_value = input->write_reg.reg_value; 582 mes_v12_1_get_rrmt(input->write_reg.reg_offset, 583 GET_INST(GC, input->xcc_id), 584 &misc_pkt.write_reg.rrmt_opt, 585 &misc_pkt.write_reg.reg_offset); 586 break; 587 case MES_MISC_OP_WRM_REG_WAIT: 588 misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; 589 misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM; 590 misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; 591 misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; 592 misc_pkt.wait_reg_mem.reg_offset2 = 0; 593 mes_v12_1_get_rrmt(input->wrm_reg.reg0, 594 GET_INST(GC, input->xcc_id), 595 &misc_pkt.wait_reg_mem.rrmt_opt1, 596 &misc_pkt.wait_reg_mem.reg_offset1); 597 break; 598 case MES_MISC_OP_WRM_REG_WR_WAIT: 599 misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; 600 misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG; 601 misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; 602 misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; 603 mes_v12_1_get_rrmt(input->wrm_reg.reg0, 604 GET_INST(GC, input->xcc_id), 605 &misc_pkt.wait_reg_mem.rrmt_opt1, 606 &misc_pkt.wait_reg_mem.reg_offset1); 607 mes_v12_1_get_rrmt(input->wrm_reg.reg1, 608 GET_INST(GC, input->xcc_id), 609 &misc_pkt.wait_reg_mem.rrmt_opt2, 610 &misc_pkt.wait_reg_mem.reg_offset2); 611 break; 612 case MES_MISC_OP_SET_SHADER_DEBUGGER: 613 pipe = AMDGPU_MES_SCHED_PIPE; 614 misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER; 615 misc_pkt.set_shader_debugger.process_context_addr = 616 input->set_shader_debugger.process_context_addr; 617 misc_pkt.set_shader_debugger.flags.u32all = 618 input->set_shader_debugger.flags.u32all; 619 misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl = 620 input->set_shader_debugger.spi_gdbg_per_vmid_cntl; 621 memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl, 622 input->set_shader_debugger.tcp_watch_cntl, 623 sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); 624 misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; 625 break; 626 case MES_MISC_OP_CHANGE_CONFIG: 627 misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; 628 misc_pkt.change_config.opcode = 629 MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS; 630 misc_pkt.change_config.option.bits.limit_single_process = 631 input->change_config.option.limit_single_process; 632 break; 633 default: 634 DRM_ERROR("unsupported misc op (%d) \n", input->op); 635 return -EINVAL; 636 } 637 638 return mes_v12_1_submit_pkt_and_poll_completion(mes, 639 input->xcc_id, pipe, 640 &misc_pkt, sizeof(misc_pkt), 641 offsetof(union MESAPI__MISC, api_status)); 642 } 643 644 static int mes_v12_1_set_hw_resources_1(struct amdgpu_mes *mes, 645 int pipe, int xcc_id) 646 { 647 union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; 648 int master_xcc_id, inst = MES_PIPE_INST(xcc_id, pipe); 649 650 memset(&mes_set_hw_res_1_pkt, 0, sizeof(mes_set_hw_res_1_pkt)); 651 652 mes_set_hw_res_1_pkt.header.type = MES_API_TYPE_SCHEDULER; 653 mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1; 654 mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 655 mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100; 656 657 /* From version 0x74 above, pipe1 support use shared command buffer 658 to distribute some tasks on individual XCCs*/ 659 if (mes->enable_coop_mode && 660 ((pipe == AMDGPU_MES_SCHED_PIPE) || 661 ((mes->kiq_version & AMDGPU_MES_VERSION_MASK) >= 0x74))) { 662 master_xcc_id = mes->master_xcc_ids[inst]; 663 mes_set_hw_res_1_pkt.mes_coop_mode = 1; 664 mes_set_hw_res_1_pkt.coop_sch_shared_mc_addr = 665 mes->shared_cmd_buf_gpu_addr[master_xcc_id + pipe]; 666 } 667 668 return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, 669 &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), 670 offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); 671 } 672 673 static int mes_v12_1_set_hw_resources(struct amdgpu_mes *mes, 674 int pipe, int xcc_id) 675 { 676 int i, status; 677 struct amdgpu_device *adev = mes->adev; 678 union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt; 679 680 memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); 681 682 mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER; 683 mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; 684 mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 685 686 if (pipe == AMDGPU_MES_SCHED_PIPE) { 687 mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; 688 mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; 689 mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; 690 mes_set_hw_res_pkt.paging_vmid = 0; 691 692 for (i = 0; i < MAX_COMPUTE_PIPES; i++) 693 mes_set_hw_res_pkt.compute_hqd_mask[i] = 694 mes->compute_hqd_mask[i]; 695 696 for (i = 0; i < MAX_GFX_PIPES; i++) 697 mes_set_hw_res_pkt.gfx_hqd_mask[i] = 698 mes->gfx_hqd_mask[i]; 699 700 for (i = 0; i < MAX_SDMA_PIPES; i++) 701 mes_set_hw_res_pkt.sdma_hqd_mask[i] = 702 mes->sdma_hqd_mask[i]; 703 704 for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) 705 mes_set_hw_res_pkt.aggregated_doorbells[i] = 706 mes->aggregated_doorbells[i]; 707 } 708 709 mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = 710 mes->sch_ctx_gpu_addr[pipe]; 711 mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = 712 mes->query_status_fence_gpu_addr[pipe]; 713 714 for (i = 0; i < 5; i++) { 715 mes_set_hw_res_pkt.gc_base[i] = 716 adev->reg_offset[GC_HWIP][0][i]; 717 mes_set_hw_res_pkt.mmhub_base[i] = 718 adev->reg_offset[MMHUB_HWIP][0][i]; 719 mes_set_hw_res_pkt.osssys_base[i] = 720 adev->reg_offset[OSSSYS_HWIP][0][i]; 721 } 722 723 mes_set_hw_res_pkt.disable_reset = 1; 724 mes_set_hw_res_pkt.disable_mes_log = 1; 725 mes_set_hw_res_pkt.use_different_vmid_compute = 1; 726 mes_set_hw_res_pkt.enable_reg_active_poll = 1; 727 mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; 728 729 /* 730 * Keep oversubscribe timer for sdma . When we have unmapped doorbell 731 * handling support, other queue will not use the oversubscribe timer. 732 * handling mode - 0: disabled; 1: basic version; 2: basic+ version 733 */ 734 mes_set_hw_res_pkt.oversubscription_timer = 50; 735 mes_set_hw_res_pkt.unmapped_doorbell_handling = 1; 736 737 if (amdgpu_mes_log_enable) { 738 mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; 739 mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = 740 mes->event_log_gpu_addr + MES_PIPE_INST(xcc_id, pipe) * AMDGPU_MES_LOG_BUFFER_SIZE; 741 } 742 743 if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) 744 mes_set_hw_res_pkt.limit_single_process = 1; 745 746 status = mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, 747 &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), 748 offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); 749 750 /* get MES scheduler versions */ 751 mutex_lock(&adev->srbm_mutex); 752 soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); 753 754 if (pipe == AMDGPU_MES_SCHED_PIPE) 755 adev->mes.sched_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO); 756 else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) 757 adev->mes.kiq_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO); 758 759 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 760 mutex_unlock(&adev->srbm_mutex); 761 762 return status; 763 } 764 765 static void mes_v12_1_init_aggregated_doorbell(struct amdgpu_mes *mes, 766 int xcc_id) 767 { 768 struct amdgpu_device *adev = mes->adev; 769 uint32_t data; 770 771 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1); 772 data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK | 773 CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK | 774 CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK); 775 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] << 776 CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT; 777 data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT; 778 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1, data); 779 780 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2); 781 data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK | 782 CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK | 783 CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK); 784 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] << 785 CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT; 786 data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT; 787 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2, data); 788 789 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3); 790 data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK | 791 CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK | 792 CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK); 793 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] << 794 CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT; 795 data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT; 796 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3, data); 797 798 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4); 799 data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK | 800 CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK | 801 CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK); 802 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] << 803 CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT; 804 data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT; 805 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4, data); 806 807 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5); 808 data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK | 809 CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK | 810 CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK); 811 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] << 812 CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT; 813 data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT; 814 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5, data); 815 816 data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT; 817 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_GFX_CONTROL, data); 818 } 819 820 821 static void mes_v12_1_enable_unmapped_doorbell_handling( 822 struct amdgpu_mes *mes, bool enable, int xcc_id) 823 { 824 struct amdgpu_device *adev = mes->adev; 825 uint32_t data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL); 826 827 /* 828 * The default PROC_LSB settng is 0xc which means doorbell 829 * addr[16:12] gives the doorbell page number. For kfd, each 830 * process will use 2 pages of doorbell, we need to change the 831 * setting to 0xd 832 */ 833 data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK; 834 data |= 0xd << CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT; 835 836 data |= (enable ? 1 : 0) << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT; 837 838 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL, data); 839 } 840 841 #if 0 842 static int mes_v12_1_reset_legacy_queue(struct amdgpu_mes *mes, 843 struct mes_reset_legacy_queue_input *input) 844 { 845 union MESAPI__RESET mes_reset_queue_pkt; 846 int pipe; 847 848 memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); 849 850 mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 851 mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; 852 mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 853 854 mes_reset_queue_pkt.queue_type = 855 convert_to_mes_queue_type(input->queue_type); 856 857 if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { 858 mes_reset_queue_pkt.reset_legacy_gfx = 1; 859 mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; 860 mes_reset_queue_pkt.queue_id_lp = input->queue_id; 861 mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr; 862 mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset; 863 mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr; 864 mes_reset_queue_pkt.vmid_id_lp = input->vmid; 865 } else { 866 mes_reset_queue_pkt.reset_queue_only = 1; 867 mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; 868 } 869 870 if (mes->adev->enable_uni_mes) 871 pipe = AMDGPU_MES_KIQ_PIPE; 872 else 873 pipe = AMDGPU_MES_SCHED_PIPE; 874 875 return mes_v12_1_submit_pkt_and_poll_completion(mes, 876 input->xcc_id, pipe, 877 &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), 878 offsetof(union MESAPI__RESET, api_status)); 879 } 880 #endif 881 882 static int mes_v12_1_detect_and_reset_hung_queues(struct amdgpu_mes *mes, 883 struct mes_detect_and_reset_queue_input *input) 884 { 885 union MESAPI__RESET mes_reset_queue_pkt; 886 887 memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); 888 889 mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 890 mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; 891 mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 892 893 mes_reset_queue_pkt.queue_type = 894 convert_to_mes_queue_type(input->queue_type); 895 mes_reset_queue_pkt.doorbell_offset_addr = 896 mes->hung_queue_db_array_gpu_addr[0]; 897 898 if (input->detect_only) 899 mes_reset_queue_pkt.hang_detect_only = 1; 900 else 901 mes_reset_queue_pkt.hang_detect_then_reset = 1; 902 903 return mes_v12_1_submit_pkt_and_poll_completion(mes, 904 input->xcc_id, AMDGPU_MES_SCHED_PIPE, 905 &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), 906 offsetof(union MESAPI__RESET, api_status)); 907 } 908 909 static int mes_v12_inv_tlb_convert_hub_id(uint8_t id) 910 { 911 /* 912 * MES doesn't support invalidate gc_hub on slave xcc individually 913 * master xcc will invalidate all gc_hub for the partition 914 */ 915 if (AMDGPU_IS_GFXHUB(id)) 916 return 0; 917 else if (AMDGPU_IS_MMHUB0(id)) 918 return 1; 919 else if (AMDGPU_IS_MMHUB1(id)) 920 return 2; 921 return -EINVAL; 922 923 } 924 925 static int mes_v12_1_inv_tlbs_pasid(struct amdgpu_mes *mes, 926 struct mes_inv_tlbs_pasid_input *input) 927 { 928 union MESAPI__INV_TLBS mes_inv_tlbs; 929 int xcc_id = input->xcc_id; 930 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 931 int ret; 932 933 if (mes->enable_coop_mode) 934 xcc_id = mes->master_xcc_ids[inst]; 935 936 memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs)); 937 938 mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER; 939 mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS; 940 mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 941 942 mes_inv_tlbs.invalidate_tlbs.inv_sel = 0; 943 mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type; 944 mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid; 945 946 /*convert amdgpu_mes_hub_id to mes expected hub_id */ 947 ret = mes_v12_inv_tlb_convert_hub_id(input->hub_id); 948 if (ret < 0) 949 return -EINVAL; 950 mes_inv_tlbs.invalidate_tlbs.hub_id = ret; 951 return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, AMDGPU_MES_KIQ_PIPE, 952 &mes_inv_tlbs, sizeof(mes_inv_tlbs), 953 offsetof(union MESAPI__INV_TLBS, api_status)); 954 955 } 956 957 static const struct amdgpu_mes_funcs mes_v12_1_funcs = { 958 .add_hw_queue = mes_v12_1_add_hw_queue, 959 .remove_hw_queue = mes_v12_1_remove_hw_queue, 960 .map_legacy_queue = mes_v12_1_map_legacy_queue, 961 .unmap_legacy_queue = mes_v12_1_unmap_legacy_queue, 962 .suspend_gang = mes_v12_1_suspend_gang, 963 .resume_gang = mes_v12_1_resume_gang, 964 .misc_op = mes_v12_1_misc_op, 965 .reset_hw_queue = mes_v12_1_reset_hw_queue, 966 .detect_and_reset_hung_queues = mes_v12_1_detect_and_reset_hung_queues, 967 .invalidate_tlbs_pasid = mes_v12_1_inv_tlbs_pasid, 968 }; 969 970 static int mes_v12_1_allocate_ucode_buffer(struct amdgpu_device *adev, 971 enum amdgpu_mes_pipe pipe, 972 int xcc_id) 973 { 974 int r, inst = MES_PIPE_INST(xcc_id, pipe); 975 const struct mes_firmware_header_v1_0 *mes_hdr; 976 const __le32 *fw_data; 977 unsigned fw_size; 978 979 mes_hdr = (const struct mes_firmware_header_v1_0 *) 980 adev->mes.fw[pipe]->data; 981 982 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 983 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); 984 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); 985 986 r = amdgpu_bo_create_reserved(adev, fw_size, 987 PAGE_SIZE, 988 AMDGPU_GEM_DOMAIN_VRAM, 989 &adev->mes.ucode_fw_obj[inst], 990 &adev->mes.ucode_fw_gpu_addr[inst], 991 (void **)&adev->mes.ucode_fw_ptr[inst]); 992 if (r) { 993 dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r); 994 return r; 995 } 996 997 memcpy(adev->mes.ucode_fw_ptr[inst], fw_data, fw_size); 998 999 amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[inst]); 1000 amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[inst]); 1001 1002 return 0; 1003 } 1004 1005 static int mes_v12_1_allocate_ucode_data_buffer(struct amdgpu_device *adev, 1006 enum amdgpu_mes_pipe pipe, 1007 int xcc_id) 1008 { 1009 int r, inst = MES_PIPE_INST(xcc_id, pipe); 1010 const struct mes_firmware_header_v1_0 *mes_hdr; 1011 const __le32 *fw_data; 1012 unsigned fw_size; 1013 1014 mes_hdr = (const struct mes_firmware_header_v1_0 *) 1015 adev->mes.fw[pipe]->data; 1016 1017 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1018 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); 1019 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); 1020 1021 r = amdgpu_bo_create_reserved(adev, fw_size, 1022 64 * 1024, 1023 AMDGPU_GEM_DOMAIN_VRAM, 1024 &adev->mes.data_fw_obj[inst], 1025 &adev->mes.data_fw_gpu_addr[inst], 1026 (void **)&adev->mes.data_fw_ptr[inst]); 1027 if (r) { 1028 dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r); 1029 return r; 1030 } 1031 1032 memcpy(adev->mes.data_fw_ptr[inst], fw_data, fw_size); 1033 1034 amdgpu_bo_kunmap(adev->mes.data_fw_obj[inst]); 1035 amdgpu_bo_unreserve(adev->mes.data_fw_obj[inst]); 1036 1037 return 0; 1038 } 1039 1040 static void mes_v12_1_free_ucode_buffers(struct amdgpu_device *adev, 1041 enum amdgpu_mes_pipe pipe, 1042 int xcc_id) 1043 { 1044 int inst = MES_PIPE_INST(xcc_id, pipe); 1045 1046 amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[inst], 1047 &adev->mes.data_fw_gpu_addr[inst], 1048 (void **)&adev->mes.data_fw_ptr[inst]); 1049 1050 amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[inst], 1051 &adev->mes.ucode_fw_gpu_addr[inst], 1052 (void **)&adev->mes.ucode_fw_ptr[inst]); 1053 } 1054 1055 static void mes_v12_1_enable(struct amdgpu_device *adev, 1056 bool enable, int xcc_id) 1057 { 1058 uint64_t ucode_addr; 1059 uint32_t pipe, data = 0; 1060 1061 if (enable) { 1062 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL); 1063 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); 1064 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); 1065 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); 1066 1067 mutex_lock(&adev->srbm_mutex); 1068 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1069 soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, 1070 GET_INST(GC, xcc_id)); 1071 1072 ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; 1073 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 1074 regCP_MES_PRGRM_CNTR_START, 1075 lower_32_bits(ucode_addr)); 1076 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 1077 regCP_MES_PRGRM_CNTR_START_HI, 1078 upper_32_bits(ucode_addr)); 1079 } 1080 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1081 mutex_unlock(&adev->srbm_mutex); 1082 1083 /* unhalt MES and activate pipe0 */ 1084 data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); 1085 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); 1086 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); 1087 1088 if (amdgpu_emu_mode) 1089 msleep(500); 1090 else if (adev->enable_uni_mes) 1091 udelay(500); 1092 else 1093 udelay(50); 1094 } else { 1095 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL); 1096 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0); 1097 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0); 1098 data = REG_SET_FIELD(data, CP_MES_CNTL, 1099 MES_INVALIDATE_ICACHE, 1); 1100 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); 1101 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); 1102 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); 1103 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); 1104 } 1105 } 1106 1107 static void mes_v12_1_set_ucode_start_addr(struct amdgpu_device *adev, 1108 int xcc_id) 1109 { 1110 uint64_t ucode_addr; 1111 int pipe; 1112 1113 mes_v12_1_enable(adev, false, xcc_id); 1114 1115 mutex_lock(&adev->srbm_mutex); 1116 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1117 /* me=3, queue=0 */ 1118 soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); 1119 1120 /* set ucode start address */ 1121 ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; 1122 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START, 1123 lower_32_bits(ucode_addr)); 1124 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START_HI, 1125 upper_32_bits(ucode_addr)); 1126 1127 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1128 } 1129 mutex_unlock(&adev->srbm_mutex); 1130 } 1131 1132 /* This function is for backdoor MES firmware */ 1133 static int mes_v12_1_load_microcode(struct amdgpu_device *adev, 1134 enum amdgpu_mes_pipe pipe, 1135 bool prime_icache, int xcc_id) 1136 { 1137 int r, inst = MES_PIPE_INST(xcc_id, pipe); 1138 uint32_t data; 1139 1140 mes_v12_1_enable(adev, false, xcc_id); 1141 1142 if (!adev->mes.fw[pipe]) 1143 return -EINVAL; 1144 1145 r = mes_v12_1_allocate_ucode_buffer(adev, pipe, xcc_id); 1146 if (r) 1147 return r; 1148 1149 r = mes_v12_1_allocate_ucode_data_buffer(adev, pipe, xcc_id); 1150 if (r) { 1151 mes_v12_1_free_ucode_buffers(adev, pipe, xcc_id); 1152 return r; 1153 } 1154 1155 mutex_lock(&adev->srbm_mutex); 1156 /* me=3, pipe=0, queue=0 */ 1157 soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); 1158 1159 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_CNTL, 0); 1160 1161 /* set ucode fimrware address */ 1162 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_LO, 1163 lower_32_bits(adev->mes.ucode_fw_gpu_addr[inst])); 1164 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_HI, 1165 upper_32_bits(adev->mes.ucode_fw_gpu_addr[inst])); 1166 1167 /* set ucode instruction cache boundary to 2M-1 */ 1168 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MIBOUND_LO, 0x1FFFFF); 1169 1170 /* set ucode data firmware address */ 1171 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_LO, 1172 lower_32_bits(adev->mes.data_fw_gpu_addr[inst])); 1173 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_HI, 1174 upper_32_bits(adev->mes.data_fw_gpu_addr[inst])); 1175 1176 /* Set data cache boundary CP_MES_MDBOUND_LO */ 1177 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBOUND_LO, 0x7FFFF); 1178 1179 if (prime_icache) { 1180 /* invalidate ICACHE */ 1181 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL); 1182 data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0); 1183 data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1); 1184 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data); 1185 1186 /* prime the ICACHE. */ 1187 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL); 1188 data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1); 1189 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data); 1190 } 1191 1192 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1193 mutex_unlock(&adev->srbm_mutex); 1194 1195 return 0; 1196 } 1197 1198 static int mes_v12_1_allocate_eop_buf(struct amdgpu_device *adev, 1199 enum amdgpu_mes_pipe pipe, 1200 int xcc_id) 1201 { 1202 int r, inst = MES_PIPE_INST(xcc_id, pipe); 1203 u32 *eop; 1204 1205 r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE, 1206 AMDGPU_GEM_DOMAIN_GTT, 1207 &adev->mes.eop_gpu_obj[inst], 1208 &adev->mes.eop_gpu_addr[inst], 1209 (void **)&eop); 1210 if (r) { 1211 dev_warn(adev->dev, "(%d) create EOP bo failed\n", r); 1212 return r; 1213 } 1214 1215 memset(eop, 0, 1216 adev->mes.eop_gpu_obj[inst]->tbo.base.size); 1217 1218 amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[inst]); 1219 amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[inst]); 1220 1221 return 0; 1222 } 1223 1224 static int mes_v12_1_allocate_shared_cmd_buf(struct amdgpu_device *adev, 1225 enum amdgpu_mes_pipe pipe, 1226 int xcc_id) 1227 { 1228 int r, inst = MES_PIPE_INST(xcc_id, pipe); 1229 1230 r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, 1231 AMDGPU_GEM_DOMAIN_VRAM, 1232 &adev->mes.shared_cmd_buf_obj[inst], 1233 &adev->mes.shared_cmd_buf_gpu_addr[inst], 1234 NULL); 1235 if (r) { 1236 dev_err(adev->dev, 1237 "(%d) failed to create shared cmd buf bo\n", r); 1238 return r; 1239 } 1240 1241 return 0; 1242 } 1243 1244 static int mes_v12_1_mqd_init(struct amdgpu_ring *ring) 1245 { 1246 struct v12_1_mes_mqd *mqd = ring->mqd_ptr; 1247 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 1248 uint32_t tmp; 1249 1250 mqd->header = 0xC0310800; 1251 mqd->compute_pipelinestat_enable = 0x00000001; 1252 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 1253 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 1254 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 1255 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 1256 mqd->compute_misc_reserved = 0x00000007; 1257 1258 eop_base_addr = ring->eop_gpu_addr >> 8; 1259 1260 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 1261 tmp = regCP_HQD_EOP_CONTROL_DEFAULT; 1262 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 1263 (order_base_2(MES_EOP_SIZE / 4) - 1)); 1264 1265 mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr); 1266 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 1267 mqd->cp_hqd_eop_control = tmp; 1268 1269 /* disable the queue if it's active */ 1270 ring->wptr = 0; 1271 mqd->cp_hqd_pq_rptr = 0; 1272 mqd->cp_hqd_pq_wptr_lo = 0; 1273 mqd->cp_hqd_pq_wptr_hi = 0; 1274 1275 /* set the pointer to the MQD */ 1276 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 1277 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 1278 1279 /* set MQD vmid to 0 */ 1280 tmp = regCP_MQD_CONTROL_DEFAULT; 1281 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 1282 mqd->cp_mqd_control = tmp; 1283 1284 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 1285 hqd_gpu_addr = ring->gpu_addr >> 8; 1286 mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr); 1287 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 1288 1289 /* set the wb address whether it's enabled or not */ 1290 wb_gpu_addr = ring->rptr_gpu_addr; 1291 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 1292 mqd->cp_hqd_pq_rptr_report_addr_hi = 1293 upper_32_bits(wb_gpu_addr) & 0xffff; 1294 1295 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 1296 wb_gpu_addr = ring->wptr_gpu_addr; 1297 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8; 1298 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 1299 1300 /* set up the HQD, this is similar to CP_RB0_CNTL */ 1301 tmp = regCP_HQD_PQ_CONTROL_DEFAULT; 1302 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 1303 (order_base_2(ring->ring_size / 4) - 1)); 1304 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 1305 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 1306 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); 1307 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); 1308 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 1309 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 1310 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1); 1311 mqd->cp_hqd_pq_control = tmp; 1312 1313 /* enable doorbell */ 1314 tmp = 0; 1315 if (ring->use_doorbell) { 1316 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1317 DOORBELL_OFFSET, ring->doorbell_index); 1318 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1319 DOORBELL_EN, 1); 1320 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1321 DOORBELL_SOURCE, 0); 1322 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1323 DOORBELL_HIT, 0); 1324 } else { 1325 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1326 DOORBELL_EN, 0); 1327 } 1328 mqd->cp_hqd_pq_doorbell_control = tmp; 1329 1330 mqd->cp_hqd_vmid = 0; 1331 /* activate the queue */ 1332 mqd->cp_hqd_active = 1; 1333 1334 tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; 1335 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, 1336 PRELOAD_SIZE, 0x63); 1337 mqd->cp_hqd_persistent_state = tmp; 1338 1339 mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT; 1340 mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT; 1341 mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT; 1342 1343 /* 1344 * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped 1345 * doorbell handling. This is a reserved CP internal register can 1346 * not be accesss by others 1347 */ 1348 mqd->cp_hqd_gfx_control = BIT(15); 1349 1350 return 0; 1351 } 1352 1353 static void mes_v12_1_queue_init_register(struct amdgpu_ring *ring, 1354 int xcc_id) 1355 { 1356 struct v12_1_mes_mqd *mqd = ring->mqd_ptr; 1357 struct amdgpu_device *adev = ring->adev; 1358 uint32_t data = 0; 1359 1360 mutex_lock(&adev->srbm_mutex); 1361 soc_v1_0_grbm_select(adev, 3, ring->pipe, 0, 0, GET_INST(GC, xcc_id)); 1362 1363 /* set CP_HQD_VMID.VMID = 0. */ 1364 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID); 1365 data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0); 1366 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, data); 1367 1368 /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */ 1369 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL); 1370 data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, 1371 DOORBELL_EN, 0); 1372 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data); 1373 1374 /* set CP_MQD_BASE_ADDR/HI with the MQD base address */ 1375 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); 1376 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); 1377 1378 /* set CP_MQD_CONTROL.VMID=0 */ 1379 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL); 1380 data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0); 1381 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL, 0); 1382 1383 /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */ 1384 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); 1385 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); 1386 1387 /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */ 1388 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR, 1389 mqd->cp_hqd_pq_rptr_report_addr_lo); 1390 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 1391 mqd->cp_hqd_pq_rptr_report_addr_hi); 1392 1393 /* set CP_HQD_PQ_CONTROL */ 1394 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); 1395 1396 /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */ 1397 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR, 1398 mqd->cp_hqd_pq_wptr_poll_addr_lo); 1399 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 1400 mqd->cp_hqd_pq_wptr_poll_addr_hi); 1401 1402 /* set CP_HQD_PQ_DOORBELL_CONTROL */ 1403 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 1404 mqd->cp_hqd_pq_doorbell_control); 1405 1406 /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */ 1407 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); 1408 1409 /* set CP_HQD_ACTIVE.ACTIVE=1 */ 1410 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, mqd->cp_hqd_active); 1411 1412 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1413 mutex_unlock(&adev->srbm_mutex); 1414 } 1415 1416 static int mes_v12_1_kiq_enable_queue(struct amdgpu_device *adev, int xcc_id) 1417 { 1418 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 1419 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[xcc_id].ring; 1420 int r, inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 1421 1422 if (!kiq->pmf || !kiq->pmf->kiq_map_queues) 1423 return -EINVAL; 1424 1425 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); 1426 if (r) { 1427 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 1428 return r; 1429 } 1430 1431 kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[inst]); 1432 1433 r = amdgpu_ring_test_ring(kiq_ring); 1434 if (r) { 1435 DRM_ERROR("kfq enable failed\n"); 1436 kiq_ring->sched.ready = false; 1437 } 1438 return r; 1439 } 1440 1441 static int mes_v12_1_queue_init(struct amdgpu_device *adev, 1442 enum amdgpu_mes_pipe pipe, 1443 int xcc_id) 1444 { 1445 struct amdgpu_ring *ring; 1446 int r; 1447 1448 if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) 1449 ring = &adev->gfx.kiq[xcc_id].ring; 1450 else 1451 ring = &adev->mes.ring[MES_PIPE_INST(xcc_id, pipe)]; 1452 1453 if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) && 1454 (amdgpu_in_reset(adev) || adev->in_suspend)) { 1455 *(ring->wptr_cpu_addr) = 0; 1456 *(ring->rptr_cpu_addr) = 0; 1457 amdgpu_ring_clear_ring(ring); 1458 } 1459 1460 r = mes_v12_1_mqd_init(ring); 1461 if (r) 1462 return r; 1463 1464 if (pipe == AMDGPU_MES_SCHED_PIPE) { 1465 if (adev->enable_uni_mes) 1466 r = amdgpu_mes_map_legacy_queue(adev, ring, xcc_id); 1467 else 1468 r = mes_v12_1_kiq_enable_queue(adev, xcc_id); 1469 if (r) 1470 return r; 1471 } else { 1472 mes_v12_1_queue_init_register(ring, xcc_id); 1473 } 1474 1475 return 0; 1476 } 1477 1478 static int mes_v12_1_ring_init(struct amdgpu_device *adev, 1479 int xcc_id, int pipe) 1480 { 1481 struct amdgpu_ring *ring; 1482 int inst = MES_PIPE_INST(xcc_id, pipe); 1483 1484 ring = &adev->mes.ring[inst]; 1485 1486 ring->funcs = &mes_v12_1_ring_funcs; 1487 1488 ring->me = 3; 1489 ring->pipe = pipe; 1490 ring->queue = 0; 1491 ring->xcc_id = xcc_id; 1492 ring->vm_hub = AMDGPU_GFXHUB(xcc_id); 1493 1494 ring->ring_obj = NULL; 1495 ring->use_doorbell = true; 1496 ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst]; 1497 ring->no_scheduler = true; 1498 snprintf(ring->name, sizeof(ring->name), "mes_%hhu.%hhu.%hhu.%hhu", 1499 (unsigned char)xcc_id, (unsigned char)ring->me, 1500 (unsigned char)ring->pipe, (unsigned char)ring->queue); 1501 1502 if (pipe == AMDGPU_MES_SCHED_PIPE) 1503 ring->doorbell_index = 1504 (adev->doorbell_index.mes_ring0 + 1505 xcc_id * adev->doorbell_index.xcc_doorbell_range) 1506 << 1; 1507 else 1508 ring->doorbell_index = 1509 (adev->doorbell_index.mes_ring1 + 1510 xcc_id * adev->doorbell_index.xcc_doorbell_range) 1511 << 1; 1512 1513 return amdgpu_ring_init(adev, ring, 1024, NULL, 0, 1514 AMDGPU_RING_PRIO_DEFAULT, NULL); 1515 } 1516 1517 static int mes_v12_1_kiq_ring_init(struct amdgpu_device *adev, int xcc_id) 1518 { 1519 struct amdgpu_ring *ring; 1520 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE); 1521 1522 spin_lock_init(&adev->gfx.kiq[xcc_id].ring_lock); 1523 1524 ring = &adev->gfx.kiq[xcc_id].ring; 1525 1526 ring->me = 3; 1527 ring->pipe = 1; 1528 ring->queue = 0; 1529 ring->xcc_id = xcc_id; 1530 ring->vm_hub = AMDGPU_GFXHUB(xcc_id); 1531 1532 ring->adev = NULL; 1533 ring->ring_obj = NULL; 1534 ring->use_doorbell = true; 1535 ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst]; 1536 ring->no_scheduler = true; 1537 ring->doorbell_index = 1538 (adev->doorbell_index.mes_ring1 + 1539 xcc_id * adev->doorbell_index.xcc_doorbell_range) 1540 << 1; 1541 1542 snprintf(ring->name, sizeof(ring->name), "mes_kiq_%hhu.%hhu.%hhu.%hhu", 1543 (unsigned char)xcc_id, (unsigned char)ring->me, 1544 (unsigned char)ring->pipe, (unsigned char)ring->queue); 1545 1546 return amdgpu_ring_init(adev, ring, 1024, NULL, 0, 1547 AMDGPU_RING_PRIO_DEFAULT, NULL); 1548 } 1549 1550 static int mes_v12_1_mqd_sw_init(struct amdgpu_device *adev, 1551 enum amdgpu_mes_pipe pipe, 1552 int xcc_id) 1553 { 1554 int r, mqd_size = sizeof(struct v12_1_mes_mqd); 1555 struct amdgpu_ring *ring; 1556 int inst = MES_PIPE_INST(xcc_id, pipe); 1557 1558 if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) 1559 ring = &adev->gfx.kiq[xcc_id].ring; 1560 else 1561 ring = &adev->mes.ring[inst]; 1562 1563 if (ring->mqd_obj) 1564 return 0; 1565 1566 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, 1567 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, 1568 &ring->mqd_gpu_addr, &ring->mqd_ptr); 1569 if (r) { 1570 dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); 1571 return r; 1572 } 1573 1574 memset(ring->mqd_ptr, 0, mqd_size); 1575 1576 /* prepare MQD backup */ 1577 adev->mes.mqd_backup[inst] = kmalloc(mqd_size, GFP_KERNEL); 1578 if (!adev->mes.mqd_backup[inst]) 1579 dev_warn(adev->dev, 1580 "no memory to create MQD backup for ring %s\n", 1581 ring->name); 1582 1583 return 0; 1584 } 1585 1586 static int mes_v12_1_sw_init(struct amdgpu_ip_block *ip_block) 1587 { 1588 struct amdgpu_device *adev = ip_block->adev; 1589 int pipe, r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1590 1591 adev->mes.funcs = &mes_v12_1_funcs; 1592 adev->mes.kiq_hw_init = &mes_v12_1_kiq_hw_init; 1593 adev->mes.kiq_hw_fini = &mes_v12_1_kiq_hw_fini; 1594 adev->mes.enable_legacy_queue_map = true; 1595 1596 adev->mes.event_log_size = 1597 adev->enable_uni_mes ? (AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE * num_xcc) : AMDGPU_MES_LOG_BUFFER_SIZE; 1598 1599 r = amdgpu_mes_init(adev); 1600 if (r) 1601 return r; 1602 1603 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1604 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1605 r = mes_v12_1_allocate_eop_buf(adev, pipe, xcc_id); 1606 if (r) 1607 return r; 1608 1609 r = mes_v12_1_mqd_sw_init(adev, pipe, xcc_id); 1610 if (r) 1611 return r; 1612 1613 if (!adev->enable_uni_mes && pipe == 1614 AMDGPU_MES_KIQ_PIPE) 1615 r = mes_v12_1_kiq_ring_init(adev, xcc_id); 1616 else 1617 r = mes_v12_1_ring_init(adev, xcc_id, pipe); 1618 if (r) 1619 return r; 1620 1621 if (adev->enable_uni_mes && num_xcc > 1) { 1622 r = mes_v12_1_allocate_shared_cmd_buf(adev, 1623 pipe, xcc_id); 1624 if (r) 1625 return r; 1626 } 1627 } 1628 } 1629 1630 return 0; 1631 } 1632 1633 static int mes_v12_1_sw_fini(struct amdgpu_ip_block *ip_block) 1634 { 1635 struct amdgpu_device *adev = ip_block->adev; 1636 int pipe, inst, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1637 1638 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1639 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1640 inst = MES_PIPE_INST(xcc_id, pipe); 1641 1642 amdgpu_bo_free_kernel(&adev->mes.shared_cmd_buf_obj[inst], 1643 &adev->mes.shared_cmd_buf_gpu_addr[inst], 1644 NULL); 1645 1646 kfree(adev->mes.mqd_backup[inst]); 1647 1648 amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[inst], 1649 &adev->mes.eop_gpu_addr[inst], 1650 NULL); 1651 1652 if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) { 1653 amdgpu_bo_free_kernel(&adev->mes.ring[inst].mqd_obj, 1654 &adev->mes.ring[inst].mqd_gpu_addr, 1655 &adev->mes.ring[inst].mqd_ptr); 1656 amdgpu_ring_fini(&adev->mes.ring[inst]); 1657 } 1658 } 1659 } 1660 1661 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) 1662 amdgpu_ucode_release(&adev->mes.fw[pipe]); 1663 1664 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1665 if (!adev->enable_uni_mes) { 1666 amdgpu_bo_free_kernel(&adev->gfx.kiq[xcc_id].ring.mqd_obj, 1667 &adev->gfx.kiq[xcc_id].ring.mqd_gpu_addr, 1668 &adev->gfx.kiq[xcc_id].ring.mqd_ptr); 1669 amdgpu_ring_fini(&adev->gfx.kiq[xcc_id].ring); 1670 } 1671 1672 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1673 mes_v12_1_free_ucode_buffers(adev, 1674 AMDGPU_MES_KIQ_PIPE, xcc_id); 1675 mes_v12_1_free_ucode_buffers(adev, 1676 AMDGPU_MES_SCHED_PIPE, xcc_id); 1677 } 1678 } 1679 1680 amdgpu_mes_fini(adev); 1681 return 0; 1682 } 1683 1684 static void mes_v12_1_kiq_dequeue_sched(struct amdgpu_device *adev, 1685 int xcc_id) 1686 { 1687 uint32_t data; 1688 int i; 1689 1690 mutex_lock(&adev->srbm_mutex); 1691 soc_v1_0_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0, 1692 GET_INST(GC, xcc_id)); 1693 1694 /* disable the queue if it's active */ 1695 if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) { 1696 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1); 1697 for (i = 0; i < adev->usec_timeout; i++) { 1698 if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) 1699 break; 1700 udelay(1); 1701 } 1702 } 1703 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL); 1704 data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, 1705 DOORBELL_EN, 0); 1706 data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, 1707 DOORBELL_HIT, 1); 1708 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data); 1709 1710 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0); 1711 1712 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0); 1713 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0); 1714 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0); 1715 1716 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1717 mutex_unlock(&adev->srbm_mutex); 1718 1719 adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = false; 1720 } 1721 1722 static void mes_v12_1_kiq_setting(struct amdgpu_ring *ring, int xcc_id) 1723 { 1724 uint32_t tmp; 1725 struct amdgpu_device *adev = ring->adev; 1726 1727 /* tell RLC which is KIQ queue */ 1728 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS); 1729 tmp &= 0xffffff00; 1730 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 1731 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); 1732 tmp |= 0x80; 1733 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); 1734 } 1735 1736 static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id) 1737 { 1738 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE); 1739 int r = 0; 1740 struct amdgpu_ip_block *ip_block; 1741 1742 if (adev->enable_uni_mes) 1743 mes_v12_1_kiq_setting(&adev->mes.ring[inst], xcc_id); 1744 else 1745 mes_v12_1_kiq_setting(&adev->gfx.kiq[xcc_id].ring, xcc_id); 1746 1747 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1748 1749 r = mes_v12_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, 1750 false, xcc_id); 1751 if (r) { 1752 DRM_ERROR("failed to load MES fw, r=%d\n", r); 1753 return r; 1754 } 1755 1756 r = mes_v12_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, 1757 true, xcc_id); 1758 if (r) { 1759 DRM_ERROR("failed to load MES kiq fw, r=%d\n", r); 1760 return r; 1761 } 1762 1763 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1764 1765 } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1766 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1767 1768 mes_v12_1_enable(adev, true, xcc_id); 1769 1770 ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES); 1771 if (unlikely(!ip_block)) { 1772 dev_err(adev->dev, "Failed to get MES handle\n"); 1773 return -EINVAL; 1774 } 1775 1776 r = mes_v12_1_queue_init(adev, AMDGPU_MES_KIQ_PIPE, xcc_id); 1777 if (r) 1778 goto failure; 1779 1780 if (adev->enable_uni_mes) { 1781 r = mes_v12_1_setup_coop_mode(adev, xcc_id); 1782 if (r) 1783 goto failure; 1784 1785 r = mes_v12_1_set_hw_resources(&adev->mes, 1786 AMDGPU_MES_KIQ_PIPE, xcc_id); 1787 if (r) 1788 goto failure; 1789 1790 mes_v12_1_set_hw_resources_1(&adev->mes, 1791 AMDGPU_MES_KIQ_PIPE, xcc_id); 1792 } 1793 1794 if (adev->mes.enable_legacy_queue_map) { 1795 r = mes_v12_1_xcc_hw_init(ip_block, xcc_id); 1796 if (r) 1797 goto failure; 1798 } 1799 1800 return r; 1801 1802 failure: 1803 mes_v12_1_hw_fini(ip_block); 1804 return r; 1805 } 1806 1807 static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id) 1808 { 1809 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 1810 1811 if (adev->mes.ring[inst].sched.ready) { 1812 if (adev->enable_uni_mes) 1813 amdgpu_mes_unmap_legacy_queue(adev, 1814 &adev->mes.ring[inst], 1815 RESET_QUEUES, 0, 0, xcc_id); 1816 else 1817 mes_v12_1_kiq_dequeue_sched(adev, xcc_id); 1818 1819 adev->mes.ring[inst].sched.ready = false; 1820 } 1821 1822 mes_v12_1_enable(adev, false, xcc_id); 1823 1824 return 0; 1825 } 1826 1827 static int mes_v12_1_setup_coop_mode(struct amdgpu_device *adev, int xcc_id) 1828 { 1829 u32 num_xcc_per_xcp, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1830 int r = 0; 1831 1832 if (num_xcc == 1) 1833 return r; 1834 1835 if (adev->gfx.funcs && 1836 adev->gfx.funcs->get_xccs_per_xcp) 1837 num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev); 1838 else 1839 return -EINVAL; 1840 1841 switch (adev->xcp_mgr->mode) { 1842 case AMDGPU_SPX_PARTITION_MODE: 1843 adev->mes.enable_coop_mode = 1; 1844 adev->mes.master_xcc_ids[xcc_id] = 0; 1845 break; 1846 case AMDGPU_DPX_PARTITION_MODE: 1847 adev->mes.enable_coop_mode = 1; 1848 adev->mes.master_xcc_ids[xcc_id] = 1849 (xcc_id/num_xcc_per_xcp) * (num_xcc / 2); 1850 break; 1851 case AMDGPU_QPX_PARTITION_MODE: 1852 adev->mes.enable_coop_mode = 1; 1853 adev->mes.master_xcc_ids[xcc_id] = 1854 (xcc_id/num_xcc_per_xcp) * (num_xcc / 4); 1855 break; 1856 case AMDGPU_CPX_PARTITION_MODE: 1857 adev->mes.enable_coop_mode = 0; 1858 break; 1859 default: 1860 r = -EINVAL; 1861 break; 1862 } 1863 return r; 1864 } 1865 1866 static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id) 1867 { 1868 int r; 1869 struct amdgpu_device *adev = ip_block->adev; 1870 1871 if (adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready) 1872 goto out; 1873 1874 if (!adev->enable_mes_kiq) { 1875 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1876 r = mes_v12_1_load_microcode(adev, 1877 AMDGPU_MES_SCHED_PIPE, true, xcc_id); 1878 if (r) { 1879 DRM_ERROR("failed to MES fw, r=%d\n", r); 1880 return r; 1881 } 1882 1883 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1884 1885 } else if (adev->firmware.load_type == 1886 AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1887 1888 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1889 } 1890 1891 mes_v12_1_enable(adev, true, xcc_id); 1892 } 1893 1894 /* Enable the MES to handle doorbell ring on unmapped queue */ 1895 mes_v12_1_enable_unmapped_doorbell_handling(&adev->mes, true, xcc_id); 1896 1897 r = mes_v12_1_queue_init(adev, AMDGPU_MES_SCHED_PIPE, xcc_id); 1898 if (r) 1899 goto failure; 1900 1901 r = mes_v12_1_set_hw_resources(&adev->mes, 1902 AMDGPU_MES_SCHED_PIPE, xcc_id); 1903 if (r) 1904 goto failure; 1905 1906 if (adev->enable_uni_mes) { 1907 mes_v12_1_set_hw_resources_1(&adev->mes, 1908 AMDGPU_MES_SCHED_PIPE, xcc_id); 1909 } 1910 mes_v12_1_init_aggregated_doorbell(&adev->mes, xcc_id); 1911 1912 r = mes_v12_1_query_sched_status(&adev->mes, 1913 AMDGPU_MES_SCHED_PIPE, xcc_id); 1914 if (r) { 1915 DRM_ERROR("MES is busy\n"); 1916 goto failure; 1917 } 1918 1919 out: 1920 /* 1921 * Disable KIQ ring usage from the driver once MES is enabled. 1922 * MES uses KIQ ring exclusively so driver cannot access KIQ ring 1923 * with MES enabled. 1924 */ 1925 adev->gfx.kiq[xcc_id].ring.sched.ready = false; 1926 adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = true; 1927 1928 return 0; 1929 1930 failure: 1931 mes_v12_1_hw_fini(ip_block); 1932 return r; 1933 } 1934 1935 static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block) 1936 { 1937 struct amdgpu_device *adev = ip_block->adev; 1938 int r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1939 1940 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1941 r = mes_v12_1_xcc_hw_init(ip_block, xcc_id); 1942 if (r) 1943 return r; 1944 } 1945 1946 return 0; 1947 } 1948 1949 static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block) 1950 { 1951 return 0; 1952 } 1953 1954 static int mes_v12_1_suspend(struct amdgpu_ip_block *ip_block) 1955 { 1956 return mes_v12_1_hw_fini(ip_block); 1957 } 1958 1959 static int mes_v12_1_resume(struct amdgpu_ip_block *ip_block) 1960 { 1961 return mes_v12_1_hw_init(ip_block); 1962 } 1963 1964 static int mes_v12_1_early_init(struct amdgpu_ip_block *ip_block) 1965 { 1966 struct amdgpu_device *adev = ip_block->adev; 1967 int pipe, r; 1968 1969 adev->mes.hung_queue_db_array_size = MES12_HUNG_DB_OFFSET_ARRAY_SIZE; 1970 adev->mes.hung_queue_hqd_info_offset = MES12_HUNG_HQD_INFO_OFFSET; 1971 1972 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1973 r = amdgpu_mes_init_microcode(adev, pipe); 1974 if (r) 1975 return r; 1976 } 1977 1978 return 0; 1979 } 1980 1981 static int mes_v12_1_late_init(struct amdgpu_ip_block *ip_block) 1982 { 1983 struct amdgpu_device *adev = ip_block->adev; 1984 int xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1985 1986 /* TODO: remove it if issue fixed. */ 1987 if (adev->mes.enable_coop_mode) 1988 return 0; 1989 1990 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1991 /* for COOP mode, only test master xcc. */ 1992 if (adev->mes.enable_coop_mode && 1993 adev->mes.master_xcc_ids[xcc_id] != xcc_id) 1994 continue; 1995 1996 mes_v12_1_self_test(adev, xcc_id); 1997 } 1998 1999 return 0; 2000 } 2001 2002 static const struct amd_ip_funcs mes_v12_1_ip_funcs = { 2003 .name = "mes_v12_1", 2004 .early_init = mes_v12_1_early_init, 2005 .late_init = mes_v12_1_late_init, 2006 .sw_init = mes_v12_1_sw_init, 2007 .sw_fini = mes_v12_1_sw_fini, 2008 .hw_init = mes_v12_1_hw_init, 2009 .hw_fini = mes_v12_1_hw_fini, 2010 .suspend = mes_v12_1_suspend, 2011 .resume = mes_v12_1_resume, 2012 }; 2013 2014 const struct amdgpu_ip_block_version mes_v12_1_ip_block = { 2015 .type = AMD_IP_BLOCK_TYPE_MES, 2016 .major = 12, 2017 .minor = 1, 2018 .rev = 0, 2019 .funcs = &mes_v12_1_ip_funcs, 2020 }; 2021 2022 static int mes_v12_1_alloc_test_buf(struct amdgpu_device *adev, 2023 struct amdgpu_bo **bo, uint64_t *addr, 2024 void **ptr, int size) 2025 { 2026 amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 2027 bo, addr, ptr); 2028 if (!*bo) { 2029 dev_err(adev->dev, "failed to allocate test buffer bo\n"); 2030 return -ENOMEM; 2031 } 2032 memset(*ptr, 0, size); 2033 return 0; 2034 } 2035 2036 static int mes_v12_1_map_test_bo(struct amdgpu_device *adev, 2037 struct amdgpu_bo *bo, struct amdgpu_vm *vm, 2038 struct amdgpu_bo_va **bo_va, u64 va, int size) 2039 { 2040 struct amdgpu_sync sync; 2041 int r; 2042 2043 r = amdgpu_map_static_csa(adev, vm, bo, bo_va, va, size); 2044 if (r) 2045 return r; 2046 2047 amdgpu_sync_create(&sync); 2048 2049 r = amdgpu_vm_bo_update(adev, *bo_va, false); 2050 if (r) { 2051 dev_err(adev->dev, "failed to do vm_bo_update on meta data\n"); 2052 goto error; 2053 } 2054 amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update, GFP_KERNEL); 2055 2056 r = amdgpu_vm_update_pdes(adev, vm, false); 2057 if (r) { 2058 dev_err(adev->dev, "failed to update pdes on meta data\n"); 2059 goto error; 2060 } 2061 amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL); 2062 amdgpu_sync_wait(&sync, false); 2063 2064 error: 2065 amdgpu_sync_free(&sync); 2066 return r; 2067 } 2068 2069 static int mes_v12_1_test_ring(struct amdgpu_device *adev, int xcc_id, 2070 u32 *queue_ptr, u64 fence_gpu_addr, 2071 void *fence_cpu_ptr, void *wptr_cpu_addr, 2072 u64 doorbell_idx, int queue_type) 2073 { 2074 volatile uint32_t *cpu_ptr = fence_cpu_ptr; 2075 int num_xcc = NUM_XCC(adev->gfx.xcc_mask); 2076 int sdma_ring_align = 0x10, compute_ring_align = 0x100; 2077 uint32_t tmp, xcc_offset; 2078 int r = 0, i, j, wptr = 0; 2079 2080 if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { 2081 if (!adev->mes.enable_coop_mode) { 2082 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 2083 regSCRATCH_REG0, 0xCAFEDEAD); 2084 } else { 2085 for (i = 0; i < num_xcc; i++) { 2086 if (adev->mes.master_xcc_ids[i] == xcc_id) 2087 WREG32_SOC15(GC, GET_INST(GC, i), 2088 regSCRATCH_REG0, 0xCAFEDEAD); 2089 } 2090 } 2091 2092 xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); 2093 queue_ptr[wptr++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 2094 queue_ptr[wptr++] = xcc_offset - PACKET3_SET_UCONFIG_REG_START; 2095 queue_ptr[wptr++] = 0xDEADBEEF; 2096 2097 for (i = wptr; i < compute_ring_align; i++) 2098 queue_ptr[wptr++] = PACKET3(PACKET3_NOP, 0x3FFF); 2099 2100 } else if (queue_type == AMDGPU_RING_TYPE_SDMA) { 2101 *cpu_ptr = 0xCAFEDEAD; 2102 2103 queue_ptr[wptr++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | 2104 SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); 2105 queue_ptr[wptr++] = lower_32_bits(fence_gpu_addr); 2106 queue_ptr[wptr++] = upper_32_bits(fence_gpu_addr); 2107 queue_ptr[wptr++] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0); 2108 queue_ptr[wptr++] = 0xDEADBEEF; 2109 2110 for (i = wptr; i < sdma_ring_align; i++) 2111 queue_ptr[wptr++] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); 2112 2113 wptr <<= 2; 2114 } 2115 2116 atomic64_set((atomic64_t *)wptr_cpu_addr, wptr); 2117 WDOORBELL64(doorbell_idx, wptr); 2118 2119 for (i = 0; i < adev->usec_timeout; i++) { 2120 if (queue_type == AMDGPU_RING_TYPE_SDMA) { 2121 tmp = le32_to_cpu(*cpu_ptr); 2122 } else { 2123 if (!adev->mes.enable_coop_mode) { 2124 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), 2125 regSCRATCH_REG0); 2126 } else { 2127 for (j = 0; j < num_xcc; j++) { 2128 if (xcc_id != adev->mes.master_xcc_ids[j]) 2129 continue; 2130 2131 tmp = RREG32_SOC15(GC, GET_INST(GC, j), 2132 regSCRATCH_REG0); 2133 if (tmp != 0xDEADBEEF) 2134 break; 2135 } 2136 } 2137 } 2138 2139 if (tmp == 0xDEADBEEF) 2140 break; 2141 2142 if (amdgpu_emu_mode == 1) 2143 msleep(1); 2144 else 2145 udelay(1); 2146 } 2147 2148 if (i >= adev->usec_timeout) { 2149 dev_err(adev->dev, "xcc%d: mes self test (%s) failed\n", xcc_id, 2150 queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute"); 2151 2152 while (halt_if_hws_hang) 2153 schedule(); 2154 2155 r = -ETIMEDOUT; 2156 } else { 2157 dev_info(adev->dev, "xcc%d: mes self test (%s) pass\n", xcc_id, 2158 queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute"); 2159 } 2160 2161 return r; 2162 } 2163 2164 #define USER_CTX_SIZE (PAGE_SIZE * 2) 2165 #define USER_CTX_VA AMDGPU_VA_RESERVED_BOTTOM 2166 #define RING_OFFSET(addr) ((addr)) 2167 #define EOP_OFFSET(addr) ((addr) + PAGE_SIZE) 2168 #define WPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64)) 2169 #define RPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 2) 2170 #define FENCE_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 3) 2171 2172 static int mes_v12_1_test_queue(struct amdgpu_device *adev, int xcc_id, 2173 int pasid, struct amdgpu_vm *vm, u64 meta_gpu_addr, 2174 u64 queue_gpu_addr, void *ctx_ptr, int queue_type) 2175 { 2176 struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; 2177 struct amdgpu_mqd *mqd_mgr = &adev->mqds[queue_type]; 2178 struct amdgpu_mqd_prop mqd_prop = {0}; 2179 struct mes_add_queue_input add_queue = {0}; 2180 struct mes_remove_queue_input remove_queue = {0}; 2181 struct amdgpu_bo *mqd_bo = NULL; 2182 int num_xcc = NUM_XCC(adev->gfx.xcc_mask); 2183 int i, r, off, mqd_size, mqd_count = 1; 2184 void *mqd_ptr = NULL; 2185 u64 mqd_gpu_addr, doorbell_idx; 2186 2187 /* extra one page size padding for mes fw */ 2188 mqd_size = mqd_mgr->mqd_size + PAGE_SIZE; 2189 2190 if (queue_type == AMDGPU_RING_TYPE_SDMA) { 2191 doorbell_idx = adev->mes.db_start_dw_offset + \ 2192 adev->doorbell_index.sdma_engine[0]; 2193 } else { 2194 doorbell_idx = adev->mes.db_start_dw_offset + \ 2195 adev->doorbell_index.userqueue_start; 2196 } 2197 2198 if (adev->mes.enable_coop_mode && 2199 queue_type == AMDGPU_RING_TYPE_COMPUTE) { 2200 for (i = 0, mqd_count = 0; i < num_xcc; i++) { 2201 if (adev->mes.master_xcc_ids[i] == xcc_id) 2202 mqd_count++; 2203 } 2204 mqd_size *= mqd_count; 2205 } 2206 2207 r = mes_v12_1_alloc_test_buf(adev, &mqd_bo, &mqd_gpu_addr, 2208 &mqd_ptr, mqd_size * mqd_count); 2209 if (r < 0) 2210 return r; 2211 2212 mqd_prop.mqd_gpu_addr = mqd_gpu_addr; 2213 mqd_prop.hqd_base_gpu_addr = RING_OFFSET(USER_CTX_VA); 2214 mqd_prop.eop_gpu_addr = EOP_OFFSET(USER_CTX_VA); 2215 mqd_prop.wptr_gpu_addr = WPTR_OFFSET(USER_CTX_VA); 2216 mqd_prop.rptr_gpu_addr = RPTR_OFFSET(USER_CTX_VA); 2217 mqd_prop.doorbell_index = doorbell_idx; 2218 mqd_prop.queue_size = PAGE_SIZE; 2219 mqd_prop.mqd_stride_size = mqd_size; 2220 mqd_prop.use_doorbell = true; 2221 mqd_prop.hqd_active = false; 2222 2223 mqd_mgr->init_mqd(adev, mqd_ptr, &mqd_prop); 2224 if (mqd_count > 1) { 2225 for (i = 1; i < mqd_count; i++) { 2226 off = mqd_size * i; 2227 mqd_prop.mqd_gpu_addr = mqd_gpu_addr + off; 2228 mqd_mgr->init_mqd(adev, (char *)mqd_ptr + off, 2229 &mqd_prop); 2230 } 2231 } 2232 2233 add_queue.xcc_id = xcc_id; 2234 add_queue.process_id = pasid; 2235 add_queue.page_table_base_addr = adev->vm_manager.vram_base_offset + 2236 amdgpu_bo_gpu_offset(vm->root.bo) - adev->gmc.vram_start; 2237 add_queue.process_va_start = 0; 2238 add_queue.process_va_end = adev->vm_manager.max_pfn - 1; 2239 add_queue.process_context_addr = meta_gpu_addr; 2240 add_queue.gang_context_addr = meta_gpu_addr + AMDGPU_MES_PROC_CTX_SIZE; 2241 add_queue.doorbell_offset = doorbell_idx; 2242 add_queue.mqd_addr = mqd_gpu_addr; 2243 add_queue.wptr_addr = mqd_prop.wptr_gpu_addr; 2244 add_queue.wptr_mc_addr = WPTR_OFFSET(queue_gpu_addr); 2245 add_queue.queue_type = queue_type; 2246 add_queue.vm_cntx_cntl = hub->vm_cntx_cntl; 2247 2248 r = mes_v12_1_add_hw_queue(&adev->mes, &add_queue); 2249 if (r) 2250 goto error; 2251 2252 mes_v12_1_test_ring(adev, xcc_id, (u32 *)RING_OFFSET((char *)ctx_ptr), 2253 FENCE_OFFSET(USER_CTX_VA), 2254 FENCE_OFFSET((char *)ctx_ptr), 2255 WPTR_OFFSET((char *)ctx_ptr), 2256 doorbell_idx, queue_type); 2257 2258 remove_queue.xcc_id = xcc_id; 2259 remove_queue.doorbell_offset = doorbell_idx; 2260 remove_queue.gang_context_addr = add_queue.gang_context_addr; 2261 r = mes_v12_1_remove_hw_queue(&adev->mes, &remove_queue); 2262 2263 error: 2264 amdgpu_bo_free_kernel(&mqd_bo, &mqd_gpu_addr, &mqd_ptr); 2265 return r; 2266 } 2267 2268 static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id) 2269 { 2270 int queue_types[] = { AMDGPU_RING_TYPE_COMPUTE, 2271 /* AMDGPU_RING_TYPE_SDMA */ }; 2272 struct amdgpu_bo_va *bo_va = NULL; 2273 struct amdgpu_vm *vm = NULL; 2274 struct amdgpu_bo *meta_bo = NULL, *ctx_bo = NULL; 2275 void *meta_ptr = NULL, *ctx_ptr = NULL; 2276 u64 meta_gpu_addr, ctx_gpu_addr; 2277 int size, i, r, pasid; 2278 2279 pasid = amdgpu_pasid_alloc(16); 2280 if (pasid < 0) 2281 pasid = 0; 2282 2283 size = AMDGPU_MES_PROC_CTX_SIZE + AMDGPU_MES_GANG_CTX_SIZE; 2284 r = mes_v12_1_alloc_test_buf(adev, &meta_bo, &meta_gpu_addr, 2285 &meta_ptr, size); 2286 if (r < 0) 2287 goto err2; 2288 2289 r = mes_v12_1_alloc_test_buf(adev, &ctx_bo, &ctx_gpu_addr, 2290 &ctx_ptr, USER_CTX_SIZE); 2291 if (r < 0) 2292 goto err2; 2293 2294 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 2295 if (!vm) { 2296 r = -ENOMEM; 2297 goto err2; 2298 } 2299 2300 r = amdgpu_vm_init(adev, vm, -1, pasid); 2301 if (r) 2302 goto err1; 2303 2304 r = mes_v12_1_map_test_bo(adev, ctx_bo, vm, &bo_va, 2305 USER_CTX_VA, USER_CTX_SIZE); 2306 if (r) 2307 goto err0; 2308 2309 for (i = 0; i < ARRAY_SIZE(queue_types); i++) { 2310 memset(ctx_ptr, 0, USER_CTX_SIZE); 2311 2312 r = mes_v12_1_test_queue(adev, xcc_id, pasid, vm, meta_gpu_addr, 2313 ctx_gpu_addr, ctx_ptr, queue_types[i]); 2314 if (r) 2315 break; 2316 } 2317 2318 amdgpu_unmap_static_csa(adev, vm, ctx_bo, bo_va, USER_CTX_VA); 2319 err0: 2320 amdgpu_vm_fini(adev, vm); 2321 err1: 2322 kfree(vm); 2323 err2: 2324 amdgpu_bo_free_kernel(&meta_bo, &meta_gpu_addr, &meta_ptr); 2325 amdgpu_bo_free_kernel(&ctx_bo, &ctx_gpu_addr, &ctx_ptr); 2326 amdgpu_pasid_free(pasid); 2327 return r; 2328 } 2329 2330