1 /* 2 * Copyright 2025 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <linux/firmware.h> 25 #include <linux/module.h> 26 #include "amdgpu.h" 27 #include "soc15_common.h" 28 #include "soc_v1_0.h" 29 #include "gc/gc_12_1_0_offset.h" 30 #include "gc/gc_12_1_0_sh_mask.h" 31 #include "gc/gc_11_0_0_default.h" 32 #include "v12_structs.h" 33 #include "mes_v12_api_def.h" 34 #include "gfx_v12_1_pkt.h" 35 #include "sdma_v7_1_0_pkt_open.h" 36 37 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin"); 38 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin"); 39 MODULE_FIRMWARE("amdgpu/gc_12_1_0_uni_mes.bin"); 40 41 static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block); 42 static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id); 43 static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block); 44 static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id); 45 static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id); 46 static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id); 47 static int mes_v12_1_setup_coop_mode(struct amdgpu_device *adev, int xcc_id); 48 49 #define MES_EOP_SIZE 2048 50 51 #define regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT 0x100000 52 #define XCC_MID_MASK 0x41000000 53 54 static void mes_v12_1_ring_set_wptr(struct amdgpu_ring *ring) 55 { 56 struct amdgpu_device *adev = ring->adev; 57 58 if (ring->use_doorbell) { 59 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 60 ring->wptr); 61 WDOORBELL64(ring->doorbell_index, ring->wptr); 62 } else { 63 BUG(); 64 } 65 } 66 67 static u64 mes_v12_1_ring_get_rptr(struct amdgpu_ring *ring) 68 { 69 return *ring->rptr_cpu_addr; 70 } 71 72 static u64 mes_v12_1_ring_get_wptr(struct amdgpu_ring *ring) 73 { 74 u64 wptr; 75 76 if (ring->use_doorbell) 77 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 78 else 79 BUG(); 80 return wptr; 81 } 82 83 static const struct amdgpu_ring_funcs mes_v12_1_ring_funcs = { 84 .type = AMDGPU_RING_TYPE_MES, 85 .align_mask = 1, 86 .nop = 0, 87 .support_64bit_ptrs = true, 88 .get_rptr = mes_v12_1_ring_get_rptr, 89 .get_wptr = mes_v12_1_ring_get_wptr, 90 .set_wptr = mes_v12_1_ring_set_wptr, 91 .insert_nop = amdgpu_ring_insert_nop, 92 }; 93 94 static const char *mes_v12_1_opcodes[] = { 95 "SET_HW_RSRC", 96 "SET_SCHEDULING_CONFIG", 97 "ADD_QUEUE", 98 "REMOVE_QUEUE", 99 "PERFORM_YIELD", 100 "SET_GANG_PRIORITY_LEVEL", 101 "SUSPEND", 102 "RESUME", 103 "RESET", 104 "SET_LOG_BUFFER", 105 "CHANGE_GANG_PRORITY", 106 "QUERY_SCHEDULER_STATUS", 107 "unused", 108 "SET_DEBUG_VMID", 109 "MISC", 110 "UPDATE_ROOT_PAGE_TABLE", 111 "AMD_LOG", 112 "SET_SE_MODE", 113 "SET_GANG_SUBMIT", 114 "SET_HW_RSRC_1", 115 "INVALIDATE_TLBS", 116 }; 117 118 static const char *mes_v12_1_misc_opcodes[] = { 119 "WRITE_REG", 120 "INV_GART", 121 "QUERY_STATUS", 122 "READ_REG", 123 "WAIT_REG_MEM", 124 "SET_SHADER_DEBUGGER", 125 "NOTIFY_WORK_ON_UNMAPPED_QUEUE", 126 "NOTIFY_TO_UNMAP_PROCESSES", 127 }; 128 129 static const char *mes_v12_1_get_op_string(union MESAPI__MISC *x_pkt) 130 { 131 const char *op_str = NULL; 132 133 if (x_pkt->header.opcode < ARRAY_SIZE(mes_v12_1_opcodes)) 134 op_str = mes_v12_1_opcodes[x_pkt->header.opcode]; 135 136 return op_str; 137 } 138 139 static const char *mes_v12_1_get_misc_op_string(union MESAPI__MISC *x_pkt) 140 { 141 const char *op_str = NULL; 142 143 if ((x_pkt->header.opcode == MES_SCH_API_MISC) && 144 (x_pkt->opcode < ARRAY_SIZE(mes_v12_1_misc_opcodes))) 145 op_str = mes_v12_1_misc_opcodes[x_pkt->opcode]; 146 147 return op_str; 148 } 149 150 static int mes_v12_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, 151 int xcc_id, int pipe, void *pkt, 152 int size, int api_status_off) 153 { 154 union MESAPI__QUERY_MES_STATUS mes_status_pkt; 155 signed long timeout = 2100000; /* 2100 ms */ 156 struct amdgpu_device *adev = mes->adev; 157 struct amdgpu_ring *ring = &mes->ring[MES_PIPE_INST(xcc_id, pipe)]; 158 spinlock_t *ring_lock = &mes->ring_lock[MES_PIPE_INST(xcc_id, pipe)]; 159 struct MES_API_STATUS *api_status; 160 union MESAPI__MISC *x_pkt = pkt; 161 const char *op_str, *misc_op_str; 162 unsigned long flags; 163 u64 status_gpu_addr; 164 u32 seq, status_offset; 165 u64 *status_ptr; 166 signed long r; 167 int ret; 168 169 if (x_pkt->header.opcode >= MES_SCH_API_MAX) 170 return -EINVAL; 171 172 if (amdgpu_emu_mode) { 173 timeout *= 1000; 174 } else if (amdgpu_sriov_vf(adev)) { 175 /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */ 176 timeout = 15 * 600 * 1000; 177 } 178 179 ret = amdgpu_device_wb_get(adev, &status_offset); 180 if (ret) 181 return ret; 182 183 status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4); 184 status_ptr = (u64 *)&adev->wb.wb[status_offset]; 185 *status_ptr = 0; 186 187 spin_lock_irqsave(ring_lock, flags); 188 r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); 189 if (r) 190 goto error_unlock_free; 191 192 seq = ++ring->fence_drv.sync_seq; 193 r = amdgpu_fence_wait_polling(ring, 194 seq - ring->fence_drv.num_fences_mask, 195 timeout); 196 if (r < 1) 197 goto error_undo; 198 199 api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); 200 api_status->api_completion_fence_addr = status_gpu_addr; 201 api_status->api_completion_fence_value = 1; 202 203 amdgpu_ring_write_multiple(ring, pkt, size / 4); 204 205 memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); 206 mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; 207 mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; 208 mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 209 mes_status_pkt.api_status.api_completion_fence_addr = 210 ring->fence_drv.gpu_addr; 211 mes_status_pkt.api_status.api_completion_fence_value = seq; 212 213 amdgpu_ring_write_multiple(ring, &mes_status_pkt, 214 sizeof(mes_status_pkt) / 4); 215 216 amdgpu_ring_commit(ring); 217 spin_unlock_irqrestore(ring_lock, flags); 218 219 op_str = mes_v12_1_get_op_string(x_pkt); 220 misc_op_str = mes_v12_1_get_misc_op_string(x_pkt); 221 222 if (misc_op_str) 223 dev_dbg(adev->dev, "MES(%d, %d) msg=%s (%s) was emitted\n", 224 xcc_id, pipe, op_str, misc_op_str); 225 else if (op_str) 226 dev_dbg(adev->dev, "MES(%d, %d) msg=%s was emitted\n", 227 xcc_id, pipe, op_str); 228 else 229 dev_dbg(adev->dev, "MES(%d, %d) msg=%d was emitted\n", 230 xcc_id, pipe, x_pkt->header.opcode); 231 232 r = amdgpu_fence_wait_polling(ring, seq, timeout); 233 if (r < 1 || !*status_ptr) { 234 if (misc_op_str) 235 dev_err(adev->dev, 236 "MES(%d, %d) failed to respond to msg=%s (%s)\n", 237 xcc_id, pipe, op_str, misc_op_str); 238 else if (op_str) 239 dev_err(adev->dev, 240 "MES(%d, %d) failed to respond to msg=%s\n", 241 xcc_id, pipe, op_str); 242 else 243 dev_err(adev->dev, 244 "MES(%d, %d) failed to respond to msg=%d\n", 245 xcc_id, pipe, x_pkt->header.opcode); 246 247 while (halt_if_hws_hang) 248 schedule(); 249 250 r = -ETIMEDOUT; 251 goto error_wb_free; 252 } 253 254 amdgpu_device_wb_free(adev, status_offset); 255 return 0; 256 257 error_undo: 258 dev_err(adev->dev, "MES(%d, %d) ring buffer is full.\n", xcc_id, pipe); 259 amdgpu_ring_undo(ring); 260 261 error_unlock_free: 262 spin_unlock_irqrestore(ring_lock, flags); 263 264 error_wb_free: 265 amdgpu_device_wb_free(adev, status_offset); 266 return r; 267 } 268 269 static int convert_to_mes_queue_type(int queue_type) 270 { 271 if (queue_type == AMDGPU_RING_TYPE_GFX) 272 return MES_QUEUE_TYPE_GFX; 273 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) 274 return MES_QUEUE_TYPE_COMPUTE; 275 else if (queue_type == AMDGPU_RING_TYPE_SDMA) 276 return MES_QUEUE_TYPE_SDMA; 277 else if (queue_type == AMDGPU_RING_TYPE_MES) 278 return MES_QUEUE_TYPE_SCHQ; 279 else 280 BUG(); 281 return -1; 282 } 283 284 static int mes_v12_1_add_hw_queue(struct amdgpu_mes *mes, 285 struct mes_add_queue_input *input) 286 { 287 union MESAPI__ADD_QUEUE mes_add_queue_pkt; 288 int xcc_id = input->xcc_id; 289 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 290 291 if (mes->enable_coop_mode) 292 xcc_id = mes->master_xcc_ids[inst]; 293 294 memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); 295 296 mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 297 mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; 298 mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 299 300 mes_add_queue_pkt.process_id = input->process_id; 301 mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr; 302 mes_add_queue_pkt.process_va_start = input->process_va_start; 303 mes_add_queue_pkt.process_va_end = input->process_va_end; 304 mes_add_queue_pkt.process_quantum = input->process_quantum; 305 mes_add_queue_pkt.process_context_addr = input->process_context_addr; 306 mes_add_queue_pkt.gang_quantum = input->gang_quantum; 307 mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; 308 mes_add_queue_pkt.inprocess_gang_priority = 309 input->inprocess_gang_priority; 310 mes_add_queue_pkt.gang_global_priority_level = 311 input->gang_global_priority_level; 312 mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; 313 mes_add_queue_pkt.mqd_addr = input->mqd_addr; 314 315 mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr; 316 317 mes_add_queue_pkt.queue_type = 318 convert_to_mes_queue_type(input->queue_type); 319 mes_add_queue_pkt.paging = input->paging; 320 mes_add_queue_pkt.vm_context_cntl = input->vm_cntx_cntl; 321 mes_add_queue_pkt.gws_base = input->gws_base; 322 mes_add_queue_pkt.gws_size = input->gws_size; 323 mes_add_queue_pkt.trap_handler_addr = input->tba_addr; 324 mes_add_queue_pkt.tma_addr = input->tma_addr; 325 mes_add_queue_pkt.trap_en = input->trap_en; 326 mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear; 327 mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; 328 329 /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ 330 mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; 331 mes_add_queue_pkt.gds_size = input->queue_size; 332 333 /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ 334 mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; 335 mes_add_queue_pkt.gds_size = input->queue_size; 336 337 mes_add_queue_pkt.full_sh_mem_config_data = input->sh_mem_config_data; 338 339 return mes_v12_1_submit_pkt_and_poll_completion(mes, 340 xcc_id, AMDGPU_MES_SCHED_PIPE, 341 &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), 342 offsetof(union MESAPI__ADD_QUEUE, api_status)); 343 } 344 345 static int mes_v12_1_remove_hw_queue(struct amdgpu_mes *mes, 346 struct mes_remove_queue_input *input) 347 { 348 union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; 349 int xcc_id = input->xcc_id; 350 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 351 352 if (mes->enable_coop_mode) 353 xcc_id = mes->master_xcc_ids[inst]; 354 355 memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); 356 357 mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 358 mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; 359 mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 360 361 mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; 362 mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; 363 364 return mes_v12_1_submit_pkt_and_poll_completion(mes, 365 xcc_id, AMDGPU_MES_SCHED_PIPE, 366 &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), 367 offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 368 } 369 370 static int mes_v12_1_reset_hw_queue(struct amdgpu_mes *mes, 371 struct mes_reset_queue_input *input) 372 { 373 union MESAPI__RESET mes_reset_queue_pkt; 374 int pipe; 375 376 memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); 377 378 mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 379 mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; 380 mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 381 382 mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; 383 /* mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; */ 384 /*mes_reset_queue_pkt.reset_queue_only = 1;*/ 385 386 if (mes->adev->enable_uni_mes) 387 pipe = AMDGPU_MES_KIQ_PIPE; 388 else 389 pipe = AMDGPU_MES_SCHED_PIPE; 390 391 return mes_v12_1_submit_pkt_and_poll_completion(mes, 392 input->xcc_id, pipe, 393 &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), 394 offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 395 } 396 397 static int mes_v12_1_map_legacy_queue(struct amdgpu_mes *mes, 398 struct mes_map_legacy_queue_input *input) 399 { 400 union MESAPI__ADD_QUEUE mes_add_queue_pkt; 401 int pipe; 402 403 memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); 404 405 mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 406 mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; 407 mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 408 409 mes_add_queue_pkt.pipe_id = input->pipe_id; 410 mes_add_queue_pkt.queue_id = input->queue_id; 411 mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; 412 mes_add_queue_pkt.mqd_addr = input->mqd_addr; 413 mes_add_queue_pkt.wptr_addr = input->wptr_addr; 414 mes_add_queue_pkt.queue_type = 415 convert_to_mes_queue_type(input->queue_type); 416 mes_add_queue_pkt.map_legacy_kq = 1; 417 418 if (mes->adev->enable_uni_mes) 419 pipe = AMDGPU_MES_KIQ_PIPE; 420 else 421 pipe = AMDGPU_MES_SCHED_PIPE; 422 423 return mes_v12_1_submit_pkt_and_poll_completion(mes, 424 input->xcc_id, pipe, 425 &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), 426 offsetof(union MESAPI__ADD_QUEUE, api_status)); 427 } 428 429 static int mes_v12_1_unmap_legacy_queue(struct amdgpu_mes *mes, 430 struct mes_unmap_legacy_queue_input *input) 431 { 432 union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; 433 int pipe; 434 435 memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); 436 437 mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 438 mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; 439 mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 440 441 mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; 442 mes_remove_queue_pkt.gang_context_addr = 0; 443 444 mes_remove_queue_pkt.pipe_id = input->pipe_id; 445 mes_remove_queue_pkt.queue_id = input->queue_id; 446 447 if (input->action == PREEMPT_QUEUES_NO_UNMAP) { 448 mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1; 449 mes_remove_queue_pkt.tf_addr = input->trail_fence_addr; 450 mes_remove_queue_pkt.tf_data = 451 lower_32_bits(input->trail_fence_data); 452 } else { 453 mes_remove_queue_pkt.unmap_legacy_queue = 1; 454 mes_remove_queue_pkt.queue_type = 455 convert_to_mes_queue_type(input->queue_type); 456 } 457 458 if (mes->adev->enable_uni_mes) 459 pipe = AMDGPU_MES_KIQ_PIPE; 460 else 461 pipe = AMDGPU_MES_SCHED_PIPE; 462 463 return mes_v12_1_submit_pkt_and_poll_completion(mes, 464 input->xcc_id, pipe, 465 &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), 466 offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 467 } 468 469 static int mes_v12_1_suspend_gang(struct amdgpu_mes *mes, 470 struct mes_suspend_gang_input *input) 471 { 472 return 0; 473 } 474 475 static int mes_v12_1_resume_gang(struct amdgpu_mes *mes, 476 struct mes_resume_gang_input *input) 477 { 478 return 0; 479 } 480 481 static int mes_v12_1_query_sched_status(struct amdgpu_mes *mes, 482 int pipe, int xcc_id) 483 { 484 union MESAPI__QUERY_MES_STATUS mes_status_pkt; 485 486 memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); 487 488 mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; 489 mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; 490 mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 491 492 return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, 493 &mes_status_pkt, sizeof(mes_status_pkt), 494 offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); 495 } 496 static uint32_t mes_v12_1_get_xcc_from_reg(uint32_t reg_offset) 497 { 498 return ((reg_offset >> 16) & 0x7); 499 } 500 501 static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id, 502 struct RRMT_OPTION *rrmt_opt, 503 uint32_t *out_reg) 504 { 505 uint32_t normalized_reg = soc_v1_0_normalize_xcc_reg_offset(reg); 506 507 if (soc_v1_0_normalize_xcc_reg_range(normalized_reg)) { 508 rrmt_opt->xcd_die_id = mes_v12_1_get_xcc_from_reg(reg); 509 rrmt_opt->mode = (xcc_id == rrmt_opt->xcd_die_id) ? 510 MES_RRMT_MODE_LOCAL_XCD : MES_RRMT_MODE_REMOTE_XCD; 511 } else { 512 rrmt_opt->mode = MES_RRMT_MODE_REMOTE_MID; 513 if (soc_v1_0_mid1_reg_range(reg)) 514 rrmt_opt->mid_die_id = 1; 515 } 516 517 *out_reg = soc_v1_0_normalize_reg_offset(reg); 518 } 519 520 static int mes_v12_1_misc_op(struct amdgpu_mes *mes, 521 struct mes_misc_op_input *input) 522 { 523 struct amdgpu_device *adev = mes->adev; 524 union MESAPI__MISC misc_pkt; 525 int pipe; 526 527 if (mes->adev->enable_uni_mes) 528 pipe = AMDGPU_MES_KIQ_PIPE; 529 else 530 pipe = AMDGPU_MES_SCHED_PIPE; 531 532 memset(&misc_pkt, 0, sizeof(misc_pkt)); 533 534 misc_pkt.header.type = MES_API_TYPE_SCHEDULER; 535 misc_pkt.header.opcode = MES_SCH_API_MISC; 536 misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 537 538 switch (input->op) { 539 case MES_MISC_OP_READ_REG: 540 misc_pkt.opcode = MESAPI_MISC__READ_REG; 541 misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr; 542 mes_v12_1_get_rrmt(input->read_reg.reg_offset, 543 GET_INST(GC, input->xcc_id), 544 &misc_pkt.read_reg.rrmt_opt, 545 &misc_pkt.read_reg.reg_offset); 546 break; 547 case MES_MISC_OP_WRITE_REG: 548 misc_pkt.opcode = MESAPI_MISC__WRITE_REG; 549 misc_pkt.write_reg.reg_value = input->write_reg.reg_value; 550 mes_v12_1_get_rrmt(input->write_reg.reg_offset, 551 GET_INST(GC, input->xcc_id), 552 &misc_pkt.write_reg.rrmt_opt, 553 &misc_pkt.write_reg.reg_offset); 554 break; 555 case MES_MISC_OP_WRM_REG_WAIT: 556 misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; 557 misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM; 558 misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; 559 misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; 560 misc_pkt.wait_reg_mem.reg_offset2 = 0; 561 mes_v12_1_get_rrmt(input->wrm_reg.reg0, 562 GET_INST(GC, input->xcc_id), 563 &misc_pkt.wait_reg_mem.rrmt_opt1, 564 &misc_pkt.wait_reg_mem.reg_offset1); 565 break; 566 case MES_MISC_OP_WRM_REG_WR_WAIT: 567 misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; 568 misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG; 569 misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; 570 misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; 571 mes_v12_1_get_rrmt(input->wrm_reg.reg0, 572 GET_INST(GC, input->xcc_id), 573 &misc_pkt.wait_reg_mem.rrmt_opt1, 574 &misc_pkt.wait_reg_mem.reg_offset1); 575 mes_v12_1_get_rrmt(input->wrm_reg.reg1, 576 GET_INST(GC, input->xcc_id), 577 &misc_pkt.wait_reg_mem.rrmt_opt2, 578 &misc_pkt.wait_reg_mem.reg_offset2); 579 break; 580 case MES_MISC_OP_SET_SHADER_DEBUGGER: 581 pipe = AMDGPU_MES_SCHED_PIPE; 582 misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER; 583 misc_pkt.set_shader_debugger.process_context_addr = 584 input->set_shader_debugger.process_context_addr; 585 misc_pkt.set_shader_debugger.flags.u32all = 586 input->set_shader_debugger.flags.u32all; 587 misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl = 588 input->set_shader_debugger.spi_gdbg_per_vmid_cntl; 589 memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl, 590 input->set_shader_debugger.tcp_watch_cntl, 591 sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); 592 misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; 593 break; 594 case MES_MISC_OP_CHANGE_CONFIG: 595 misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; 596 misc_pkt.change_config.opcode = 597 MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS; 598 misc_pkt.change_config.option.bits.limit_single_process = 599 input->change_config.option.limit_single_process; 600 break; 601 default: 602 DRM_ERROR("unsupported misc op (%d) \n", input->op); 603 return -EINVAL; 604 } 605 606 return mes_v12_1_submit_pkt_and_poll_completion(mes, 607 input->xcc_id, pipe, 608 &misc_pkt, sizeof(misc_pkt), 609 offsetof(union MESAPI__MISC, api_status)); 610 } 611 612 static int mes_v12_1_set_hw_resources_1(struct amdgpu_mes *mes, 613 int pipe, int xcc_id) 614 { 615 union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; 616 int master_xcc_id, inst = MES_PIPE_INST(xcc_id, pipe); 617 618 memset(&mes_set_hw_res_1_pkt, 0, sizeof(mes_set_hw_res_1_pkt)); 619 620 mes_set_hw_res_1_pkt.header.type = MES_API_TYPE_SCHEDULER; 621 mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1; 622 mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 623 mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100; 624 625 /* From version 0x74 above, pipe1 support use shared command buffer 626 to distribute some tasks on individual XCCs*/ 627 if (mes->enable_coop_mode && 628 ((pipe == AMDGPU_MES_SCHED_PIPE) || 629 ((mes->kiq_version & AMDGPU_MES_VERSION_MASK) >= 0x74))) { 630 master_xcc_id = mes->master_xcc_ids[inst]; 631 mes_set_hw_res_1_pkt.mes_coop_mode = 1; 632 mes_set_hw_res_1_pkt.coop_sch_shared_mc_addr = 633 mes->shared_cmd_buf_gpu_addr[master_xcc_id + pipe]; 634 } 635 636 return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, 637 &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), 638 offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); 639 } 640 641 static int mes_v12_1_set_hw_resources(struct amdgpu_mes *mes, 642 int pipe, int xcc_id) 643 { 644 int i, status; 645 struct amdgpu_device *adev = mes->adev; 646 union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt; 647 648 memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); 649 650 mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER; 651 mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; 652 mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 653 654 if (pipe == AMDGPU_MES_SCHED_PIPE) { 655 mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; 656 mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; 657 mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; 658 mes_set_hw_res_pkt.paging_vmid = 0; 659 660 for (i = 0; i < MAX_COMPUTE_PIPES; i++) 661 mes_set_hw_res_pkt.compute_hqd_mask[i] = 662 mes->compute_hqd_mask[i]; 663 664 for (i = 0; i < MAX_GFX_PIPES; i++) 665 mes_set_hw_res_pkt.gfx_hqd_mask[i] = 666 mes->gfx_hqd_mask[i]; 667 668 for (i = 0; i < MAX_SDMA_PIPES; i++) 669 mes_set_hw_res_pkt.sdma_hqd_mask[i] = 670 mes->sdma_hqd_mask[i]; 671 672 for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) 673 mes_set_hw_res_pkt.aggregated_doorbells[i] = 674 mes->aggregated_doorbells[i]; 675 } 676 677 mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = 678 mes->sch_ctx_gpu_addr[pipe]; 679 mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = 680 mes->query_status_fence_gpu_addr[pipe]; 681 682 for (i = 0; i < 5; i++) { 683 mes_set_hw_res_pkt.gc_base[i] = 684 adev->reg_offset[GC_HWIP][0][i]; 685 mes_set_hw_res_pkt.mmhub_base[i] = 686 adev->reg_offset[MMHUB_HWIP][0][i]; 687 mes_set_hw_res_pkt.osssys_base[i] = 688 adev->reg_offset[OSSSYS_HWIP][0][i]; 689 } 690 691 mes_set_hw_res_pkt.disable_reset = 1; 692 mes_set_hw_res_pkt.disable_mes_log = 1; 693 mes_set_hw_res_pkt.use_different_vmid_compute = 1; 694 mes_set_hw_res_pkt.enable_reg_active_poll = 1; 695 mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; 696 697 /* 698 * Keep oversubscribe timer for sdma . When we have unmapped doorbell 699 * handling support, other queue will not use the oversubscribe timer. 700 * handling mode - 0: disabled; 1: basic version; 2: basic+ version 701 */ 702 mes_set_hw_res_pkt.oversubscription_timer = 50; 703 mes_set_hw_res_pkt.unmapped_doorbell_handling = 1; 704 705 if (amdgpu_mes_log_enable) { 706 mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; 707 mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = 708 mes->event_log_gpu_addr + MES_PIPE_INST(xcc_id, pipe) * AMDGPU_MES_LOG_BUFFER_SIZE; 709 } 710 711 if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) 712 mes_set_hw_res_pkt.limit_single_process = 1; 713 714 status = mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, 715 &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), 716 offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); 717 718 /* get MES scheduler versions */ 719 mutex_lock(&adev->srbm_mutex); 720 soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); 721 722 if (pipe == AMDGPU_MES_SCHED_PIPE) 723 adev->mes.sched_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO); 724 else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) 725 adev->mes.kiq_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO); 726 727 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 728 mutex_unlock(&adev->srbm_mutex); 729 730 return status; 731 } 732 733 static void mes_v12_1_init_aggregated_doorbell(struct amdgpu_mes *mes, 734 int xcc_id) 735 { 736 struct amdgpu_device *adev = mes->adev; 737 uint32_t data; 738 739 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1); 740 data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK | 741 CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK | 742 CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK); 743 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] << 744 CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT; 745 data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT; 746 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1, data); 747 748 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2); 749 data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK | 750 CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK | 751 CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK); 752 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] << 753 CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT; 754 data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT; 755 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2, data); 756 757 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3); 758 data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK | 759 CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK | 760 CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK); 761 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] << 762 CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT; 763 data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT; 764 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3, data); 765 766 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4); 767 data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK | 768 CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK | 769 CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK); 770 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] << 771 CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT; 772 data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT; 773 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4, data); 774 775 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5); 776 data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK | 777 CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK | 778 CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK); 779 data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] << 780 CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT; 781 data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT; 782 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5, data); 783 784 data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT; 785 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_GFX_CONTROL, data); 786 } 787 788 789 static void mes_v12_1_enable_unmapped_doorbell_handling( 790 struct amdgpu_mes *mes, bool enable, int xcc_id) 791 { 792 struct amdgpu_device *adev = mes->adev; 793 uint32_t data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL); 794 795 /* 796 * The default PROC_LSB settng is 0xc which means doorbell 797 * addr[16:12] gives the doorbell page number. For kfd, each 798 * process will use 2 pages of doorbell, we need to change the 799 * setting to 0xd 800 */ 801 data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK; 802 data |= 0xd << CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT; 803 804 data |= (enable ? 1 : 0) << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT; 805 806 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL, data); 807 } 808 809 #if 0 810 static int mes_v12_1_reset_legacy_queue(struct amdgpu_mes *mes, 811 struct mes_reset_legacy_queue_input *input) 812 { 813 union MESAPI__RESET mes_reset_queue_pkt; 814 int pipe; 815 816 memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); 817 818 mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 819 mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; 820 mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 821 822 mes_reset_queue_pkt.queue_type = 823 convert_to_mes_queue_type(input->queue_type); 824 825 if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { 826 mes_reset_queue_pkt.reset_legacy_gfx = 1; 827 mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; 828 mes_reset_queue_pkt.queue_id_lp = input->queue_id; 829 mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr; 830 mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset; 831 mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr; 832 mes_reset_queue_pkt.vmid_id_lp = input->vmid; 833 } else { 834 mes_reset_queue_pkt.reset_queue_only = 1; 835 mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; 836 } 837 838 if (mes->adev->enable_uni_mes) 839 pipe = AMDGPU_MES_KIQ_PIPE; 840 else 841 pipe = AMDGPU_MES_SCHED_PIPE; 842 843 return mes_v12_1_submit_pkt_and_poll_completion(mes, 844 input->xcc_id, pipe, 845 &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), 846 offsetof(union MESAPI__RESET, api_status)); 847 } 848 #endif 849 850 static int mes_v12_inv_tlb_convert_hub_id(uint8_t id) 851 { 852 /* 853 * MES doesn't support invalidate gc_hub on slave xcc individually 854 * master xcc will invalidate all gc_hub for the partition 855 */ 856 if (AMDGPU_IS_GFXHUB(id)) 857 return 0; 858 else if (AMDGPU_IS_MMHUB0(id)) 859 return 1; 860 else if (AMDGPU_IS_MMHUB1(id)) 861 return 2; 862 return -EINVAL; 863 864 } 865 866 static int mes_v12_1_inv_tlbs_pasid(struct amdgpu_mes *mes, 867 struct mes_inv_tlbs_pasid_input *input) 868 { 869 union MESAPI__INV_TLBS mes_inv_tlbs; 870 int xcc_id = input->xcc_id; 871 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 872 int ret; 873 874 if (mes->enable_coop_mode) 875 xcc_id = mes->master_xcc_ids[inst]; 876 877 memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs)); 878 879 mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER; 880 mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS; 881 mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 882 883 mes_inv_tlbs.invalidate_tlbs.inv_sel = 0; 884 mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type; 885 mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid; 886 887 /*convert amdgpu_mes_hub_id to mes expected hub_id */ 888 ret = mes_v12_inv_tlb_convert_hub_id(input->hub_id); 889 if (ret < 0) 890 return -EINVAL; 891 mes_inv_tlbs.invalidate_tlbs.hub_id = ret; 892 return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, AMDGPU_MES_KIQ_PIPE, 893 &mes_inv_tlbs, sizeof(mes_inv_tlbs), 894 offsetof(union MESAPI__INV_TLBS, api_status)); 895 896 } 897 898 static const struct amdgpu_mes_funcs mes_v12_1_funcs = { 899 .add_hw_queue = mes_v12_1_add_hw_queue, 900 .remove_hw_queue = mes_v12_1_remove_hw_queue, 901 .map_legacy_queue = mes_v12_1_map_legacy_queue, 902 .unmap_legacy_queue = mes_v12_1_unmap_legacy_queue, 903 .suspend_gang = mes_v12_1_suspend_gang, 904 .resume_gang = mes_v12_1_resume_gang, 905 .misc_op = mes_v12_1_misc_op, 906 .reset_hw_queue = mes_v12_1_reset_hw_queue, 907 .invalidate_tlbs_pasid = mes_v12_1_inv_tlbs_pasid, 908 }; 909 910 static int mes_v12_1_allocate_ucode_buffer(struct amdgpu_device *adev, 911 enum amdgpu_mes_pipe pipe, 912 int xcc_id) 913 { 914 int r, inst = MES_PIPE_INST(xcc_id, pipe); 915 const struct mes_firmware_header_v1_0 *mes_hdr; 916 const __le32 *fw_data; 917 unsigned fw_size; 918 919 mes_hdr = (const struct mes_firmware_header_v1_0 *) 920 adev->mes.fw[pipe]->data; 921 922 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 923 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); 924 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); 925 926 r = amdgpu_bo_create_reserved(adev, fw_size, 927 PAGE_SIZE, 928 AMDGPU_GEM_DOMAIN_VRAM, 929 &adev->mes.ucode_fw_obj[inst], 930 &adev->mes.ucode_fw_gpu_addr[inst], 931 (void **)&adev->mes.ucode_fw_ptr[inst]); 932 if (r) { 933 dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r); 934 return r; 935 } 936 937 memcpy(adev->mes.ucode_fw_ptr[inst], fw_data, fw_size); 938 939 amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[inst]); 940 amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[inst]); 941 942 return 0; 943 } 944 945 static int mes_v12_1_allocate_ucode_data_buffer(struct amdgpu_device *adev, 946 enum amdgpu_mes_pipe pipe, 947 int xcc_id) 948 { 949 int r, inst = MES_PIPE_INST(xcc_id, pipe); 950 const struct mes_firmware_header_v1_0 *mes_hdr; 951 const __le32 *fw_data; 952 unsigned fw_size; 953 954 mes_hdr = (const struct mes_firmware_header_v1_0 *) 955 adev->mes.fw[pipe]->data; 956 957 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 958 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); 959 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); 960 961 r = amdgpu_bo_create_reserved(adev, fw_size, 962 64 * 1024, 963 AMDGPU_GEM_DOMAIN_VRAM, 964 &adev->mes.data_fw_obj[inst], 965 &adev->mes.data_fw_gpu_addr[inst], 966 (void **)&adev->mes.data_fw_ptr[inst]); 967 if (r) { 968 dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r); 969 return r; 970 } 971 972 memcpy(adev->mes.data_fw_ptr[inst], fw_data, fw_size); 973 974 amdgpu_bo_kunmap(adev->mes.data_fw_obj[inst]); 975 amdgpu_bo_unreserve(adev->mes.data_fw_obj[inst]); 976 977 return 0; 978 } 979 980 static void mes_v12_1_free_ucode_buffers(struct amdgpu_device *adev, 981 enum amdgpu_mes_pipe pipe, 982 int xcc_id) 983 { 984 int inst = MES_PIPE_INST(xcc_id, pipe); 985 986 amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[inst], 987 &adev->mes.data_fw_gpu_addr[inst], 988 (void **)&adev->mes.data_fw_ptr[inst]); 989 990 amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[inst], 991 &adev->mes.ucode_fw_gpu_addr[inst], 992 (void **)&adev->mes.ucode_fw_ptr[inst]); 993 } 994 995 static void mes_v12_1_enable(struct amdgpu_device *adev, 996 bool enable, int xcc_id) 997 { 998 uint64_t ucode_addr; 999 uint32_t pipe, data = 0; 1000 1001 if (enable) { 1002 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL); 1003 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); 1004 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); 1005 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); 1006 1007 mutex_lock(&adev->srbm_mutex); 1008 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1009 soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, 1010 GET_INST(GC, xcc_id)); 1011 1012 ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; 1013 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 1014 regCP_MES_PRGRM_CNTR_START, 1015 lower_32_bits(ucode_addr)); 1016 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 1017 regCP_MES_PRGRM_CNTR_START_HI, 1018 upper_32_bits(ucode_addr)); 1019 } 1020 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1021 mutex_unlock(&adev->srbm_mutex); 1022 1023 /* unhalt MES and activate pipe0 */ 1024 data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); 1025 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); 1026 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); 1027 1028 if (amdgpu_emu_mode) 1029 msleep(500); 1030 else if (adev->enable_uni_mes) 1031 udelay(500); 1032 else 1033 udelay(50); 1034 } else { 1035 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL); 1036 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0); 1037 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0); 1038 data = REG_SET_FIELD(data, CP_MES_CNTL, 1039 MES_INVALIDATE_ICACHE, 1); 1040 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); 1041 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); 1042 data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); 1043 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); 1044 } 1045 } 1046 1047 static void mes_v12_1_set_ucode_start_addr(struct amdgpu_device *adev, 1048 int xcc_id) 1049 { 1050 uint64_t ucode_addr; 1051 int pipe; 1052 1053 mes_v12_1_enable(adev, false, xcc_id); 1054 1055 mutex_lock(&adev->srbm_mutex); 1056 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1057 /* me=3, queue=0 */ 1058 soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); 1059 1060 /* set ucode start address */ 1061 ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; 1062 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START, 1063 lower_32_bits(ucode_addr)); 1064 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START_HI, 1065 upper_32_bits(ucode_addr)); 1066 1067 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1068 } 1069 mutex_unlock(&adev->srbm_mutex); 1070 } 1071 1072 /* This function is for backdoor MES firmware */ 1073 static int mes_v12_1_load_microcode(struct amdgpu_device *adev, 1074 enum amdgpu_mes_pipe pipe, 1075 bool prime_icache, int xcc_id) 1076 { 1077 int r, inst = MES_PIPE_INST(xcc_id, pipe); 1078 uint32_t data; 1079 1080 mes_v12_1_enable(adev, false, xcc_id); 1081 1082 if (!adev->mes.fw[pipe]) 1083 return -EINVAL; 1084 1085 r = mes_v12_1_allocate_ucode_buffer(adev, pipe, xcc_id); 1086 if (r) 1087 return r; 1088 1089 r = mes_v12_1_allocate_ucode_data_buffer(adev, pipe, xcc_id); 1090 if (r) { 1091 mes_v12_1_free_ucode_buffers(adev, pipe, xcc_id); 1092 return r; 1093 } 1094 1095 mutex_lock(&adev->srbm_mutex); 1096 /* me=3, pipe=0, queue=0 */ 1097 soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); 1098 1099 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_CNTL, 0); 1100 1101 /* set ucode fimrware address */ 1102 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_LO, 1103 lower_32_bits(adev->mes.ucode_fw_gpu_addr[inst])); 1104 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_HI, 1105 upper_32_bits(adev->mes.ucode_fw_gpu_addr[inst])); 1106 1107 /* set ucode instruction cache boundary to 2M-1 */ 1108 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MIBOUND_LO, 0x1FFFFF); 1109 1110 /* set ucode data firmware address */ 1111 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_LO, 1112 lower_32_bits(adev->mes.data_fw_gpu_addr[inst])); 1113 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_HI, 1114 upper_32_bits(adev->mes.data_fw_gpu_addr[inst])); 1115 1116 /* Set data cache boundary CP_MES_MDBOUND_LO */ 1117 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBOUND_LO, 0x7FFFF); 1118 1119 if (prime_icache) { 1120 /* invalidate ICACHE */ 1121 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL); 1122 data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0); 1123 data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1); 1124 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data); 1125 1126 /* prime the ICACHE. */ 1127 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL); 1128 data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1); 1129 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data); 1130 } 1131 1132 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1133 mutex_unlock(&adev->srbm_mutex); 1134 1135 return 0; 1136 } 1137 1138 static int mes_v12_1_allocate_eop_buf(struct amdgpu_device *adev, 1139 enum amdgpu_mes_pipe pipe, 1140 int xcc_id) 1141 { 1142 int r, inst = MES_PIPE_INST(xcc_id, pipe); 1143 u32 *eop; 1144 1145 r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE, 1146 AMDGPU_GEM_DOMAIN_GTT, 1147 &adev->mes.eop_gpu_obj[inst], 1148 &adev->mes.eop_gpu_addr[inst], 1149 (void **)&eop); 1150 if (r) { 1151 dev_warn(adev->dev, "(%d) create EOP bo failed\n", r); 1152 return r; 1153 } 1154 1155 memset(eop, 0, 1156 adev->mes.eop_gpu_obj[inst]->tbo.base.size); 1157 1158 amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[inst]); 1159 amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[inst]); 1160 1161 return 0; 1162 } 1163 1164 static int mes_v12_1_allocate_shared_cmd_buf(struct amdgpu_device *adev, 1165 enum amdgpu_mes_pipe pipe, 1166 int xcc_id) 1167 { 1168 int r, inst = MES_PIPE_INST(xcc_id, pipe); 1169 1170 r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, 1171 AMDGPU_GEM_DOMAIN_VRAM, 1172 &adev->mes.shared_cmd_buf_obj[inst], 1173 &adev->mes.shared_cmd_buf_gpu_addr[inst], 1174 NULL); 1175 if (r) { 1176 dev_err(adev->dev, 1177 "(%d) failed to create shared cmd buf bo\n", r); 1178 return r; 1179 } 1180 1181 return 0; 1182 } 1183 1184 static int mes_v12_1_mqd_init(struct amdgpu_ring *ring) 1185 { 1186 struct v12_1_mes_mqd *mqd = ring->mqd_ptr; 1187 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 1188 uint32_t tmp; 1189 1190 mqd->header = 0xC0310800; 1191 mqd->compute_pipelinestat_enable = 0x00000001; 1192 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 1193 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 1194 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 1195 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 1196 mqd->compute_misc_reserved = 0x00000007; 1197 1198 eop_base_addr = ring->eop_gpu_addr >> 8; 1199 1200 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 1201 tmp = regCP_HQD_EOP_CONTROL_DEFAULT; 1202 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 1203 (order_base_2(MES_EOP_SIZE / 4) - 1)); 1204 1205 mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr); 1206 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 1207 mqd->cp_hqd_eop_control = tmp; 1208 1209 /* disable the queue if it's active */ 1210 ring->wptr = 0; 1211 mqd->cp_hqd_pq_rptr = 0; 1212 mqd->cp_hqd_pq_wptr_lo = 0; 1213 mqd->cp_hqd_pq_wptr_hi = 0; 1214 1215 /* set the pointer to the MQD */ 1216 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 1217 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 1218 1219 /* set MQD vmid to 0 */ 1220 tmp = regCP_MQD_CONTROL_DEFAULT; 1221 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 1222 mqd->cp_mqd_control = tmp; 1223 1224 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 1225 hqd_gpu_addr = ring->gpu_addr >> 8; 1226 mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr); 1227 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 1228 1229 /* set the wb address whether it's enabled or not */ 1230 wb_gpu_addr = ring->rptr_gpu_addr; 1231 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 1232 mqd->cp_hqd_pq_rptr_report_addr_hi = 1233 upper_32_bits(wb_gpu_addr) & 0xffff; 1234 1235 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 1236 wb_gpu_addr = ring->wptr_gpu_addr; 1237 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8; 1238 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 1239 1240 /* set up the HQD, this is similar to CP_RB0_CNTL */ 1241 tmp = regCP_HQD_PQ_CONTROL_DEFAULT; 1242 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 1243 (order_base_2(ring->ring_size / 4) - 1)); 1244 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 1245 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 1246 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); 1247 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); 1248 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 1249 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 1250 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1); 1251 mqd->cp_hqd_pq_control = tmp; 1252 1253 /* enable doorbell */ 1254 tmp = 0; 1255 if (ring->use_doorbell) { 1256 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1257 DOORBELL_OFFSET, ring->doorbell_index); 1258 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1259 DOORBELL_EN, 1); 1260 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1261 DOORBELL_SOURCE, 0); 1262 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1263 DOORBELL_HIT, 0); 1264 } else { 1265 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1266 DOORBELL_EN, 0); 1267 } 1268 mqd->cp_hqd_pq_doorbell_control = tmp; 1269 1270 mqd->cp_hqd_vmid = 0; 1271 /* activate the queue */ 1272 mqd->cp_hqd_active = 1; 1273 1274 tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; 1275 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, 1276 PRELOAD_SIZE, 0x63); 1277 mqd->cp_hqd_persistent_state = tmp; 1278 1279 mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT; 1280 mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT; 1281 mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT; 1282 1283 /* 1284 * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped 1285 * doorbell handling. This is a reserved CP internal register can 1286 * not be accesss by others 1287 */ 1288 mqd->cp_hqd_gfx_control = BIT(15); 1289 1290 return 0; 1291 } 1292 1293 static void mes_v12_1_queue_init_register(struct amdgpu_ring *ring, 1294 int xcc_id) 1295 { 1296 struct v12_1_mes_mqd *mqd = ring->mqd_ptr; 1297 struct amdgpu_device *adev = ring->adev; 1298 uint32_t data = 0; 1299 1300 mutex_lock(&adev->srbm_mutex); 1301 soc_v1_0_grbm_select(adev, 3, ring->pipe, 0, 0, GET_INST(GC, xcc_id)); 1302 1303 /* set CP_HQD_VMID.VMID = 0. */ 1304 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID); 1305 data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0); 1306 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, data); 1307 1308 /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */ 1309 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL); 1310 data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, 1311 DOORBELL_EN, 0); 1312 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data); 1313 1314 /* set CP_MQD_BASE_ADDR/HI with the MQD base address */ 1315 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); 1316 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); 1317 1318 /* set CP_MQD_CONTROL.VMID=0 */ 1319 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL); 1320 data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0); 1321 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL, 0); 1322 1323 /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */ 1324 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); 1325 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); 1326 1327 /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */ 1328 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR, 1329 mqd->cp_hqd_pq_rptr_report_addr_lo); 1330 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 1331 mqd->cp_hqd_pq_rptr_report_addr_hi); 1332 1333 /* set CP_HQD_PQ_CONTROL */ 1334 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); 1335 1336 /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */ 1337 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR, 1338 mqd->cp_hqd_pq_wptr_poll_addr_lo); 1339 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 1340 mqd->cp_hqd_pq_wptr_poll_addr_hi); 1341 1342 /* set CP_HQD_PQ_DOORBELL_CONTROL */ 1343 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 1344 mqd->cp_hqd_pq_doorbell_control); 1345 1346 /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */ 1347 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); 1348 1349 /* set CP_HQD_ACTIVE.ACTIVE=1 */ 1350 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, mqd->cp_hqd_active); 1351 1352 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1353 mutex_unlock(&adev->srbm_mutex); 1354 } 1355 1356 static int mes_v12_1_kiq_enable_queue(struct amdgpu_device *adev, int xcc_id) 1357 { 1358 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 1359 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[xcc_id].ring; 1360 int r, inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 1361 1362 if (!kiq->pmf || !kiq->pmf->kiq_map_queues) 1363 return -EINVAL; 1364 1365 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); 1366 if (r) { 1367 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 1368 return r; 1369 } 1370 1371 kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[inst]); 1372 1373 r = amdgpu_ring_test_ring(kiq_ring); 1374 if (r) { 1375 DRM_ERROR("kfq enable failed\n"); 1376 kiq_ring->sched.ready = false; 1377 } 1378 return r; 1379 } 1380 1381 static int mes_v12_1_queue_init(struct amdgpu_device *adev, 1382 enum amdgpu_mes_pipe pipe, 1383 int xcc_id) 1384 { 1385 struct amdgpu_ring *ring; 1386 int r; 1387 1388 if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) 1389 ring = &adev->gfx.kiq[xcc_id].ring; 1390 else 1391 ring = &adev->mes.ring[MES_PIPE_INST(xcc_id, pipe)]; 1392 1393 if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) && 1394 (amdgpu_in_reset(adev) || adev->in_suspend)) { 1395 *(ring->wptr_cpu_addr) = 0; 1396 *(ring->rptr_cpu_addr) = 0; 1397 amdgpu_ring_clear_ring(ring); 1398 } 1399 1400 r = mes_v12_1_mqd_init(ring); 1401 if (r) 1402 return r; 1403 1404 if (pipe == AMDGPU_MES_SCHED_PIPE) { 1405 if (adev->enable_uni_mes) 1406 r = amdgpu_mes_map_legacy_queue(adev, ring, xcc_id); 1407 else 1408 r = mes_v12_1_kiq_enable_queue(adev, xcc_id); 1409 if (r) 1410 return r; 1411 } else { 1412 mes_v12_1_queue_init_register(ring, xcc_id); 1413 } 1414 1415 return 0; 1416 } 1417 1418 static int mes_v12_1_ring_init(struct amdgpu_device *adev, 1419 int xcc_id, int pipe) 1420 { 1421 struct amdgpu_ring *ring; 1422 int inst = MES_PIPE_INST(xcc_id, pipe); 1423 1424 ring = &adev->mes.ring[inst]; 1425 1426 ring->funcs = &mes_v12_1_ring_funcs; 1427 1428 ring->me = 3; 1429 ring->pipe = pipe; 1430 ring->queue = 0; 1431 ring->xcc_id = xcc_id; 1432 ring->vm_hub = AMDGPU_GFXHUB(xcc_id); 1433 1434 ring->ring_obj = NULL; 1435 ring->use_doorbell = true; 1436 ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst]; 1437 ring->no_scheduler = true; 1438 snprintf(ring->name, sizeof(ring->name), "mes_%hhu.%hhu.%hhu.%hhu", 1439 (unsigned char)xcc_id, (unsigned char)ring->me, 1440 (unsigned char)ring->pipe, (unsigned char)ring->queue); 1441 1442 if (pipe == AMDGPU_MES_SCHED_PIPE) 1443 ring->doorbell_index = 1444 (adev->doorbell_index.mes_ring0 + 1445 xcc_id * adev->doorbell_index.xcc_doorbell_range) 1446 << 1; 1447 else 1448 ring->doorbell_index = 1449 (adev->doorbell_index.mes_ring1 + 1450 xcc_id * adev->doorbell_index.xcc_doorbell_range) 1451 << 1; 1452 1453 return amdgpu_ring_init(adev, ring, 1024, NULL, 0, 1454 AMDGPU_RING_PRIO_DEFAULT, NULL); 1455 } 1456 1457 static int mes_v12_1_kiq_ring_init(struct amdgpu_device *adev, int xcc_id) 1458 { 1459 struct amdgpu_ring *ring; 1460 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE); 1461 1462 spin_lock_init(&adev->gfx.kiq[xcc_id].ring_lock); 1463 1464 ring = &adev->gfx.kiq[xcc_id].ring; 1465 1466 ring->me = 3; 1467 ring->pipe = 1; 1468 ring->queue = 0; 1469 ring->xcc_id = xcc_id; 1470 ring->vm_hub = AMDGPU_GFXHUB(xcc_id); 1471 1472 ring->adev = NULL; 1473 ring->ring_obj = NULL; 1474 ring->use_doorbell = true; 1475 ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst]; 1476 ring->no_scheduler = true; 1477 ring->doorbell_index = 1478 (adev->doorbell_index.mes_ring1 + 1479 xcc_id * adev->doorbell_index.xcc_doorbell_range) 1480 << 1; 1481 1482 snprintf(ring->name, sizeof(ring->name), "mes_kiq_%hhu.%hhu.%hhu.%hhu", 1483 (unsigned char)xcc_id, (unsigned char)ring->me, 1484 (unsigned char)ring->pipe, (unsigned char)ring->queue); 1485 1486 return amdgpu_ring_init(adev, ring, 1024, NULL, 0, 1487 AMDGPU_RING_PRIO_DEFAULT, NULL); 1488 } 1489 1490 static int mes_v12_1_mqd_sw_init(struct amdgpu_device *adev, 1491 enum amdgpu_mes_pipe pipe, 1492 int xcc_id) 1493 { 1494 int r, mqd_size = sizeof(struct v12_1_mes_mqd); 1495 struct amdgpu_ring *ring; 1496 int inst = MES_PIPE_INST(xcc_id, pipe); 1497 1498 if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) 1499 ring = &adev->gfx.kiq[xcc_id].ring; 1500 else 1501 ring = &adev->mes.ring[inst]; 1502 1503 if (ring->mqd_obj) 1504 return 0; 1505 1506 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, 1507 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, 1508 &ring->mqd_gpu_addr, &ring->mqd_ptr); 1509 if (r) { 1510 dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); 1511 return r; 1512 } 1513 1514 memset(ring->mqd_ptr, 0, mqd_size); 1515 1516 /* prepare MQD backup */ 1517 adev->mes.mqd_backup[inst] = kmalloc(mqd_size, GFP_KERNEL); 1518 if (!adev->mes.mqd_backup[inst]) 1519 dev_warn(adev->dev, 1520 "no memory to create MQD backup for ring %s\n", 1521 ring->name); 1522 1523 return 0; 1524 } 1525 1526 static int mes_v12_1_sw_init(struct amdgpu_ip_block *ip_block) 1527 { 1528 struct amdgpu_device *adev = ip_block->adev; 1529 int pipe, r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1530 1531 adev->mes.funcs = &mes_v12_1_funcs; 1532 adev->mes.kiq_hw_init = &mes_v12_1_kiq_hw_init; 1533 adev->mes.kiq_hw_fini = &mes_v12_1_kiq_hw_fini; 1534 adev->mes.enable_legacy_queue_map = true; 1535 1536 adev->mes.event_log_size = 1537 adev->enable_uni_mes ? (AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE * num_xcc) : AMDGPU_MES_LOG_BUFFER_SIZE; 1538 1539 r = amdgpu_mes_init(adev); 1540 if (r) 1541 return r; 1542 1543 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1544 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1545 r = mes_v12_1_allocate_eop_buf(adev, pipe, xcc_id); 1546 if (r) 1547 return r; 1548 1549 r = mes_v12_1_mqd_sw_init(adev, pipe, xcc_id); 1550 if (r) 1551 return r; 1552 1553 if (!adev->enable_uni_mes && pipe == 1554 AMDGPU_MES_KIQ_PIPE) 1555 r = mes_v12_1_kiq_ring_init(adev, xcc_id); 1556 else 1557 r = mes_v12_1_ring_init(adev, xcc_id, pipe); 1558 if (r) 1559 return r; 1560 1561 if (adev->enable_uni_mes && num_xcc > 1) { 1562 r = mes_v12_1_allocate_shared_cmd_buf(adev, 1563 pipe, xcc_id); 1564 if (r) 1565 return r; 1566 } 1567 } 1568 } 1569 1570 return 0; 1571 } 1572 1573 static int mes_v12_1_sw_fini(struct amdgpu_ip_block *ip_block) 1574 { 1575 struct amdgpu_device *adev = ip_block->adev; 1576 int pipe, inst, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1577 1578 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1579 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1580 inst = MES_PIPE_INST(xcc_id, pipe); 1581 1582 amdgpu_bo_free_kernel(&adev->mes.shared_cmd_buf_obj[inst], 1583 &adev->mes.shared_cmd_buf_gpu_addr[inst], 1584 NULL); 1585 1586 kfree(adev->mes.mqd_backup[inst]); 1587 1588 amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[inst], 1589 &adev->mes.eop_gpu_addr[inst], 1590 NULL); 1591 1592 if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) { 1593 amdgpu_bo_free_kernel(&adev->mes.ring[inst].mqd_obj, 1594 &adev->mes.ring[inst].mqd_gpu_addr, 1595 &adev->mes.ring[inst].mqd_ptr); 1596 amdgpu_ring_fini(&adev->mes.ring[inst]); 1597 } 1598 } 1599 } 1600 1601 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) 1602 amdgpu_ucode_release(&adev->mes.fw[pipe]); 1603 1604 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1605 if (!adev->enable_uni_mes) { 1606 amdgpu_bo_free_kernel(&adev->gfx.kiq[xcc_id].ring.mqd_obj, 1607 &adev->gfx.kiq[xcc_id].ring.mqd_gpu_addr, 1608 &adev->gfx.kiq[xcc_id].ring.mqd_ptr); 1609 amdgpu_ring_fini(&adev->gfx.kiq[xcc_id].ring); 1610 } 1611 1612 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1613 mes_v12_1_free_ucode_buffers(adev, 1614 AMDGPU_MES_KIQ_PIPE, xcc_id); 1615 mes_v12_1_free_ucode_buffers(adev, 1616 AMDGPU_MES_SCHED_PIPE, xcc_id); 1617 } 1618 } 1619 1620 amdgpu_mes_fini(adev); 1621 return 0; 1622 } 1623 1624 static void mes_v12_1_kiq_dequeue_sched(struct amdgpu_device *adev, 1625 int xcc_id) 1626 { 1627 uint32_t data; 1628 int i; 1629 1630 mutex_lock(&adev->srbm_mutex); 1631 soc_v1_0_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0, 1632 GET_INST(GC, xcc_id)); 1633 1634 /* disable the queue if it's active */ 1635 if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) { 1636 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1); 1637 for (i = 0; i < adev->usec_timeout; i++) { 1638 if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) 1639 break; 1640 udelay(1); 1641 } 1642 } 1643 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL); 1644 data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, 1645 DOORBELL_EN, 0); 1646 data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, 1647 DOORBELL_HIT, 1); 1648 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data); 1649 1650 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0); 1651 1652 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0); 1653 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0); 1654 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0); 1655 1656 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1657 mutex_unlock(&adev->srbm_mutex); 1658 1659 adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = false; 1660 } 1661 1662 static void mes_v12_1_kiq_setting(struct amdgpu_ring *ring, int xcc_id) 1663 { 1664 uint32_t tmp; 1665 struct amdgpu_device *adev = ring->adev; 1666 1667 /* tell RLC which is KIQ queue */ 1668 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS); 1669 tmp &= 0xffffff00; 1670 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 1671 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); 1672 tmp |= 0x80; 1673 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); 1674 } 1675 1676 static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id) 1677 { 1678 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE); 1679 int r = 0; 1680 struct amdgpu_ip_block *ip_block; 1681 1682 if (adev->enable_uni_mes) 1683 mes_v12_1_kiq_setting(&adev->mes.ring[inst], xcc_id); 1684 else 1685 mes_v12_1_kiq_setting(&adev->gfx.kiq[xcc_id].ring, xcc_id); 1686 1687 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1688 1689 r = mes_v12_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, 1690 false, xcc_id); 1691 if (r) { 1692 DRM_ERROR("failed to load MES fw, r=%d\n", r); 1693 return r; 1694 } 1695 1696 r = mes_v12_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, 1697 true, xcc_id); 1698 if (r) { 1699 DRM_ERROR("failed to load MES kiq fw, r=%d\n", r); 1700 return r; 1701 } 1702 1703 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1704 1705 } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1706 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1707 1708 mes_v12_1_enable(adev, true, xcc_id); 1709 1710 ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES); 1711 if (unlikely(!ip_block)) { 1712 dev_err(adev->dev, "Failed to get MES handle\n"); 1713 return -EINVAL; 1714 } 1715 1716 r = mes_v12_1_queue_init(adev, AMDGPU_MES_KIQ_PIPE, xcc_id); 1717 if (r) 1718 goto failure; 1719 1720 if (adev->enable_uni_mes) { 1721 r = mes_v12_1_setup_coop_mode(adev, xcc_id); 1722 if (r) 1723 goto failure; 1724 1725 r = mes_v12_1_set_hw_resources(&adev->mes, 1726 AMDGPU_MES_KIQ_PIPE, xcc_id); 1727 if (r) 1728 goto failure; 1729 1730 mes_v12_1_set_hw_resources_1(&adev->mes, 1731 AMDGPU_MES_KIQ_PIPE, xcc_id); 1732 } 1733 1734 if (adev->mes.enable_legacy_queue_map) { 1735 r = mes_v12_1_xcc_hw_init(ip_block, xcc_id); 1736 if (r) 1737 goto failure; 1738 } 1739 1740 return r; 1741 1742 failure: 1743 mes_v12_1_hw_fini(ip_block); 1744 return r; 1745 } 1746 1747 static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id) 1748 { 1749 int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 1750 1751 if (adev->mes.ring[inst].sched.ready) { 1752 if (adev->enable_uni_mes) 1753 amdgpu_mes_unmap_legacy_queue(adev, 1754 &adev->mes.ring[inst], 1755 RESET_QUEUES, 0, 0, xcc_id); 1756 else 1757 mes_v12_1_kiq_dequeue_sched(adev, xcc_id); 1758 1759 adev->mes.ring[inst].sched.ready = false; 1760 } 1761 1762 mes_v12_1_enable(adev, false, xcc_id); 1763 1764 return 0; 1765 } 1766 1767 static int mes_v12_1_setup_coop_mode(struct amdgpu_device *adev, int xcc_id) 1768 { 1769 u32 num_xcc_per_xcp, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1770 int r = 0; 1771 1772 if (num_xcc == 1) 1773 return r; 1774 1775 if (adev->gfx.funcs && 1776 adev->gfx.funcs->get_xccs_per_xcp) 1777 num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev); 1778 else 1779 return -EINVAL; 1780 1781 switch (adev->xcp_mgr->mode) { 1782 case AMDGPU_SPX_PARTITION_MODE: 1783 adev->mes.enable_coop_mode = 1; 1784 adev->mes.master_xcc_ids[xcc_id] = 0; 1785 break; 1786 case AMDGPU_DPX_PARTITION_MODE: 1787 adev->mes.enable_coop_mode = 1; 1788 adev->mes.master_xcc_ids[xcc_id] = 1789 (xcc_id/num_xcc_per_xcp) * (num_xcc / 2); 1790 break; 1791 case AMDGPU_QPX_PARTITION_MODE: 1792 adev->mes.enable_coop_mode = 1; 1793 adev->mes.master_xcc_ids[xcc_id] = 1794 (xcc_id/num_xcc_per_xcp) * (num_xcc / 4); 1795 break; 1796 case AMDGPU_CPX_PARTITION_MODE: 1797 adev->mes.enable_coop_mode = 0; 1798 break; 1799 default: 1800 r = -EINVAL; 1801 break; 1802 } 1803 return r; 1804 } 1805 1806 static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id) 1807 { 1808 int r; 1809 struct amdgpu_device *adev = ip_block->adev; 1810 1811 if (adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready) 1812 goto out; 1813 1814 if (!adev->enable_mes_kiq) { 1815 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1816 r = mes_v12_1_load_microcode(adev, 1817 AMDGPU_MES_SCHED_PIPE, true, xcc_id); 1818 if (r) { 1819 DRM_ERROR("failed to MES fw, r=%d\n", r); 1820 return r; 1821 } 1822 1823 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1824 1825 } else if (adev->firmware.load_type == 1826 AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1827 1828 mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1829 } 1830 1831 mes_v12_1_enable(adev, true, xcc_id); 1832 } 1833 1834 /* Enable the MES to handle doorbell ring on unmapped queue */ 1835 mes_v12_1_enable_unmapped_doorbell_handling(&adev->mes, true, xcc_id); 1836 1837 r = mes_v12_1_queue_init(adev, AMDGPU_MES_SCHED_PIPE, xcc_id); 1838 if (r) 1839 goto failure; 1840 1841 r = mes_v12_1_set_hw_resources(&adev->mes, 1842 AMDGPU_MES_SCHED_PIPE, xcc_id); 1843 if (r) 1844 goto failure; 1845 1846 if (adev->enable_uni_mes) { 1847 mes_v12_1_set_hw_resources_1(&adev->mes, 1848 AMDGPU_MES_SCHED_PIPE, xcc_id); 1849 } 1850 mes_v12_1_init_aggregated_doorbell(&adev->mes, xcc_id); 1851 1852 r = mes_v12_1_query_sched_status(&adev->mes, 1853 AMDGPU_MES_SCHED_PIPE, xcc_id); 1854 if (r) { 1855 DRM_ERROR("MES is busy\n"); 1856 goto failure; 1857 } 1858 1859 out: 1860 /* 1861 * Disable KIQ ring usage from the driver once MES is enabled. 1862 * MES uses KIQ ring exclusively so driver cannot access KIQ ring 1863 * with MES enabled. 1864 */ 1865 adev->gfx.kiq[xcc_id].ring.sched.ready = false; 1866 adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = true; 1867 1868 return 0; 1869 1870 failure: 1871 mes_v12_1_hw_fini(ip_block); 1872 return r; 1873 } 1874 1875 static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block) 1876 { 1877 struct amdgpu_device *adev = ip_block->adev; 1878 int r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1879 1880 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1881 r = mes_v12_1_xcc_hw_init(ip_block, xcc_id); 1882 if (r) 1883 return r; 1884 } 1885 1886 return 0; 1887 } 1888 1889 static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block) 1890 { 1891 return 0; 1892 } 1893 1894 static int mes_v12_1_suspend(struct amdgpu_ip_block *ip_block) 1895 { 1896 return mes_v12_1_hw_fini(ip_block); 1897 } 1898 1899 static int mes_v12_1_resume(struct amdgpu_ip_block *ip_block) 1900 { 1901 return mes_v12_1_hw_init(ip_block); 1902 } 1903 1904 static int mes_v12_1_early_init(struct amdgpu_ip_block *ip_block) 1905 { 1906 struct amdgpu_device *adev = ip_block->adev; 1907 int pipe, r; 1908 1909 for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1910 r = amdgpu_mes_init_microcode(adev, pipe); 1911 if (r) 1912 return r; 1913 } 1914 1915 return 0; 1916 } 1917 1918 static int mes_v12_1_late_init(struct amdgpu_ip_block *ip_block) 1919 { 1920 struct amdgpu_device *adev = ip_block->adev; 1921 int xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1922 1923 /* TODO: remove it if issue fixed. */ 1924 if (adev->mes.enable_coop_mode) 1925 return 0; 1926 1927 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1928 /* for COOP mode, only test master xcc. */ 1929 if (adev->mes.enable_coop_mode && 1930 adev->mes.master_xcc_ids[xcc_id] != xcc_id) 1931 continue; 1932 1933 mes_v12_1_self_test(adev, xcc_id); 1934 } 1935 1936 return 0; 1937 } 1938 1939 static const struct amd_ip_funcs mes_v12_1_ip_funcs = { 1940 .name = "mes_v12_1", 1941 .early_init = mes_v12_1_early_init, 1942 .late_init = mes_v12_1_late_init, 1943 .sw_init = mes_v12_1_sw_init, 1944 .sw_fini = mes_v12_1_sw_fini, 1945 .hw_init = mes_v12_1_hw_init, 1946 .hw_fini = mes_v12_1_hw_fini, 1947 .suspend = mes_v12_1_suspend, 1948 .resume = mes_v12_1_resume, 1949 }; 1950 1951 const struct amdgpu_ip_block_version mes_v12_1_ip_block = { 1952 .type = AMD_IP_BLOCK_TYPE_MES, 1953 .major = 12, 1954 .minor = 1, 1955 .rev = 0, 1956 .funcs = &mes_v12_1_ip_funcs, 1957 }; 1958 1959 static int mes_v12_1_alloc_test_buf(struct amdgpu_device *adev, 1960 struct amdgpu_bo **bo, uint64_t *addr, 1961 void **ptr, int size) 1962 { 1963 amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1964 bo, addr, ptr); 1965 if (!*bo) { 1966 dev_err(adev->dev, "failed to allocate test buffer bo\n"); 1967 return -ENOMEM; 1968 } 1969 memset(*ptr, 0, size); 1970 return 0; 1971 } 1972 1973 static int mes_v12_1_map_test_bo(struct amdgpu_device *adev, 1974 struct amdgpu_bo *bo, struct amdgpu_vm *vm, 1975 struct amdgpu_bo_va **bo_va, u64 va, int size) 1976 { 1977 struct amdgpu_sync sync; 1978 int r; 1979 1980 r = amdgpu_map_static_csa(adev, vm, bo, bo_va, va, size); 1981 if (r) 1982 return r; 1983 1984 amdgpu_sync_create(&sync); 1985 1986 r = amdgpu_vm_bo_update(adev, *bo_va, false); 1987 if (r) { 1988 dev_err(adev->dev, "failed to do vm_bo_update on meta data\n"); 1989 goto error; 1990 } 1991 amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update, GFP_KERNEL); 1992 1993 r = amdgpu_vm_update_pdes(adev, vm, false); 1994 if (r) { 1995 dev_err(adev->dev, "failed to update pdes on meta data\n"); 1996 goto error; 1997 } 1998 amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL); 1999 amdgpu_sync_wait(&sync, false); 2000 2001 error: 2002 amdgpu_sync_free(&sync); 2003 return 0; 2004 } 2005 2006 static int mes_v12_1_test_ring(struct amdgpu_device *adev, int xcc_id, 2007 u32 *queue_ptr, u64 fence_gpu_addr, 2008 void *fence_cpu_ptr, void *wptr_cpu_addr, 2009 u64 doorbell_idx, int queue_type) 2010 { 2011 volatile uint32_t *cpu_ptr = fence_cpu_ptr; 2012 int num_xcc = NUM_XCC(adev->gfx.xcc_mask); 2013 int sdma_ring_align = 0x10, compute_ring_align = 0x100; 2014 uint32_t tmp, xcc_offset; 2015 int r = 0, i, j, wptr = 0; 2016 2017 if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { 2018 if (!adev->mes.enable_coop_mode) { 2019 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 2020 regSCRATCH_REG0, 0xCAFEDEAD); 2021 } else { 2022 for (i = 0; i < num_xcc; i++) { 2023 if (adev->mes.master_xcc_ids[i] == xcc_id) 2024 WREG32_SOC15(GC, GET_INST(GC, i), 2025 regSCRATCH_REG0, 0xCAFEDEAD); 2026 } 2027 } 2028 2029 xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); 2030 queue_ptr[wptr++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 2031 queue_ptr[wptr++] = xcc_offset - PACKET3_SET_UCONFIG_REG_START; 2032 queue_ptr[wptr++] = 0xDEADBEEF; 2033 2034 for (i = wptr; i < compute_ring_align; i++) 2035 queue_ptr[wptr++] = PACKET3(PACKET3_NOP, 0x3FFF); 2036 2037 } else if (queue_type == AMDGPU_RING_TYPE_SDMA) { 2038 *cpu_ptr = 0xCAFEDEAD; 2039 2040 queue_ptr[wptr++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | 2041 SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); 2042 queue_ptr[wptr++] = lower_32_bits(fence_gpu_addr); 2043 queue_ptr[wptr++] = upper_32_bits(fence_gpu_addr); 2044 queue_ptr[wptr++] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0); 2045 queue_ptr[wptr++] = 0xDEADBEEF; 2046 2047 for (i = wptr; i < sdma_ring_align; i++) 2048 queue_ptr[wptr++] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); 2049 2050 wptr <<= 2; 2051 } 2052 2053 atomic64_set((atomic64_t *)wptr_cpu_addr, wptr); 2054 WDOORBELL64(doorbell_idx, wptr); 2055 2056 for (i = 0; i < adev->usec_timeout; i++) { 2057 if (queue_type == AMDGPU_RING_TYPE_SDMA) { 2058 tmp = le32_to_cpu(*cpu_ptr); 2059 } else { 2060 if (!adev->mes.enable_coop_mode) { 2061 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), 2062 regSCRATCH_REG0); 2063 } else { 2064 for (j = 0; j < num_xcc; j++) { 2065 if (xcc_id != adev->mes.master_xcc_ids[j]) 2066 continue; 2067 2068 tmp = RREG32_SOC15(GC, GET_INST(GC, j), 2069 regSCRATCH_REG0); 2070 if (tmp != 0xDEADBEEF) 2071 break; 2072 } 2073 } 2074 } 2075 2076 if (tmp == 0xDEADBEEF) 2077 break; 2078 2079 if (amdgpu_emu_mode == 1) 2080 msleep(1); 2081 else 2082 udelay(1); 2083 } 2084 2085 if (i >= adev->usec_timeout) { 2086 dev_err(adev->dev, "xcc%d: mes self test (%s) failed\n", xcc_id, 2087 queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute"); 2088 2089 while (halt_if_hws_hang) 2090 schedule(); 2091 2092 r = -ETIMEDOUT; 2093 } else { 2094 dev_info(adev->dev, "xcc%d: mes self test (%s) pass\n", xcc_id, 2095 queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute"); 2096 } 2097 2098 return r; 2099 } 2100 2101 #define USER_CTX_SIZE (PAGE_SIZE * 2) 2102 #define USER_CTX_VA AMDGPU_VA_RESERVED_BOTTOM 2103 #define RING_OFFSET(addr) ((addr)) 2104 #define EOP_OFFSET(addr) ((addr) + PAGE_SIZE) 2105 #define WPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64)) 2106 #define RPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 2) 2107 #define FENCE_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 3) 2108 2109 static int mes_v12_1_test_queue(struct amdgpu_device *adev, int xcc_id, 2110 int pasid, struct amdgpu_vm *vm, u64 meta_gpu_addr, 2111 u64 queue_gpu_addr, void *ctx_ptr, int queue_type) 2112 { 2113 struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; 2114 struct amdgpu_mqd *mqd_mgr = &adev->mqds[queue_type]; 2115 struct amdgpu_mqd_prop mqd_prop = {0}; 2116 struct mes_add_queue_input add_queue = {0}; 2117 struct mes_remove_queue_input remove_queue = {0}; 2118 struct amdgpu_bo *mqd_bo = NULL; 2119 int num_xcc = NUM_XCC(adev->gfx.xcc_mask); 2120 int i, r, off, mqd_size, mqd_count = 1; 2121 void *mqd_ptr = NULL; 2122 u64 mqd_gpu_addr, doorbell_idx; 2123 2124 /* extra one page size padding for mes fw */ 2125 mqd_size = mqd_mgr->mqd_size + PAGE_SIZE; 2126 2127 if (queue_type == AMDGPU_RING_TYPE_SDMA) { 2128 doorbell_idx = adev->mes.db_start_dw_offset + \ 2129 adev->doorbell_index.sdma_engine[0]; 2130 } else { 2131 doorbell_idx = adev->mes.db_start_dw_offset + \ 2132 adev->doorbell_index.userqueue_start; 2133 } 2134 2135 if (adev->mes.enable_coop_mode && 2136 queue_type == AMDGPU_RING_TYPE_COMPUTE) { 2137 for (i = 0, mqd_count = 0; i < num_xcc; i++) { 2138 if (adev->mes.master_xcc_ids[i] == xcc_id) 2139 mqd_count++; 2140 } 2141 mqd_size *= mqd_count; 2142 } 2143 2144 r = mes_v12_1_alloc_test_buf(adev, &mqd_bo, &mqd_gpu_addr, 2145 &mqd_ptr, mqd_size * mqd_count); 2146 if (r < 0) 2147 return r; 2148 2149 mqd_prop.mqd_gpu_addr = mqd_gpu_addr; 2150 mqd_prop.hqd_base_gpu_addr = RING_OFFSET(USER_CTX_VA); 2151 mqd_prop.eop_gpu_addr = EOP_OFFSET(USER_CTX_VA); 2152 mqd_prop.wptr_gpu_addr = WPTR_OFFSET(USER_CTX_VA); 2153 mqd_prop.rptr_gpu_addr = RPTR_OFFSET(USER_CTX_VA); 2154 mqd_prop.doorbell_index = doorbell_idx; 2155 mqd_prop.queue_size = PAGE_SIZE; 2156 mqd_prop.mqd_stride_size = mqd_size; 2157 mqd_prop.use_doorbell = true; 2158 mqd_prop.hqd_active = false; 2159 2160 mqd_mgr->init_mqd(adev, mqd_ptr, &mqd_prop); 2161 if (mqd_count > 1) { 2162 for (i = 1; i < mqd_count; i++) { 2163 off = mqd_size * i; 2164 mqd_prop.mqd_gpu_addr = mqd_gpu_addr + off; 2165 mqd_mgr->init_mqd(adev, (char *)mqd_ptr + off, 2166 &mqd_prop); 2167 } 2168 } 2169 2170 add_queue.xcc_id = xcc_id; 2171 add_queue.process_id = pasid; 2172 add_queue.page_table_base_addr = adev->vm_manager.vram_base_offset + 2173 amdgpu_bo_gpu_offset(vm->root.bo) - adev->gmc.vram_start; 2174 add_queue.process_va_start = 0; 2175 add_queue.process_va_end = adev->vm_manager.max_pfn - 1; 2176 add_queue.process_context_addr = meta_gpu_addr; 2177 add_queue.gang_context_addr = meta_gpu_addr + AMDGPU_MES_PROC_CTX_SIZE; 2178 add_queue.doorbell_offset = doorbell_idx; 2179 add_queue.mqd_addr = mqd_gpu_addr; 2180 add_queue.wptr_addr = mqd_prop.wptr_gpu_addr; 2181 add_queue.wptr_mc_addr = WPTR_OFFSET(queue_gpu_addr); 2182 add_queue.queue_type = queue_type; 2183 add_queue.vm_cntx_cntl = hub->vm_cntx_cntl; 2184 2185 r = mes_v12_1_add_hw_queue(&adev->mes, &add_queue); 2186 if (r) 2187 goto error; 2188 2189 mes_v12_1_test_ring(adev, xcc_id, (u32 *)RING_OFFSET((char *)ctx_ptr), 2190 FENCE_OFFSET(USER_CTX_VA), 2191 FENCE_OFFSET((char *)ctx_ptr), 2192 WPTR_OFFSET((char *)ctx_ptr), 2193 doorbell_idx, queue_type); 2194 2195 remove_queue.xcc_id = xcc_id; 2196 remove_queue.doorbell_offset = doorbell_idx; 2197 remove_queue.gang_context_addr = add_queue.gang_context_addr; 2198 r = mes_v12_1_remove_hw_queue(&adev->mes, &remove_queue); 2199 2200 error: 2201 amdgpu_bo_free_kernel(&mqd_bo, &mqd_gpu_addr, &mqd_ptr); 2202 return r; 2203 } 2204 2205 static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id) 2206 { 2207 int queue_types[] = { AMDGPU_RING_TYPE_COMPUTE, 2208 /* AMDGPU_RING_TYPE_SDMA */ }; 2209 struct amdgpu_bo_va *bo_va = NULL; 2210 struct amdgpu_vm *vm = NULL; 2211 struct amdgpu_bo *meta_bo = NULL, *ctx_bo = NULL; 2212 void *meta_ptr = NULL, *ctx_ptr = NULL; 2213 u64 meta_gpu_addr, ctx_gpu_addr; 2214 int size, i, r, pasid; 2215 2216 pasid = amdgpu_pasid_alloc(16); 2217 if (pasid < 0) 2218 pasid = 0; 2219 2220 size = AMDGPU_MES_PROC_CTX_SIZE + AMDGPU_MES_GANG_CTX_SIZE; 2221 r = mes_v12_1_alloc_test_buf(adev, &meta_bo, &meta_gpu_addr, 2222 &meta_ptr, size); 2223 if (r < 0) 2224 goto err2; 2225 2226 r = mes_v12_1_alloc_test_buf(adev, &ctx_bo, &ctx_gpu_addr, 2227 &ctx_ptr, USER_CTX_SIZE); 2228 if (r < 0) 2229 goto err2; 2230 2231 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 2232 if (!vm) { 2233 r = -ENOMEM; 2234 goto err2; 2235 } 2236 2237 r = amdgpu_vm_init(adev, vm, -1, pasid); 2238 if (r) 2239 goto err1; 2240 2241 r = mes_v12_1_map_test_bo(adev, ctx_bo, vm, &bo_va, 2242 USER_CTX_VA, USER_CTX_SIZE); 2243 if (r) 2244 goto err0; 2245 2246 for (i = 0; i < ARRAY_SIZE(queue_types); i++) { 2247 memset(ctx_ptr, 0, USER_CTX_SIZE); 2248 2249 r = mes_v12_1_test_queue(adev, xcc_id, pasid, vm, meta_gpu_addr, 2250 ctx_gpu_addr, ctx_ptr, queue_types[i]); 2251 if (r) 2252 break; 2253 } 2254 2255 amdgpu_unmap_static_csa(adev, vm, ctx_bo, bo_va, USER_CTX_VA); 2256 err0: 2257 amdgpu_vm_fini(adev, vm); 2258 err1: 2259 kfree(vm); 2260 err2: 2261 amdgpu_bo_free_kernel(&meta_bo, &meta_gpu_addr, &meta_ptr); 2262 amdgpu_bo_free_kernel(&ctx_bo, &ctx_gpu_addr, &ctx_ptr); 2263 amdgpu_pasid_free(pasid); 2264 return r; 2265 } 2266 2267