1e220edf2SJack Xiao /* 2e220edf2SJack Xiao * Copyright 2025 Advanced Micro Devices, Inc. 3e220edf2SJack Xiao * 4e220edf2SJack Xiao * Permission is hereby granted, free of charge, to any person obtaining a 5e220edf2SJack Xiao * copy of this software and associated documentation files (the "Software"), 6e220edf2SJack Xiao * to deal in the Software without restriction, including without limitation 7e220edf2SJack Xiao * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8e220edf2SJack Xiao * and/or sell copies of the Software, and to permit persons to whom the 9e220edf2SJack Xiao * Software is furnished to do so, subject to the following conditions: 10e220edf2SJack Xiao * 11e220edf2SJack Xiao * The above copyright notice and this permission notice shall be included in 12e220edf2SJack Xiao * all copies or substantial portions of the Software. 13e220edf2SJack Xiao * 14e220edf2SJack Xiao * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15e220edf2SJack Xiao * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16e220edf2SJack Xiao * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17e220edf2SJack Xiao * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18e220edf2SJack Xiao * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19e220edf2SJack Xiao * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20e220edf2SJack Xiao * OTHER DEALINGS IN THE SOFTWARE. 21e220edf2SJack Xiao * 22e220edf2SJack Xiao */ 23e220edf2SJack Xiao 24e220edf2SJack Xiao #include <linux/firmware.h> 25e220edf2SJack Xiao #include <linux/module.h> 26e220edf2SJack Xiao #include "amdgpu.h" 27e220edf2SJack Xiao #include "soc15_common.h" 28e220edf2SJack Xiao #include "soc_v1_0.h" 29e220edf2SJack Xiao #include "gc/gc_12_1_0_offset.h" 30e220edf2SJack Xiao #include "gc/gc_12_1_0_sh_mask.h" 31e220edf2SJack Xiao #include "gc/gc_11_0_0_default.h" 32e220edf2SJack Xiao #include "v12_structs.h" 33e220edf2SJack Xiao #include "mes_v12_api_def.h" 34e220edf2SJack Xiao 35e220edf2SJack Xiao MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin"); 36e220edf2SJack Xiao MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin"); 37e220edf2SJack Xiao MODULE_FIRMWARE("amdgpu/gc_12_1_0_uni_mes.bin"); 38e220edf2SJack Xiao 39e220edf2SJack Xiao static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block); 40a5192fbbSLikun Gao static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id); 41e220edf2SJack Xiao static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block); 42e220edf2SJack Xiao static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id); 43e220edf2SJack Xiao static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id); 44e220edf2SJack Xiao 45e220edf2SJack Xiao #define MES_EOP_SIZE 2048 46e220edf2SJack Xiao 47e220edf2SJack Xiao #define regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT 0x100000 48f8692d2fSAlex Sierra #define XCC_MID_MASK 0x41000000 49f8692d2fSAlex Sierra 50e220edf2SJack Xiao static void mes_v12_1_ring_set_wptr(struct amdgpu_ring *ring) 51e220edf2SJack Xiao { 52e220edf2SJack Xiao struct amdgpu_device *adev = ring->adev; 53e220edf2SJack Xiao 54e220edf2SJack Xiao if (ring->use_doorbell) { 55e220edf2SJack Xiao atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 56e220edf2SJack Xiao ring->wptr); 57e220edf2SJack Xiao WDOORBELL64(ring->doorbell_index, ring->wptr); 58e220edf2SJack Xiao } else { 59e220edf2SJack Xiao BUG(); 60e220edf2SJack Xiao } 61e220edf2SJack Xiao } 62e220edf2SJack Xiao 63e220edf2SJack Xiao static u64 mes_v12_1_ring_get_rptr(struct amdgpu_ring *ring) 64e220edf2SJack Xiao { 65e220edf2SJack Xiao return *ring->rptr_cpu_addr; 66e220edf2SJack Xiao } 67e220edf2SJack Xiao 68e220edf2SJack Xiao static u64 mes_v12_1_ring_get_wptr(struct amdgpu_ring *ring) 69e220edf2SJack Xiao { 70e220edf2SJack Xiao u64 wptr; 71e220edf2SJack Xiao 72e220edf2SJack Xiao if (ring->use_doorbell) 73e220edf2SJack Xiao wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 74e220edf2SJack Xiao else 75e220edf2SJack Xiao BUG(); 76e220edf2SJack Xiao return wptr; 77e220edf2SJack Xiao } 78e220edf2SJack Xiao 79e220edf2SJack Xiao static const struct amdgpu_ring_funcs mes_v12_1_ring_funcs = { 80e220edf2SJack Xiao .type = AMDGPU_RING_TYPE_MES, 81e220edf2SJack Xiao .align_mask = 1, 82e220edf2SJack Xiao .nop = 0, 83e220edf2SJack Xiao .support_64bit_ptrs = true, 84e220edf2SJack Xiao .get_rptr = mes_v12_1_ring_get_rptr, 85e220edf2SJack Xiao .get_wptr = mes_v12_1_ring_get_wptr, 86e220edf2SJack Xiao .set_wptr = mes_v12_1_ring_set_wptr, 87e220edf2SJack Xiao .insert_nop = amdgpu_ring_insert_nop, 88e220edf2SJack Xiao }; 89e220edf2SJack Xiao 90e220edf2SJack Xiao static const char *mes_v12_1_opcodes[] = { 91e220edf2SJack Xiao "SET_HW_RSRC", 92e220edf2SJack Xiao "SET_SCHEDULING_CONFIG", 93e220edf2SJack Xiao "ADD_QUEUE", 94e220edf2SJack Xiao "REMOVE_QUEUE", 95e220edf2SJack Xiao "PERFORM_YIELD", 96e220edf2SJack Xiao "SET_GANG_PRIORITY_LEVEL", 97e220edf2SJack Xiao "SUSPEND", 98e220edf2SJack Xiao "RESUME", 99e220edf2SJack Xiao "RESET", 100e220edf2SJack Xiao "SET_LOG_BUFFER", 101e220edf2SJack Xiao "CHANGE_GANG_PRORITY", 102e220edf2SJack Xiao "QUERY_SCHEDULER_STATUS", 103e220edf2SJack Xiao "unused", 104e220edf2SJack Xiao "SET_DEBUG_VMID", 105e220edf2SJack Xiao "MISC", 106e220edf2SJack Xiao "UPDATE_ROOT_PAGE_TABLE", 107e220edf2SJack Xiao "AMD_LOG", 108e220edf2SJack Xiao "SET_SE_MODE", 109e220edf2SJack Xiao "SET_GANG_SUBMIT", 110e220edf2SJack Xiao "SET_HW_RSRC_1", 111*d0c989a0SShaoyun Liu "INVALIDATE_TLBS", 112e220edf2SJack Xiao }; 113e220edf2SJack Xiao 114e220edf2SJack Xiao static const char *mes_v12_1_misc_opcodes[] = { 115e220edf2SJack Xiao "WRITE_REG", 116e220edf2SJack Xiao "INV_GART", 117e220edf2SJack Xiao "QUERY_STATUS", 118e220edf2SJack Xiao "READ_REG", 119e220edf2SJack Xiao "WAIT_REG_MEM", 120e220edf2SJack Xiao "SET_SHADER_DEBUGGER", 121e220edf2SJack Xiao "NOTIFY_WORK_ON_UNMAPPED_QUEUE", 122e220edf2SJack Xiao "NOTIFY_TO_UNMAP_PROCESSES", 123e220edf2SJack Xiao }; 124e220edf2SJack Xiao 125e220edf2SJack Xiao static const char *mes_v12_1_get_op_string(union MESAPI__MISC *x_pkt) 126e220edf2SJack Xiao { 127e220edf2SJack Xiao const char *op_str = NULL; 128e220edf2SJack Xiao 129e220edf2SJack Xiao if (x_pkt->header.opcode < ARRAY_SIZE(mes_v12_1_opcodes)) 130e220edf2SJack Xiao op_str = mes_v12_1_opcodes[x_pkt->header.opcode]; 131e220edf2SJack Xiao 132e220edf2SJack Xiao return op_str; 133e220edf2SJack Xiao } 134e220edf2SJack Xiao 135e220edf2SJack Xiao static const char *mes_v12_1_get_misc_op_string(union MESAPI__MISC *x_pkt) 136e220edf2SJack Xiao { 137e220edf2SJack Xiao const char *op_str = NULL; 138e220edf2SJack Xiao 139e220edf2SJack Xiao if ((x_pkt->header.opcode == MES_SCH_API_MISC) && 140e220edf2SJack Xiao (x_pkt->opcode < ARRAY_SIZE(mes_v12_1_misc_opcodes))) 141e220edf2SJack Xiao op_str = mes_v12_1_misc_opcodes[x_pkt->opcode]; 142e220edf2SJack Xiao 143e220edf2SJack Xiao return op_str; 144e220edf2SJack Xiao } 145e220edf2SJack Xiao 146e220edf2SJack Xiao static int mes_v12_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, 147e220edf2SJack Xiao int xcc_id, int pipe, void *pkt, 148e220edf2SJack Xiao int size, int api_status_off) 149e220edf2SJack Xiao { 150e220edf2SJack Xiao union MESAPI__QUERY_MES_STATUS mes_status_pkt; 151e220edf2SJack Xiao signed long timeout = 2100000; /* 2100 ms */ 152e220edf2SJack Xiao struct amdgpu_device *adev = mes->adev; 153e220edf2SJack Xiao struct amdgpu_ring *ring = &mes->ring[MES_PIPE_INST(xcc_id, pipe)]; 154e220edf2SJack Xiao spinlock_t *ring_lock = &mes->ring_lock[MES_PIPE_INST(xcc_id, pipe)]; 155e220edf2SJack Xiao struct MES_API_STATUS *api_status; 156e220edf2SJack Xiao union MESAPI__MISC *x_pkt = pkt; 157e220edf2SJack Xiao const char *op_str, *misc_op_str; 158e220edf2SJack Xiao unsigned long flags; 159e220edf2SJack Xiao u64 status_gpu_addr; 160e220edf2SJack Xiao u32 seq, status_offset; 161e220edf2SJack Xiao u64 *status_ptr; 162e220edf2SJack Xiao signed long r; 163e220edf2SJack Xiao int ret; 164e220edf2SJack Xiao 165e220edf2SJack Xiao if (x_pkt->header.opcode >= MES_SCH_API_MAX) 166e220edf2SJack Xiao return -EINVAL; 167e220edf2SJack Xiao 168e220edf2SJack Xiao if (amdgpu_emu_mode) { 169e220edf2SJack Xiao timeout *= 1000; 170e220edf2SJack Xiao } else if (amdgpu_sriov_vf(adev)) { 171e220edf2SJack Xiao /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */ 172e220edf2SJack Xiao timeout = 15 * 600 * 1000; 173e220edf2SJack Xiao } 174e220edf2SJack Xiao 175e220edf2SJack Xiao ret = amdgpu_device_wb_get(adev, &status_offset); 176e220edf2SJack Xiao if (ret) 177e220edf2SJack Xiao return ret; 178e220edf2SJack Xiao 179e220edf2SJack Xiao status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4); 180e220edf2SJack Xiao status_ptr = (u64 *)&adev->wb.wb[status_offset]; 181e220edf2SJack Xiao *status_ptr = 0; 182e220edf2SJack Xiao 183e220edf2SJack Xiao spin_lock_irqsave(ring_lock, flags); 184e220edf2SJack Xiao r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); 185e220edf2SJack Xiao if (r) 186e220edf2SJack Xiao goto error_unlock_free; 187e220edf2SJack Xiao 188e220edf2SJack Xiao seq = ++ring->fence_drv.sync_seq; 189e220edf2SJack Xiao r = amdgpu_fence_wait_polling(ring, 190e220edf2SJack Xiao seq - ring->fence_drv.num_fences_mask, 191e220edf2SJack Xiao timeout); 192e220edf2SJack Xiao if (r < 1) 193e220edf2SJack Xiao goto error_undo; 194e220edf2SJack Xiao 195e220edf2SJack Xiao api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); 196e220edf2SJack Xiao api_status->api_completion_fence_addr = status_gpu_addr; 197e220edf2SJack Xiao api_status->api_completion_fence_value = 1; 198e220edf2SJack Xiao 199e220edf2SJack Xiao amdgpu_ring_write_multiple(ring, pkt, size / 4); 200e220edf2SJack Xiao 201e220edf2SJack Xiao memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); 202e220edf2SJack Xiao mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; 203e220edf2SJack Xiao mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; 204e220edf2SJack Xiao mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 205e220edf2SJack Xiao mes_status_pkt.api_status.api_completion_fence_addr = 206e220edf2SJack Xiao ring->fence_drv.gpu_addr; 207e220edf2SJack Xiao mes_status_pkt.api_status.api_completion_fence_value = seq; 208e220edf2SJack Xiao 209e220edf2SJack Xiao amdgpu_ring_write_multiple(ring, &mes_status_pkt, 210e220edf2SJack Xiao sizeof(mes_status_pkt) / 4); 211e220edf2SJack Xiao 212e220edf2SJack Xiao amdgpu_ring_commit(ring); 213e220edf2SJack Xiao spin_unlock_irqrestore(ring_lock, flags); 214e220edf2SJack Xiao 215e220edf2SJack Xiao op_str = mes_v12_1_get_op_string(x_pkt); 216e220edf2SJack Xiao misc_op_str = mes_v12_1_get_misc_op_string(x_pkt); 217e220edf2SJack Xiao 218e220edf2SJack Xiao if (misc_op_str) 219e220edf2SJack Xiao dev_dbg(adev->dev, "MES(%d, %d) msg=%s (%s) was emitted\n", 220e220edf2SJack Xiao xcc_id, pipe, op_str, misc_op_str); 221e220edf2SJack Xiao else if (op_str) 222e220edf2SJack Xiao dev_dbg(adev->dev, "MES(%d, %d) msg=%s was emitted\n", 223e220edf2SJack Xiao xcc_id, pipe, op_str); 224e220edf2SJack Xiao else 225e220edf2SJack Xiao dev_dbg(adev->dev, "MES(%d, %d) msg=%d was emitted\n", 226e220edf2SJack Xiao xcc_id, pipe, x_pkt->header.opcode); 227e220edf2SJack Xiao 228e220edf2SJack Xiao r = amdgpu_fence_wait_polling(ring, seq, timeout); 229e220edf2SJack Xiao if (r < 1 || !*status_ptr) { 230e220edf2SJack Xiao if (misc_op_str) 231e220edf2SJack Xiao dev_err(adev->dev, 232e220edf2SJack Xiao "MES(%d, %d) failed to respond to msg=%s (%s)\n", 233e220edf2SJack Xiao xcc_id, pipe, op_str, misc_op_str); 234e220edf2SJack Xiao else if (op_str) 235e220edf2SJack Xiao dev_err(adev->dev, 236e220edf2SJack Xiao "MES(%d, %d) failed to respond to msg=%s\n", 237e220edf2SJack Xiao xcc_id, pipe, op_str); 238e220edf2SJack Xiao else 239e220edf2SJack Xiao dev_err(adev->dev, 240e220edf2SJack Xiao "MES(%d, %d) failed to respond to msg=%d\n", 241e220edf2SJack Xiao xcc_id, pipe, x_pkt->header.opcode); 242e220edf2SJack Xiao 243e220edf2SJack Xiao while (halt_if_hws_hang) 244e220edf2SJack Xiao schedule(); 245e220edf2SJack Xiao 246e220edf2SJack Xiao r = -ETIMEDOUT; 247e220edf2SJack Xiao goto error_wb_free; 248e220edf2SJack Xiao } 249e220edf2SJack Xiao 250e220edf2SJack Xiao amdgpu_device_wb_free(adev, status_offset); 251e220edf2SJack Xiao return 0; 252e220edf2SJack Xiao 253e220edf2SJack Xiao error_undo: 254e220edf2SJack Xiao dev_err(adev->dev, "MES(%d, %d) ring buffer is full.\n", xcc_id, pipe); 255e220edf2SJack Xiao amdgpu_ring_undo(ring); 256e220edf2SJack Xiao 257e220edf2SJack Xiao error_unlock_free: 258e220edf2SJack Xiao spin_unlock_irqrestore(ring_lock, flags); 259e220edf2SJack Xiao 260e220edf2SJack Xiao error_wb_free: 261e220edf2SJack Xiao amdgpu_device_wb_free(adev, status_offset); 262e220edf2SJack Xiao return r; 263e220edf2SJack Xiao } 264e220edf2SJack Xiao 265e220edf2SJack Xiao static int convert_to_mes_queue_type(int queue_type) 266e220edf2SJack Xiao { 267e220edf2SJack Xiao if (queue_type == AMDGPU_RING_TYPE_GFX) 268e220edf2SJack Xiao return MES_QUEUE_TYPE_GFX; 269e220edf2SJack Xiao else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) 270e220edf2SJack Xiao return MES_QUEUE_TYPE_COMPUTE; 271e220edf2SJack Xiao else if (queue_type == AMDGPU_RING_TYPE_SDMA) 272e220edf2SJack Xiao return MES_QUEUE_TYPE_SDMA; 273e220edf2SJack Xiao else if (queue_type == AMDGPU_RING_TYPE_MES) 274e220edf2SJack Xiao return MES_QUEUE_TYPE_SCHQ; 275e220edf2SJack Xiao else 276e220edf2SJack Xiao BUG(); 277e220edf2SJack Xiao return -1; 278e220edf2SJack Xiao } 279e220edf2SJack Xiao 280e220edf2SJack Xiao static int mes_v12_1_add_hw_queue(struct amdgpu_mes *mes, 281e220edf2SJack Xiao struct mes_add_queue_input *input) 282e220edf2SJack Xiao { 283e220edf2SJack Xiao union MESAPI__ADD_QUEUE mes_add_queue_pkt; 28475053887SJack Xiao int xcc_id = input->xcc_id; 28575053887SJack Xiao int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 28675053887SJack Xiao 28775053887SJack Xiao if (mes->enable_coop_mode) 28875053887SJack Xiao xcc_id = mes->master_xcc_ids[inst]; 289e220edf2SJack Xiao 290e220edf2SJack Xiao memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); 291e220edf2SJack Xiao 292e220edf2SJack Xiao mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 293e220edf2SJack Xiao mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; 294e220edf2SJack Xiao mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 295e220edf2SJack Xiao 296e220edf2SJack Xiao mes_add_queue_pkt.process_id = input->process_id; 297e220edf2SJack Xiao mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr; 298e220edf2SJack Xiao mes_add_queue_pkt.process_va_start = input->process_va_start; 299e220edf2SJack Xiao mes_add_queue_pkt.process_va_end = input->process_va_end; 300e220edf2SJack Xiao mes_add_queue_pkt.process_quantum = input->process_quantum; 301e220edf2SJack Xiao mes_add_queue_pkt.process_context_addr = input->process_context_addr; 302e220edf2SJack Xiao mes_add_queue_pkt.gang_quantum = input->gang_quantum; 303e220edf2SJack Xiao mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; 304e220edf2SJack Xiao mes_add_queue_pkt.inprocess_gang_priority = 305e220edf2SJack Xiao input->inprocess_gang_priority; 306e220edf2SJack Xiao mes_add_queue_pkt.gang_global_priority_level = 307e220edf2SJack Xiao input->gang_global_priority_level; 308e220edf2SJack Xiao mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; 309e220edf2SJack Xiao mes_add_queue_pkt.mqd_addr = input->mqd_addr; 310e220edf2SJack Xiao 311e220edf2SJack Xiao mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr; 312e220edf2SJack Xiao 313e220edf2SJack Xiao mes_add_queue_pkt.queue_type = 314e220edf2SJack Xiao convert_to_mes_queue_type(input->queue_type); 315e220edf2SJack Xiao mes_add_queue_pkt.paging = input->paging; 3163235a5b7SMukul Joshi mes_add_queue_pkt.vm_context_cntl = input->vm_cntx_cntl; 317e220edf2SJack Xiao mes_add_queue_pkt.gws_base = input->gws_base; 318e220edf2SJack Xiao mes_add_queue_pkt.gws_size = input->gws_size; 319e220edf2SJack Xiao mes_add_queue_pkt.trap_handler_addr = input->tba_addr; 320e220edf2SJack Xiao mes_add_queue_pkt.tma_addr = input->tma_addr; 321e220edf2SJack Xiao mes_add_queue_pkt.trap_en = input->trap_en; 322e220edf2SJack Xiao mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear; 323e220edf2SJack Xiao mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; 324e220edf2SJack Xiao 325e220edf2SJack Xiao /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ 326e220edf2SJack Xiao mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; 327e220edf2SJack Xiao mes_add_queue_pkt.gds_size = input->queue_size; 328e220edf2SJack Xiao 329e220edf2SJack Xiao /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ 330e220edf2SJack Xiao mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; 331e220edf2SJack Xiao mes_add_queue_pkt.gds_size = input->queue_size; 332e220edf2SJack Xiao 333cc52af1aSMukul Joshi mes_add_queue_pkt.full_sh_mem_config_data = input->sh_mem_config_data; 334cc52af1aSMukul Joshi 335e220edf2SJack Xiao return mes_v12_1_submit_pkt_and_poll_completion(mes, 33675053887SJack Xiao xcc_id, AMDGPU_MES_SCHED_PIPE, 337e220edf2SJack Xiao &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), 338e220edf2SJack Xiao offsetof(union MESAPI__ADD_QUEUE, api_status)); 339e220edf2SJack Xiao } 340e220edf2SJack Xiao 341e220edf2SJack Xiao static int mes_v12_1_remove_hw_queue(struct amdgpu_mes *mes, 342e220edf2SJack Xiao struct mes_remove_queue_input *input) 343e220edf2SJack Xiao { 344e220edf2SJack Xiao union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; 34575053887SJack Xiao int xcc_id = input->xcc_id; 34675053887SJack Xiao int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 34775053887SJack Xiao 34875053887SJack Xiao if (mes->enable_coop_mode) 34975053887SJack Xiao xcc_id = mes->master_xcc_ids[inst]; 350e220edf2SJack Xiao 351e220edf2SJack Xiao memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); 352e220edf2SJack Xiao 353e220edf2SJack Xiao mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 354e220edf2SJack Xiao mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; 355e220edf2SJack Xiao mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 356e220edf2SJack Xiao 357e220edf2SJack Xiao mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; 358e220edf2SJack Xiao mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; 359e220edf2SJack Xiao 360e220edf2SJack Xiao return mes_v12_1_submit_pkt_and_poll_completion(mes, 36175053887SJack Xiao xcc_id, AMDGPU_MES_SCHED_PIPE, 362e220edf2SJack Xiao &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), 363e220edf2SJack Xiao offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 364e220edf2SJack Xiao } 365e220edf2SJack Xiao 366e220edf2SJack Xiao static int mes_v12_1_reset_hw_queue(struct amdgpu_mes *mes, 367e220edf2SJack Xiao struct mes_reset_queue_input *input) 368e220edf2SJack Xiao { 369e220edf2SJack Xiao union MESAPI__RESET mes_reset_queue_pkt; 370e220edf2SJack Xiao int pipe; 371e220edf2SJack Xiao 372e220edf2SJack Xiao memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); 373e220edf2SJack Xiao 374e220edf2SJack Xiao mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 375e220edf2SJack Xiao mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; 376e220edf2SJack Xiao mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 377e220edf2SJack Xiao 378e220edf2SJack Xiao mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; 379e220edf2SJack Xiao /* mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; */ 380e220edf2SJack Xiao /*mes_reset_queue_pkt.reset_queue_only = 1;*/ 381e220edf2SJack Xiao 382e220edf2SJack Xiao if (mes->adev->enable_uni_mes) 383e220edf2SJack Xiao pipe = AMDGPU_MES_KIQ_PIPE; 384e220edf2SJack Xiao else 385e220edf2SJack Xiao pipe = AMDGPU_MES_SCHED_PIPE; 386e220edf2SJack Xiao 387e220edf2SJack Xiao return mes_v12_1_submit_pkt_and_poll_completion(mes, 388e220edf2SJack Xiao input->xcc_id, pipe, 389e220edf2SJack Xiao &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), 390e220edf2SJack Xiao offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 391e220edf2SJack Xiao } 392e220edf2SJack Xiao 393e220edf2SJack Xiao static int mes_v12_1_map_legacy_queue(struct amdgpu_mes *mes, 394e220edf2SJack Xiao struct mes_map_legacy_queue_input *input) 395e220edf2SJack Xiao { 396e220edf2SJack Xiao union MESAPI__ADD_QUEUE mes_add_queue_pkt; 397e220edf2SJack Xiao int pipe; 398e220edf2SJack Xiao 399e220edf2SJack Xiao memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); 400e220edf2SJack Xiao 401e220edf2SJack Xiao mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 402e220edf2SJack Xiao mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; 403e220edf2SJack Xiao mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 404e220edf2SJack Xiao 405e220edf2SJack Xiao mes_add_queue_pkt.pipe_id = input->pipe_id; 406e220edf2SJack Xiao mes_add_queue_pkt.queue_id = input->queue_id; 407e220edf2SJack Xiao mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; 408e220edf2SJack Xiao mes_add_queue_pkt.mqd_addr = input->mqd_addr; 409e220edf2SJack Xiao mes_add_queue_pkt.wptr_addr = input->wptr_addr; 410e220edf2SJack Xiao mes_add_queue_pkt.queue_type = 411e220edf2SJack Xiao convert_to_mes_queue_type(input->queue_type); 412e220edf2SJack Xiao mes_add_queue_pkt.map_legacy_kq = 1; 413e220edf2SJack Xiao 414e220edf2SJack Xiao if (mes->adev->enable_uni_mes) 415e220edf2SJack Xiao pipe = AMDGPU_MES_KIQ_PIPE; 416e220edf2SJack Xiao else 417e220edf2SJack Xiao pipe = AMDGPU_MES_SCHED_PIPE; 418e220edf2SJack Xiao 419e220edf2SJack Xiao return mes_v12_1_submit_pkt_and_poll_completion(mes, 420e220edf2SJack Xiao input->xcc_id, pipe, 421e220edf2SJack Xiao &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), 422e220edf2SJack Xiao offsetof(union MESAPI__ADD_QUEUE, api_status)); 423e220edf2SJack Xiao } 424e220edf2SJack Xiao 425e220edf2SJack Xiao static int mes_v12_1_unmap_legacy_queue(struct amdgpu_mes *mes, 426e220edf2SJack Xiao struct mes_unmap_legacy_queue_input *input) 427e220edf2SJack Xiao { 428e220edf2SJack Xiao union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; 429e220edf2SJack Xiao int pipe; 430e220edf2SJack Xiao 431e220edf2SJack Xiao memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); 432e220edf2SJack Xiao 433e220edf2SJack Xiao mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 434e220edf2SJack Xiao mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; 435e220edf2SJack Xiao mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 436e220edf2SJack Xiao 437e220edf2SJack Xiao mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; 438e220edf2SJack Xiao mes_remove_queue_pkt.gang_context_addr = 0; 439e220edf2SJack Xiao 440e220edf2SJack Xiao mes_remove_queue_pkt.pipe_id = input->pipe_id; 441e220edf2SJack Xiao mes_remove_queue_pkt.queue_id = input->queue_id; 442e220edf2SJack Xiao 443e220edf2SJack Xiao if (input->action == PREEMPT_QUEUES_NO_UNMAP) { 444e220edf2SJack Xiao mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1; 445e220edf2SJack Xiao mes_remove_queue_pkt.tf_addr = input->trail_fence_addr; 446e220edf2SJack Xiao mes_remove_queue_pkt.tf_data = 447e220edf2SJack Xiao lower_32_bits(input->trail_fence_data); 448e220edf2SJack Xiao } else { 449e220edf2SJack Xiao mes_remove_queue_pkt.unmap_legacy_queue = 1; 450e220edf2SJack Xiao mes_remove_queue_pkt.queue_type = 451e220edf2SJack Xiao convert_to_mes_queue_type(input->queue_type); 452e220edf2SJack Xiao } 453e220edf2SJack Xiao 454e220edf2SJack Xiao if (mes->adev->enable_uni_mes) 455e220edf2SJack Xiao pipe = AMDGPU_MES_KIQ_PIPE; 456e220edf2SJack Xiao else 457e220edf2SJack Xiao pipe = AMDGPU_MES_SCHED_PIPE; 458e220edf2SJack Xiao 459e220edf2SJack Xiao return mes_v12_1_submit_pkt_and_poll_completion(mes, 460e220edf2SJack Xiao input->xcc_id, pipe, 461e220edf2SJack Xiao &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), 462e220edf2SJack Xiao offsetof(union MESAPI__REMOVE_QUEUE, api_status)); 463e220edf2SJack Xiao } 464e220edf2SJack Xiao 465e220edf2SJack Xiao static int mes_v12_1_suspend_gang(struct amdgpu_mes *mes, 466e220edf2SJack Xiao struct mes_suspend_gang_input *input) 467e220edf2SJack Xiao { 468e220edf2SJack Xiao return 0; 469e220edf2SJack Xiao } 470e220edf2SJack Xiao 471e220edf2SJack Xiao static int mes_v12_1_resume_gang(struct amdgpu_mes *mes, 472e220edf2SJack Xiao struct mes_resume_gang_input *input) 473e220edf2SJack Xiao { 474e220edf2SJack Xiao return 0; 475e220edf2SJack Xiao } 476e220edf2SJack Xiao 477e220edf2SJack Xiao static int mes_v12_1_query_sched_status(struct amdgpu_mes *mes, 478e220edf2SJack Xiao int pipe, int xcc_id) 479e220edf2SJack Xiao { 480e220edf2SJack Xiao union MESAPI__QUERY_MES_STATUS mes_status_pkt; 481e220edf2SJack Xiao 482e220edf2SJack Xiao memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); 483e220edf2SJack Xiao 484e220edf2SJack Xiao mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; 485e220edf2SJack Xiao mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; 486e220edf2SJack Xiao mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 487e220edf2SJack Xiao 488e220edf2SJack Xiao return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, 489e220edf2SJack Xiao &mes_status_pkt, sizeof(mes_status_pkt), 490e220edf2SJack Xiao offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); 491e220edf2SJack Xiao } 492f8692d2fSAlex Sierra static uint32_t mes_v12_1_get_xcc_from_reg(uint32_t reg_offset) 493f8692d2fSAlex Sierra { 494f8692d2fSAlex Sierra /* Check xcc reg offset range */ 495f8692d2fSAlex Sierra uint32_t xcc = (reg_offset & XCC_MID_MASK) ? 4 : 0; 496f8692d2fSAlex Sierra /* Each XCC has two register ranges. 497f8692d2fSAlex Sierra * These are represented in reg_offset[17:16] 498f8692d2fSAlex Sierra */ 499f8692d2fSAlex Sierra return ((reg_offset >> 16) & 0x3) + xcc; 500f8692d2fSAlex Sierra } 501f8692d2fSAlex Sierra 502f8692d2fSAlex Sierra static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id, 503f8692d2fSAlex Sierra struct RRMT_OPTION *rrmt_opt) 504f8692d2fSAlex Sierra { 505fcc4fc75SLikun Gao uint32_t normalized_reg = soc_v1_0_normalize_xcc_reg_offset(reg); 506f8692d2fSAlex Sierra 507fcc4fc75SLikun Gao if (soc_v1_0_normalize_xcc_reg_range(normalized_reg)) { 508f8692d2fSAlex Sierra rrmt_opt->xcd_die_id = mes_v12_1_get_xcc_from_reg(reg); 509f8692d2fSAlex Sierra rrmt_opt->mode = (xcc_id == rrmt_opt->xcd_die_id) ? 510f8692d2fSAlex Sierra MES_RRMT_MODE_LOCAL_XCD : MES_RRMT_MODE_REMOTE_XCD; 511f8692d2fSAlex Sierra } else { 512f8692d2fSAlex Sierra rrmt_opt->mode = MES_RRMT_MODE_LOCAL_REMOTE_AID; 513f8692d2fSAlex Sierra } 514f8692d2fSAlex Sierra } 515e220edf2SJack Xiao 516e220edf2SJack Xiao static int mes_v12_1_misc_op(struct amdgpu_mes *mes, 517e220edf2SJack Xiao struct mes_misc_op_input *input) 518e220edf2SJack Xiao { 5191eb2a5edSLikun Gao struct amdgpu_device *adev = mes->adev; 520e220edf2SJack Xiao union MESAPI__MISC misc_pkt; 521e220edf2SJack Xiao int pipe; 522e220edf2SJack Xiao 523e220edf2SJack Xiao if (mes->adev->enable_uni_mes) 524e220edf2SJack Xiao pipe = AMDGPU_MES_KIQ_PIPE; 525e220edf2SJack Xiao else 526e220edf2SJack Xiao pipe = AMDGPU_MES_SCHED_PIPE; 527e220edf2SJack Xiao 528e220edf2SJack Xiao memset(&misc_pkt, 0, sizeof(misc_pkt)); 529e220edf2SJack Xiao 530e220edf2SJack Xiao misc_pkt.header.type = MES_API_TYPE_SCHEDULER; 531e220edf2SJack Xiao misc_pkt.header.opcode = MES_SCH_API_MISC; 532e220edf2SJack Xiao misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 533e220edf2SJack Xiao 534e220edf2SJack Xiao switch (input->op) { 535e220edf2SJack Xiao case MES_MISC_OP_READ_REG: 536e220edf2SJack Xiao misc_pkt.opcode = MESAPI_MISC__READ_REG; 537e220edf2SJack Xiao misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset; 538e220edf2SJack Xiao misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr; 5391eb2a5edSLikun Gao mes_v12_1_get_rrmt(input->read_reg.reg_offset, 5401eb2a5edSLikun Gao GET_INST(GC, input->xcc_id), 541f8692d2fSAlex Sierra &misc_pkt.read_reg.rrmt_opt); 542b9a0716aSJack Xiao if (misc_pkt.read_reg.rrmt_opt.mode != MES_RRMT_MODE_REMOTE_MID) { 543b9a0716aSJack Xiao misc_pkt.read_reg.reg_offset = 544fcc4fc75SLikun Gao soc_v1_0_normalize_xcc_reg_offset(misc_pkt.read_reg.reg_offset); 545b9a0716aSJack Xiao } 546e220edf2SJack Xiao break; 547e220edf2SJack Xiao case MES_MISC_OP_WRITE_REG: 548e220edf2SJack Xiao misc_pkt.opcode = MESAPI_MISC__WRITE_REG; 549e220edf2SJack Xiao misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset; 550e220edf2SJack Xiao misc_pkt.write_reg.reg_value = input->write_reg.reg_value; 5511eb2a5edSLikun Gao mes_v12_1_get_rrmt(input->write_reg.reg_offset, 5521eb2a5edSLikun Gao GET_INST(GC, input->xcc_id), 553f8692d2fSAlex Sierra &misc_pkt.write_reg.rrmt_opt); 554b9a0716aSJack Xiao if (misc_pkt.write_reg.rrmt_opt.mode != MES_RRMT_MODE_REMOTE_MID) { 555b9a0716aSJack Xiao misc_pkt.write_reg.reg_offset = 556fcc4fc75SLikun Gao soc_v1_0_normalize_xcc_reg_offset(misc_pkt.write_reg.reg_offset); 557b9a0716aSJack Xiao } 558e220edf2SJack Xiao break; 559e220edf2SJack Xiao case MES_MISC_OP_WRM_REG_WAIT: 560e220edf2SJack Xiao misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; 561e220edf2SJack Xiao misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM; 562e220edf2SJack Xiao misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; 563e220edf2SJack Xiao misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; 564e220edf2SJack Xiao misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; 565e220edf2SJack Xiao misc_pkt.wait_reg_mem.reg_offset2 = 0; 5661eb2a5edSLikun Gao mes_v12_1_get_rrmt(input->wrm_reg.reg0, 5671eb2a5edSLikun Gao GET_INST(GC, input->xcc_id), 568f8692d2fSAlex Sierra &misc_pkt.wait_reg_mem.rrmt_opt1); 569b9a0716aSJack Xiao if (misc_pkt.wait_reg_mem.rrmt_opt1.mode != MES_RRMT_MODE_REMOTE_MID) { 570b9a0716aSJack Xiao misc_pkt.wait_reg_mem.reg_offset1 = 571fcc4fc75SLikun Gao soc_v1_0_normalize_xcc_reg_offset(misc_pkt.wait_reg_mem.reg_offset1); 572b9a0716aSJack Xiao } 573e220edf2SJack Xiao break; 574e220edf2SJack Xiao case MES_MISC_OP_WRM_REG_WR_WAIT: 575e220edf2SJack Xiao misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; 576e220edf2SJack Xiao misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG; 577e220edf2SJack Xiao misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; 578e220edf2SJack Xiao misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; 579e220edf2SJack Xiao misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; 580e220edf2SJack Xiao misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1; 5811eb2a5edSLikun Gao mes_v12_1_get_rrmt(input->wrm_reg.reg0, 5821eb2a5edSLikun Gao GET_INST(GC, input->xcc_id), 583f8692d2fSAlex Sierra &misc_pkt.wait_reg_mem.rrmt_opt1); 5841eb2a5edSLikun Gao mes_v12_1_get_rrmt(input->wrm_reg.reg1, 5851eb2a5edSLikun Gao GET_INST(GC, input->xcc_id), 586f8692d2fSAlex Sierra &misc_pkt.wait_reg_mem.rrmt_opt2); 587aa0f09f9SMukul Joshi 588b9a0716aSJack Xiao if (misc_pkt.wait_reg_mem.rrmt_opt1.mode != MES_RRMT_MODE_REMOTE_MID) { 589b9a0716aSJack Xiao misc_pkt.wait_reg_mem.reg_offset1 = 590fcc4fc75SLikun Gao soc_v1_0_normalize_xcc_reg_offset(misc_pkt.wait_reg_mem.reg_offset1); 591b9a0716aSJack Xiao } 592b9a0716aSJack Xiao if (misc_pkt.wait_reg_mem.rrmt_opt2.mode != MES_RRMT_MODE_REMOTE_MID) { 593b9a0716aSJack Xiao misc_pkt.wait_reg_mem.reg_offset2 = 594fcc4fc75SLikun Gao soc_v1_0_normalize_xcc_reg_offset(misc_pkt.wait_reg_mem.reg_offset2); 595b9a0716aSJack Xiao } 596e220edf2SJack Xiao break; 597e220edf2SJack Xiao case MES_MISC_OP_SET_SHADER_DEBUGGER: 598e220edf2SJack Xiao pipe = AMDGPU_MES_SCHED_PIPE; 599e220edf2SJack Xiao misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER; 600e220edf2SJack Xiao misc_pkt.set_shader_debugger.process_context_addr = 601e220edf2SJack Xiao input->set_shader_debugger.process_context_addr; 602e220edf2SJack Xiao misc_pkt.set_shader_debugger.flags.u32all = 603e220edf2SJack Xiao input->set_shader_debugger.flags.u32all; 604e220edf2SJack Xiao misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl = 605e220edf2SJack Xiao input->set_shader_debugger.spi_gdbg_per_vmid_cntl; 606e220edf2SJack Xiao memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl, 607e220edf2SJack Xiao input->set_shader_debugger.tcp_watch_cntl, 608e220edf2SJack Xiao sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); 609e220edf2SJack Xiao misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; 610e220edf2SJack Xiao break; 611e220edf2SJack Xiao case MES_MISC_OP_CHANGE_CONFIG: 612e220edf2SJack Xiao misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; 613e220edf2SJack Xiao misc_pkt.change_config.opcode = 614e220edf2SJack Xiao MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS; 615e220edf2SJack Xiao misc_pkt.change_config.option.bits.limit_single_process = 616e220edf2SJack Xiao input->change_config.option.limit_single_process; 617e220edf2SJack Xiao break; 618e220edf2SJack Xiao default: 619e220edf2SJack Xiao DRM_ERROR("unsupported misc op (%d) \n", input->op); 620e220edf2SJack Xiao return -EINVAL; 621e220edf2SJack Xiao } 622e220edf2SJack Xiao 623e220edf2SJack Xiao return mes_v12_1_submit_pkt_and_poll_completion(mes, 624e220edf2SJack Xiao input->xcc_id, pipe, 625e220edf2SJack Xiao &misc_pkt, sizeof(misc_pkt), 626e220edf2SJack Xiao offsetof(union MESAPI__MISC, api_status)); 627e220edf2SJack Xiao } 628e220edf2SJack Xiao 629e220edf2SJack Xiao static int mes_v12_1_set_hw_resources_1(struct amdgpu_mes *mes, 630e220edf2SJack Xiao int pipe, int xcc_id) 631e220edf2SJack Xiao { 632e220edf2SJack Xiao union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; 63375053887SJack Xiao int master_xcc_id, inst = MES_PIPE_INST(xcc_id, pipe); 634e220edf2SJack Xiao 635e220edf2SJack Xiao memset(&mes_set_hw_res_1_pkt, 0, sizeof(mes_set_hw_res_1_pkt)); 636e220edf2SJack Xiao 637e220edf2SJack Xiao mes_set_hw_res_1_pkt.header.type = MES_API_TYPE_SCHEDULER; 638e220edf2SJack Xiao mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1; 639e220edf2SJack Xiao mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 640e220edf2SJack Xiao mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100; 641e220edf2SJack Xiao 64275053887SJack Xiao if (mes->enable_coop_mode && pipe == AMDGPU_MES_SCHED_PIPE) { 64375053887SJack Xiao master_xcc_id = mes->master_xcc_ids[inst]; 64475053887SJack Xiao mes_set_hw_res_1_pkt.mes_coop_mode = 1; 64575053887SJack Xiao mes_set_hw_res_1_pkt.coop_sch_shared_mc_addr = 64675053887SJack Xiao mes->shared_cmd_buf_gpu_addr[master_xcc_id]; 64775053887SJack Xiao } 64875053887SJack Xiao 649e220edf2SJack Xiao return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, 650e220edf2SJack Xiao &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), 651e220edf2SJack Xiao offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); 652e220edf2SJack Xiao } 653e220edf2SJack Xiao 654e220edf2SJack Xiao static void mes_v12_1_set_gfx_hqd_mask(union MESAPI_SET_HW_RESOURCES *pkt) 655e220edf2SJack Xiao { 656e220edf2SJack Xiao /* 657e220edf2SJack Xiao * GFX V12 has only one GFX pipe, but 8 queues in it. 658e220edf2SJack Xiao * GFX pipe 0 queue 0 is being used by Kernel queue. 659e220edf2SJack Xiao * Set GFX pipe 0 queue 1-7 for MES scheduling 660e220edf2SJack Xiao * mask = 1111 1110b 661e220edf2SJack Xiao */ 662e220edf2SJack Xiao pkt->gfx_hqd_mask[0] = 0xFE; 663e220edf2SJack Xiao } 664e220edf2SJack Xiao 665e220edf2SJack Xiao static int mes_v12_1_set_hw_resources(struct amdgpu_mes *mes, 666e220edf2SJack Xiao int pipe, int xcc_id) 667e220edf2SJack Xiao { 668e220edf2SJack Xiao int i; 669e220edf2SJack Xiao struct amdgpu_device *adev = mes->adev; 670e220edf2SJack Xiao union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt; 671e220edf2SJack Xiao 672e220edf2SJack Xiao memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); 673e220edf2SJack Xiao 674e220edf2SJack Xiao mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER; 675e220edf2SJack Xiao mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; 676e220edf2SJack Xiao mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 677e220edf2SJack Xiao 678e220edf2SJack Xiao if (pipe == AMDGPU_MES_SCHED_PIPE) { 679e220edf2SJack Xiao mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; 680e220edf2SJack Xiao mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; 681e220edf2SJack Xiao mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; 682e220edf2SJack Xiao mes_set_hw_res_pkt.paging_vmid = 0; 683e220edf2SJack Xiao 684e220edf2SJack Xiao for (i = 0; i < MAX_COMPUTE_PIPES; i++) 685e220edf2SJack Xiao mes_set_hw_res_pkt.compute_hqd_mask[i] = 686e220edf2SJack Xiao mes->compute_hqd_mask[i]; 687e220edf2SJack Xiao 688e220edf2SJack Xiao mes_v12_1_set_gfx_hqd_mask(&mes_set_hw_res_pkt); 689e220edf2SJack Xiao 690e220edf2SJack Xiao for (i = 0; i < MAX_SDMA_PIPES; i++) 691e220edf2SJack Xiao mes_set_hw_res_pkt.sdma_hqd_mask[i] = 692e220edf2SJack Xiao mes->sdma_hqd_mask[i]; 693e220edf2SJack Xiao 694e220edf2SJack Xiao for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) 695e220edf2SJack Xiao mes_set_hw_res_pkt.aggregated_doorbells[i] = 696e220edf2SJack Xiao mes->aggregated_doorbells[i]; 697e220edf2SJack Xiao } 698e220edf2SJack Xiao 699e220edf2SJack Xiao mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = 700e220edf2SJack Xiao mes->sch_ctx_gpu_addr[pipe]; 701e220edf2SJack Xiao mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = 702e220edf2SJack Xiao mes->query_status_fence_gpu_addr[pipe]; 703e220edf2SJack Xiao 704e220edf2SJack Xiao for (i = 0; i < 5; i++) { 70575053887SJack Xiao mes_set_hw_res_pkt.gc_base[i] = 706a5192fbbSLikun Gao adev->reg_offset[GC_HWIP][0][i]; 707e220edf2SJack Xiao mes_set_hw_res_pkt.mmhub_base[i] = 708e220edf2SJack Xiao adev->reg_offset[MMHUB_HWIP][0][i]; 709e220edf2SJack Xiao mes_set_hw_res_pkt.osssys_base[i] = 710e220edf2SJack Xiao adev->reg_offset[OSSSYS_HWIP][0][i]; 711e220edf2SJack Xiao } 712e220edf2SJack Xiao 713e220edf2SJack Xiao mes_set_hw_res_pkt.disable_reset = 1; 714e220edf2SJack Xiao mes_set_hw_res_pkt.disable_mes_log = 1; 715e220edf2SJack Xiao mes_set_hw_res_pkt.use_different_vmid_compute = 1; 716e220edf2SJack Xiao mes_set_hw_res_pkt.enable_reg_active_poll = 1; 717e220edf2SJack Xiao mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; 718e220edf2SJack Xiao 719e220edf2SJack Xiao /* 720e220edf2SJack Xiao * Keep oversubscribe timer for sdma . When we have unmapped doorbell 721e220edf2SJack Xiao * handling support, other queue will not use the oversubscribe timer. 722e220edf2SJack Xiao * handling mode - 0: disabled; 1: basic version; 2: basic+ version 723e220edf2SJack Xiao */ 724e220edf2SJack Xiao mes_set_hw_res_pkt.oversubscription_timer = 50; 725e220edf2SJack Xiao mes_set_hw_res_pkt.unmapped_doorbell_handling = 1; 726e220edf2SJack Xiao 727e220edf2SJack Xiao if (amdgpu_mes_log_enable) { 728e220edf2SJack Xiao mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; 729e220edf2SJack Xiao mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = 73061a3ade2SMichael Chen mes->event_log_gpu_addr + MES_PIPE_INST(xcc_id, pipe) * AMDGPU_MES_LOG_BUFFER_SIZE; 731e220edf2SJack Xiao } 732e220edf2SJack Xiao 733e220edf2SJack Xiao if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) 734e220edf2SJack Xiao mes_set_hw_res_pkt.limit_single_process = 1; 735e220edf2SJack Xiao 736e220edf2SJack Xiao return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe, 737e220edf2SJack Xiao &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), 738e220edf2SJack Xiao offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); 739e220edf2SJack Xiao } 740e220edf2SJack Xiao 741e220edf2SJack Xiao static void mes_v12_1_init_aggregated_doorbell(struct amdgpu_mes *mes, 742e220edf2SJack Xiao int xcc_id) 743e220edf2SJack Xiao { 744e220edf2SJack Xiao struct amdgpu_device *adev = mes->adev; 745e220edf2SJack Xiao uint32_t data; 746e220edf2SJack Xiao 747e220edf2SJack Xiao data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1); 748e220edf2SJack Xiao data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK | 749e220edf2SJack Xiao CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK | 750e220edf2SJack Xiao CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK); 751e220edf2SJack Xiao data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] << 752e220edf2SJack Xiao CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT; 753e220edf2SJack Xiao data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT; 754e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1, data); 755e220edf2SJack Xiao 756e220edf2SJack Xiao data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2); 757e220edf2SJack Xiao data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK | 758e220edf2SJack Xiao CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK | 759e220edf2SJack Xiao CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK); 760e220edf2SJack Xiao data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] << 761e220edf2SJack Xiao CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT; 762e220edf2SJack Xiao data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT; 763e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2, data); 764e220edf2SJack Xiao 765e220edf2SJack Xiao data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3); 766e220edf2SJack Xiao data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK | 767e220edf2SJack Xiao CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK | 768e220edf2SJack Xiao CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK); 769e220edf2SJack Xiao data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] << 770e220edf2SJack Xiao CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT; 771e220edf2SJack Xiao data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT; 772e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3, data); 773e220edf2SJack Xiao 774e220edf2SJack Xiao data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4); 775e220edf2SJack Xiao data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK | 776e220edf2SJack Xiao CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK | 777e220edf2SJack Xiao CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK); 778e220edf2SJack Xiao data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] << 779e220edf2SJack Xiao CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT; 780e220edf2SJack Xiao data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT; 781e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4, data); 782e220edf2SJack Xiao 783e220edf2SJack Xiao data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5); 784e220edf2SJack Xiao data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK | 785e220edf2SJack Xiao CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK | 786e220edf2SJack Xiao CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK); 787e220edf2SJack Xiao data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] << 788e220edf2SJack Xiao CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT; 789e220edf2SJack Xiao data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT; 790e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5, data); 791e220edf2SJack Xiao 792e220edf2SJack Xiao data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT; 793e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_GFX_CONTROL, data); 794e220edf2SJack Xiao } 795e220edf2SJack Xiao 796e220edf2SJack Xiao 797e220edf2SJack Xiao static void mes_v12_1_enable_unmapped_doorbell_handling( 798e220edf2SJack Xiao struct amdgpu_mes *mes, bool enable, int xcc_id) 799e220edf2SJack Xiao { 800e220edf2SJack Xiao struct amdgpu_device *adev = mes->adev; 801e220edf2SJack Xiao uint32_t data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL); 802e220edf2SJack Xiao 803e220edf2SJack Xiao /* 804e220edf2SJack Xiao * The default PROC_LSB settng is 0xc which means doorbell 805e220edf2SJack Xiao * addr[16:12] gives the doorbell page number. For kfd, each 806e220edf2SJack Xiao * process will use 2 pages of doorbell, we need to change the 807e220edf2SJack Xiao * setting to 0xd 808e220edf2SJack Xiao */ 809e220edf2SJack Xiao data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK; 810e220edf2SJack Xiao data |= 0xd << CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT; 811e220edf2SJack Xiao 812e220edf2SJack Xiao data |= (enable ? 1 : 0) << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT; 813e220edf2SJack Xiao 814e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL, data); 815e220edf2SJack Xiao } 816e220edf2SJack Xiao 817e220edf2SJack Xiao #if 0 818e220edf2SJack Xiao static int mes_v12_1_reset_legacy_queue(struct amdgpu_mes *mes, 819e220edf2SJack Xiao struct mes_reset_legacy_queue_input *input) 820e220edf2SJack Xiao { 821e220edf2SJack Xiao union MESAPI__RESET mes_reset_queue_pkt; 822e220edf2SJack Xiao int pipe; 823e220edf2SJack Xiao 824e220edf2SJack Xiao memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); 825e220edf2SJack Xiao 826e220edf2SJack Xiao mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; 827e220edf2SJack Xiao mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; 828e220edf2SJack Xiao mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 829e220edf2SJack Xiao 830e220edf2SJack Xiao mes_reset_queue_pkt.queue_type = 831e220edf2SJack Xiao convert_to_mes_queue_type(input->queue_type); 832e220edf2SJack Xiao 833e220edf2SJack Xiao if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { 834e220edf2SJack Xiao mes_reset_queue_pkt.reset_legacy_gfx = 1; 835e220edf2SJack Xiao mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; 836e220edf2SJack Xiao mes_reset_queue_pkt.queue_id_lp = input->queue_id; 837e220edf2SJack Xiao mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr; 838e220edf2SJack Xiao mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset; 839e220edf2SJack Xiao mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr; 840e220edf2SJack Xiao mes_reset_queue_pkt.vmid_id_lp = input->vmid; 841e220edf2SJack Xiao } else { 842e220edf2SJack Xiao mes_reset_queue_pkt.reset_queue_only = 1; 843e220edf2SJack Xiao mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; 844e220edf2SJack Xiao } 845e220edf2SJack Xiao 846e220edf2SJack Xiao if (mes->adev->enable_uni_mes) 847e220edf2SJack Xiao pipe = AMDGPU_MES_KIQ_PIPE; 848e220edf2SJack Xiao else 849e220edf2SJack Xiao pipe = AMDGPU_MES_SCHED_PIPE; 850e220edf2SJack Xiao 851e220edf2SJack Xiao return mes_v12_1_submit_pkt_and_poll_completion(mes, 852e220edf2SJack Xiao input->xcc_id, pipe, 853e220edf2SJack Xiao &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), 854e220edf2SJack Xiao offsetof(union MESAPI__RESET, api_status)); 855e220edf2SJack Xiao } 856e220edf2SJack Xiao #endif 857e220edf2SJack Xiao 858*d0c989a0SShaoyun Liu static int mes_v12_inv_tlb_convert_hub_id(uint8_t id) 859*d0c989a0SShaoyun Liu { 860*d0c989a0SShaoyun Liu /* 861*d0c989a0SShaoyun Liu * MES doesn't support invalidate gc_hub on slave xcc individually 862*d0c989a0SShaoyun Liu * master xcc will invalidate all gc_hub for the partition 863*d0c989a0SShaoyun Liu */ 864*d0c989a0SShaoyun Liu if (AMDGPU_IS_GFXHUB(id)) 865*d0c989a0SShaoyun Liu return 0; 866*d0c989a0SShaoyun Liu else if (AMDGPU_IS_MMHUB0(id)) 867*d0c989a0SShaoyun Liu return 1; 868*d0c989a0SShaoyun Liu else if (AMDGPU_IS_MMHUB1(id)) 869*d0c989a0SShaoyun Liu return 2; 870*d0c989a0SShaoyun Liu return -EINVAL; 871*d0c989a0SShaoyun Liu 872*d0c989a0SShaoyun Liu } 873*d0c989a0SShaoyun Liu 874*d0c989a0SShaoyun Liu static int mes_v12_1_inv_tlbs_pasid(struct amdgpu_mes *mes, 875*d0c989a0SShaoyun Liu struct mes_inv_tlbs_pasid_input *input) 876*d0c989a0SShaoyun Liu { 877*d0c989a0SShaoyun Liu union MESAPI__INV_TLBS mes_inv_tlbs; 878*d0c989a0SShaoyun Liu int xcc_id = input->xcc_id; 879*d0c989a0SShaoyun Liu int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 880*d0c989a0SShaoyun Liu int ret; 881*d0c989a0SShaoyun Liu 882*d0c989a0SShaoyun Liu if (mes->enable_coop_mode) 883*d0c989a0SShaoyun Liu xcc_id = mes->master_xcc_ids[inst]; 884*d0c989a0SShaoyun Liu 885*d0c989a0SShaoyun Liu memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs)); 886*d0c989a0SShaoyun Liu 887*d0c989a0SShaoyun Liu mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER; 888*d0c989a0SShaoyun Liu mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS; 889*d0c989a0SShaoyun Liu mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS; 890*d0c989a0SShaoyun Liu 891*d0c989a0SShaoyun Liu mes_inv_tlbs.invalidate_tlbs.inv_sel = 0; 892*d0c989a0SShaoyun Liu mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type; 893*d0c989a0SShaoyun Liu mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid; 894*d0c989a0SShaoyun Liu 895*d0c989a0SShaoyun Liu /*convert amdgpu_mes_hub_id to mes expected hub_id */ 896*d0c989a0SShaoyun Liu ret = mes_v12_inv_tlb_convert_hub_id(input->hub_id); 897*d0c989a0SShaoyun Liu if (ret < 0) 898*d0c989a0SShaoyun Liu return -EINVAL; 899*d0c989a0SShaoyun Liu mes_inv_tlbs.invalidate_tlbs.hub_id = ret; 900*d0c989a0SShaoyun Liu return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, AMDGPU_MES_KIQ_PIPE, 901*d0c989a0SShaoyun Liu &mes_inv_tlbs, sizeof(mes_inv_tlbs), 902*d0c989a0SShaoyun Liu offsetof(union MESAPI__INV_TLBS, api_status)); 903*d0c989a0SShaoyun Liu 904*d0c989a0SShaoyun Liu } 905*d0c989a0SShaoyun Liu 906e220edf2SJack Xiao static const struct amdgpu_mes_funcs mes_v12_1_funcs = { 907e220edf2SJack Xiao .add_hw_queue = mes_v12_1_add_hw_queue, 908e220edf2SJack Xiao .remove_hw_queue = mes_v12_1_remove_hw_queue, 909e220edf2SJack Xiao .map_legacy_queue = mes_v12_1_map_legacy_queue, 910e220edf2SJack Xiao .unmap_legacy_queue = mes_v12_1_unmap_legacy_queue, 911e220edf2SJack Xiao .suspend_gang = mes_v12_1_suspend_gang, 912e220edf2SJack Xiao .resume_gang = mes_v12_1_resume_gang, 913e220edf2SJack Xiao .misc_op = mes_v12_1_misc_op, 914e220edf2SJack Xiao .reset_hw_queue = mes_v12_1_reset_hw_queue, 915*d0c989a0SShaoyun Liu .invalidate_tlbs_pasid = mes_v12_1_inv_tlbs_pasid, 916e220edf2SJack Xiao }; 917e220edf2SJack Xiao 918e220edf2SJack Xiao static int mes_v12_1_allocate_ucode_buffer(struct amdgpu_device *adev, 919e220edf2SJack Xiao enum amdgpu_mes_pipe pipe, 920e220edf2SJack Xiao int xcc_id) 921e220edf2SJack Xiao { 922e220edf2SJack Xiao int r, inst = MES_PIPE_INST(xcc_id, pipe); 923e220edf2SJack Xiao const struct mes_firmware_header_v1_0 *mes_hdr; 924e220edf2SJack Xiao const __le32 *fw_data; 925e220edf2SJack Xiao unsigned fw_size; 926e220edf2SJack Xiao 927e220edf2SJack Xiao mes_hdr = (const struct mes_firmware_header_v1_0 *) 928e220edf2SJack Xiao adev->mes.fw[pipe]->data; 929e220edf2SJack Xiao 930e220edf2SJack Xiao fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 931e220edf2SJack Xiao le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); 932e220edf2SJack Xiao fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); 933e220edf2SJack Xiao 934e220edf2SJack Xiao r = amdgpu_bo_create_reserved(adev, fw_size, 935e220edf2SJack Xiao PAGE_SIZE, 936e220edf2SJack Xiao AMDGPU_GEM_DOMAIN_VRAM, 937e220edf2SJack Xiao &adev->mes.ucode_fw_obj[inst], 938e220edf2SJack Xiao &adev->mes.ucode_fw_gpu_addr[inst], 939e220edf2SJack Xiao (void **)&adev->mes.ucode_fw_ptr[inst]); 940e220edf2SJack Xiao if (r) { 941e220edf2SJack Xiao dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r); 942e220edf2SJack Xiao return r; 943e220edf2SJack Xiao } 944e220edf2SJack Xiao 945e220edf2SJack Xiao memcpy(adev->mes.ucode_fw_ptr[inst], fw_data, fw_size); 946e220edf2SJack Xiao 947e220edf2SJack Xiao amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[inst]); 948e220edf2SJack Xiao amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[inst]); 949e220edf2SJack Xiao 950e220edf2SJack Xiao return 0; 951e220edf2SJack Xiao } 952e220edf2SJack Xiao 953e220edf2SJack Xiao static int mes_v12_1_allocate_ucode_data_buffer(struct amdgpu_device *adev, 954e220edf2SJack Xiao enum amdgpu_mes_pipe pipe, 955e220edf2SJack Xiao int xcc_id) 956e220edf2SJack Xiao { 957e220edf2SJack Xiao int r, inst = MES_PIPE_INST(xcc_id, pipe); 958e220edf2SJack Xiao const struct mes_firmware_header_v1_0 *mes_hdr; 959e220edf2SJack Xiao const __le32 *fw_data; 960e220edf2SJack Xiao unsigned fw_size; 961e220edf2SJack Xiao 962e220edf2SJack Xiao mes_hdr = (const struct mes_firmware_header_v1_0 *) 963e220edf2SJack Xiao adev->mes.fw[pipe]->data; 964e220edf2SJack Xiao 965e220edf2SJack Xiao fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 966e220edf2SJack Xiao le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); 967e220edf2SJack Xiao fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); 968e220edf2SJack Xiao 969e220edf2SJack Xiao r = amdgpu_bo_create_reserved(adev, fw_size, 970e220edf2SJack Xiao 64 * 1024, 971e220edf2SJack Xiao AMDGPU_GEM_DOMAIN_VRAM, 972e220edf2SJack Xiao &adev->mes.data_fw_obj[inst], 973e220edf2SJack Xiao &adev->mes.data_fw_gpu_addr[inst], 974e220edf2SJack Xiao (void **)&adev->mes.data_fw_ptr[inst]); 975e220edf2SJack Xiao if (r) { 976e220edf2SJack Xiao dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r); 977e220edf2SJack Xiao return r; 978e220edf2SJack Xiao } 979e220edf2SJack Xiao 980e220edf2SJack Xiao memcpy(adev->mes.data_fw_ptr[inst], fw_data, fw_size); 981e220edf2SJack Xiao 982e220edf2SJack Xiao amdgpu_bo_kunmap(adev->mes.data_fw_obj[inst]); 983e220edf2SJack Xiao amdgpu_bo_unreserve(adev->mes.data_fw_obj[inst]); 984e220edf2SJack Xiao 985e220edf2SJack Xiao return 0; 986e220edf2SJack Xiao } 987e220edf2SJack Xiao 988e220edf2SJack Xiao static void mes_v12_1_free_ucode_buffers(struct amdgpu_device *adev, 989e220edf2SJack Xiao enum amdgpu_mes_pipe pipe, 990e220edf2SJack Xiao int xcc_id) 991e220edf2SJack Xiao { 992e220edf2SJack Xiao int inst = MES_PIPE_INST(xcc_id, pipe); 993e220edf2SJack Xiao 994e220edf2SJack Xiao amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[inst], 995e220edf2SJack Xiao &adev->mes.data_fw_gpu_addr[inst], 996e220edf2SJack Xiao (void **)&adev->mes.data_fw_ptr[inst]); 997e220edf2SJack Xiao 998e220edf2SJack Xiao amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[inst], 999e220edf2SJack Xiao &adev->mes.ucode_fw_gpu_addr[inst], 1000e220edf2SJack Xiao (void **)&adev->mes.ucode_fw_ptr[inst]); 1001e220edf2SJack Xiao } 1002e220edf2SJack Xiao 1003e220edf2SJack Xiao static void mes_v12_1_enable(struct amdgpu_device *adev, 1004e220edf2SJack Xiao bool enable, int xcc_id) 1005e220edf2SJack Xiao { 1006e220edf2SJack Xiao uint64_t ucode_addr; 1007e220edf2SJack Xiao uint32_t pipe, data = 0; 1008e220edf2SJack Xiao 1009e220edf2SJack Xiao if (enable) { 1010e220edf2SJack Xiao data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL); 1011e220edf2SJack Xiao data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); 1012e220edf2SJack Xiao data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); 1013e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); 1014e220edf2SJack Xiao 1015e220edf2SJack Xiao mutex_lock(&adev->srbm_mutex); 1016e220edf2SJack Xiao for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1017e220edf2SJack Xiao soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, 1018e220edf2SJack Xiao GET_INST(GC, xcc_id)); 1019e220edf2SJack Xiao 1020e220edf2SJack Xiao ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; 1021e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), 1022e220edf2SJack Xiao regCP_MES_PRGRM_CNTR_START, 1023e220edf2SJack Xiao lower_32_bits(ucode_addr)); 1024e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), 1025e220edf2SJack Xiao regCP_MES_PRGRM_CNTR_START_HI, 1026e220edf2SJack Xiao upper_32_bits(ucode_addr)); 1027e220edf2SJack Xiao } 1028e220edf2SJack Xiao soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1029e220edf2SJack Xiao mutex_unlock(&adev->srbm_mutex); 1030e220edf2SJack Xiao 1031e220edf2SJack Xiao /* unhalt MES and activate pipe0 */ 1032e220edf2SJack Xiao data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); 1033e220edf2SJack Xiao data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); 1034e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); 1035e220edf2SJack Xiao 1036e220edf2SJack Xiao if (amdgpu_emu_mode) 1037e220edf2SJack Xiao msleep(500); 1038e220edf2SJack Xiao else if (adev->enable_uni_mes) 1039e220edf2SJack Xiao udelay(500); 1040e220edf2SJack Xiao else 1041e220edf2SJack Xiao udelay(50); 1042e220edf2SJack Xiao } else { 1043e220edf2SJack Xiao data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL); 1044e220edf2SJack Xiao data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0); 1045e220edf2SJack Xiao data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0); 1046e220edf2SJack Xiao data = REG_SET_FIELD(data, CP_MES_CNTL, 1047e220edf2SJack Xiao MES_INVALIDATE_ICACHE, 1); 1048e220edf2SJack Xiao data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); 1049e220edf2SJack Xiao data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); 1050e220edf2SJack Xiao data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); 1051e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data); 1052e220edf2SJack Xiao } 1053e220edf2SJack Xiao } 1054e220edf2SJack Xiao 1055e220edf2SJack Xiao static void mes_v12_1_set_ucode_start_addr(struct amdgpu_device *adev, 1056e220edf2SJack Xiao int xcc_id) 1057e220edf2SJack Xiao { 1058e220edf2SJack Xiao uint64_t ucode_addr; 1059e220edf2SJack Xiao int pipe; 1060e220edf2SJack Xiao 1061e220edf2SJack Xiao mes_v12_1_enable(adev, false, xcc_id); 1062e220edf2SJack Xiao 1063e220edf2SJack Xiao mutex_lock(&adev->srbm_mutex); 1064e220edf2SJack Xiao for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1065e220edf2SJack Xiao /* me=3, queue=0 */ 1066e220edf2SJack Xiao soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); 1067e220edf2SJack Xiao 1068e220edf2SJack Xiao /* set ucode start address */ 1069e220edf2SJack Xiao ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; 1070e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START, 1071e220edf2SJack Xiao lower_32_bits(ucode_addr)); 1072e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START_HI, 1073e220edf2SJack Xiao upper_32_bits(ucode_addr)); 1074e220edf2SJack Xiao 1075e220edf2SJack Xiao soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1076e220edf2SJack Xiao } 1077e220edf2SJack Xiao mutex_unlock(&adev->srbm_mutex); 1078e220edf2SJack Xiao } 1079e220edf2SJack Xiao 1080e220edf2SJack Xiao /* This function is for backdoor MES firmware */ 1081e220edf2SJack Xiao static int mes_v12_1_load_microcode(struct amdgpu_device *adev, 1082e220edf2SJack Xiao enum amdgpu_mes_pipe pipe, 1083e220edf2SJack Xiao bool prime_icache, int xcc_id) 1084e220edf2SJack Xiao { 1085e220edf2SJack Xiao int r, inst = MES_PIPE_INST(xcc_id, pipe); 1086e220edf2SJack Xiao uint32_t data; 1087e220edf2SJack Xiao 1088e220edf2SJack Xiao mes_v12_1_enable(adev, false, xcc_id); 1089e220edf2SJack Xiao 1090e220edf2SJack Xiao if (!adev->mes.fw[pipe]) 1091e220edf2SJack Xiao return -EINVAL; 1092e220edf2SJack Xiao 1093e220edf2SJack Xiao r = mes_v12_1_allocate_ucode_buffer(adev, pipe, xcc_id); 1094e220edf2SJack Xiao if (r) 1095e220edf2SJack Xiao return r; 1096e220edf2SJack Xiao 1097e220edf2SJack Xiao r = mes_v12_1_allocate_ucode_data_buffer(adev, pipe, xcc_id); 1098e220edf2SJack Xiao if (r) { 1099e220edf2SJack Xiao mes_v12_1_free_ucode_buffers(adev, pipe, xcc_id); 1100e220edf2SJack Xiao return r; 1101e220edf2SJack Xiao } 1102e220edf2SJack Xiao 1103e220edf2SJack Xiao mutex_lock(&adev->srbm_mutex); 1104e220edf2SJack Xiao /* me=3, pipe=0, queue=0 */ 1105e220edf2SJack Xiao soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); 1106e220edf2SJack Xiao 1107e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_CNTL, 0); 1108e220edf2SJack Xiao 1109e220edf2SJack Xiao /* set ucode fimrware address */ 1110e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_LO, 1111e220edf2SJack Xiao lower_32_bits(adev->mes.ucode_fw_gpu_addr[inst])); 1112e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_HI, 1113e220edf2SJack Xiao upper_32_bits(adev->mes.ucode_fw_gpu_addr[inst])); 1114e220edf2SJack Xiao 1115e220edf2SJack Xiao /* set ucode instruction cache boundary to 2M-1 */ 1116e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MIBOUND_LO, 0x1FFFFF); 1117e220edf2SJack Xiao 1118e220edf2SJack Xiao /* set ucode data firmware address */ 1119e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_LO, 1120e220edf2SJack Xiao lower_32_bits(adev->mes.data_fw_gpu_addr[inst])); 1121e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_HI, 1122e220edf2SJack Xiao upper_32_bits(adev->mes.data_fw_gpu_addr[inst])); 1123e220edf2SJack Xiao 1124e220edf2SJack Xiao /* Set data cache boundary CP_MES_MDBOUND_LO */ 1125e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBOUND_LO, 0x7FFFF); 1126e220edf2SJack Xiao 1127e220edf2SJack Xiao if (prime_icache) { 1128e220edf2SJack Xiao /* invalidate ICACHE */ 1129e220edf2SJack Xiao data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL); 1130e220edf2SJack Xiao data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0); 1131e220edf2SJack Xiao data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1); 1132e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data); 1133e220edf2SJack Xiao 1134e220edf2SJack Xiao /* prime the ICACHE. */ 1135e220edf2SJack Xiao data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL); 1136e220edf2SJack Xiao data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1); 1137e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data); 1138e220edf2SJack Xiao } 1139e220edf2SJack Xiao 1140e220edf2SJack Xiao soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1141e220edf2SJack Xiao mutex_unlock(&adev->srbm_mutex); 1142e220edf2SJack Xiao 1143e220edf2SJack Xiao return 0; 1144e220edf2SJack Xiao } 1145e220edf2SJack Xiao 1146e220edf2SJack Xiao static int mes_v12_1_allocate_eop_buf(struct amdgpu_device *adev, 1147e220edf2SJack Xiao enum amdgpu_mes_pipe pipe, 1148e220edf2SJack Xiao int xcc_id) 1149e220edf2SJack Xiao { 1150e220edf2SJack Xiao int r, inst = MES_PIPE_INST(xcc_id, pipe); 1151e220edf2SJack Xiao u32 *eop; 1152e220edf2SJack Xiao 1153e220edf2SJack Xiao r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE, 1154e220edf2SJack Xiao AMDGPU_GEM_DOMAIN_GTT, 1155e220edf2SJack Xiao &adev->mes.eop_gpu_obj[inst], 1156e220edf2SJack Xiao &adev->mes.eop_gpu_addr[inst], 1157e220edf2SJack Xiao (void **)&eop); 1158e220edf2SJack Xiao if (r) { 1159e220edf2SJack Xiao dev_warn(adev->dev, "(%d) create EOP bo failed\n", r); 1160e220edf2SJack Xiao return r; 1161e220edf2SJack Xiao } 1162e220edf2SJack Xiao 1163e220edf2SJack Xiao memset(eop, 0, 1164e220edf2SJack Xiao adev->mes.eop_gpu_obj[inst]->tbo.base.size); 1165e220edf2SJack Xiao 1166e220edf2SJack Xiao amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[inst]); 1167e220edf2SJack Xiao amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[inst]); 1168e220edf2SJack Xiao 1169e220edf2SJack Xiao return 0; 1170e220edf2SJack Xiao } 1171e220edf2SJack Xiao 117275053887SJack Xiao static int mes_v12_1_allocate_shared_cmd_buf(struct amdgpu_device *adev, 117375053887SJack Xiao enum amdgpu_mes_pipe pipe, 117475053887SJack Xiao int xcc_id) 117575053887SJack Xiao { 117675053887SJack Xiao int r, inst = MES_PIPE_INST(xcc_id, pipe); 117775053887SJack Xiao 117875053887SJack Xiao if (pipe == AMDGPU_MES_KIQ_PIPE) 117975053887SJack Xiao return 0; 118075053887SJack Xiao 118175053887SJack Xiao r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, 118275053887SJack Xiao AMDGPU_GEM_DOMAIN_VRAM, 118375053887SJack Xiao &adev->mes.shared_cmd_buf_obj[inst], 118475053887SJack Xiao &adev->mes.shared_cmd_buf_gpu_addr[inst], 118575053887SJack Xiao NULL); 118675053887SJack Xiao if (r) { 118775053887SJack Xiao dev_err(adev->dev, 118875053887SJack Xiao "(%d) failed to create shared cmd buf bo\n", r); 118975053887SJack Xiao return r; 119075053887SJack Xiao } 119175053887SJack Xiao 119275053887SJack Xiao return 0; 119375053887SJack Xiao } 119475053887SJack Xiao 1195e220edf2SJack Xiao static int mes_v12_1_mqd_init(struct amdgpu_ring *ring) 1196e220edf2SJack Xiao { 1197e220edf2SJack Xiao struct v12_1_mes_mqd *mqd = ring->mqd_ptr; 1198e220edf2SJack Xiao uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 1199e220edf2SJack Xiao uint32_t tmp; 1200e220edf2SJack Xiao 1201e220edf2SJack Xiao mqd->header = 0xC0310800; 1202e220edf2SJack Xiao mqd->compute_pipelinestat_enable = 0x00000001; 1203e220edf2SJack Xiao mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 1204e220edf2SJack Xiao mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 1205e220edf2SJack Xiao mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 1206e220edf2SJack Xiao mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 1207e220edf2SJack Xiao mqd->compute_misc_reserved = 0x00000007; 1208e220edf2SJack Xiao 1209e220edf2SJack Xiao eop_base_addr = ring->eop_gpu_addr >> 8; 1210e220edf2SJack Xiao 1211e220edf2SJack Xiao /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 1212e220edf2SJack Xiao tmp = regCP_HQD_EOP_CONTROL_DEFAULT; 1213e220edf2SJack Xiao tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 1214e220edf2SJack Xiao (order_base_2(MES_EOP_SIZE / 4) - 1)); 1215e220edf2SJack Xiao 1216e220edf2SJack Xiao mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr); 1217e220edf2SJack Xiao mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 1218e220edf2SJack Xiao mqd->cp_hqd_eop_control = tmp; 1219e220edf2SJack Xiao 1220e220edf2SJack Xiao /* disable the queue if it's active */ 1221e220edf2SJack Xiao ring->wptr = 0; 1222e220edf2SJack Xiao mqd->cp_hqd_pq_rptr = 0; 1223e220edf2SJack Xiao mqd->cp_hqd_pq_wptr_lo = 0; 1224e220edf2SJack Xiao mqd->cp_hqd_pq_wptr_hi = 0; 1225e220edf2SJack Xiao 1226e220edf2SJack Xiao /* set the pointer to the MQD */ 1227e220edf2SJack Xiao mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 1228e220edf2SJack Xiao mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 1229e220edf2SJack Xiao 1230e220edf2SJack Xiao /* set MQD vmid to 0 */ 1231e220edf2SJack Xiao tmp = regCP_MQD_CONTROL_DEFAULT; 1232e220edf2SJack Xiao tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 1233e220edf2SJack Xiao mqd->cp_mqd_control = tmp; 1234e220edf2SJack Xiao 1235e220edf2SJack Xiao /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 1236e220edf2SJack Xiao hqd_gpu_addr = ring->gpu_addr >> 8; 1237e220edf2SJack Xiao mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr); 1238e220edf2SJack Xiao mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 1239e220edf2SJack Xiao 1240e220edf2SJack Xiao /* set the wb address whether it's enabled or not */ 1241e220edf2SJack Xiao wb_gpu_addr = ring->rptr_gpu_addr; 1242e220edf2SJack Xiao mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 1243e220edf2SJack Xiao mqd->cp_hqd_pq_rptr_report_addr_hi = 1244e220edf2SJack Xiao upper_32_bits(wb_gpu_addr) & 0xffff; 1245e220edf2SJack Xiao 1246e220edf2SJack Xiao /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 1247e220edf2SJack Xiao wb_gpu_addr = ring->wptr_gpu_addr; 1248e220edf2SJack Xiao mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8; 1249e220edf2SJack Xiao mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 1250e220edf2SJack Xiao 1251e220edf2SJack Xiao /* set up the HQD, this is similar to CP_RB0_CNTL */ 1252e220edf2SJack Xiao tmp = regCP_HQD_PQ_CONTROL_DEFAULT; 1253e220edf2SJack Xiao tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 1254e220edf2SJack Xiao (order_base_2(ring->ring_size / 4) - 1)); 1255e220edf2SJack Xiao tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 1256e220edf2SJack Xiao ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 1257e220edf2SJack Xiao tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); 1258e220edf2SJack Xiao tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); 1259e220edf2SJack Xiao tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 1260e220edf2SJack Xiao tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 1261e220edf2SJack Xiao tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1); 1262e220edf2SJack Xiao mqd->cp_hqd_pq_control = tmp; 1263e220edf2SJack Xiao 1264e220edf2SJack Xiao /* enable doorbell */ 1265e220edf2SJack Xiao tmp = 0; 1266e220edf2SJack Xiao if (ring->use_doorbell) { 1267e220edf2SJack Xiao tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1268e220edf2SJack Xiao DOORBELL_OFFSET, ring->doorbell_index); 1269e220edf2SJack Xiao tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1270e220edf2SJack Xiao DOORBELL_EN, 1); 1271e220edf2SJack Xiao tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1272e220edf2SJack Xiao DOORBELL_SOURCE, 0); 1273e220edf2SJack Xiao tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1274e220edf2SJack Xiao DOORBELL_HIT, 0); 1275e220edf2SJack Xiao } else { 1276e220edf2SJack Xiao tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1277e220edf2SJack Xiao DOORBELL_EN, 0); 1278e220edf2SJack Xiao } 1279e220edf2SJack Xiao mqd->cp_hqd_pq_doorbell_control = tmp; 1280e220edf2SJack Xiao 1281e220edf2SJack Xiao mqd->cp_hqd_vmid = 0; 1282e220edf2SJack Xiao /* activate the queue */ 1283e220edf2SJack Xiao mqd->cp_hqd_active = 1; 1284e220edf2SJack Xiao 1285e220edf2SJack Xiao tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; 1286e220edf2SJack Xiao tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, 1287e220edf2SJack Xiao PRELOAD_SIZE, 0x63); 1288e220edf2SJack Xiao mqd->cp_hqd_persistent_state = tmp; 1289e220edf2SJack Xiao 1290e220edf2SJack Xiao mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT; 1291e220edf2SJack Xiao mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT; 1292e220edf2SJack Xiao mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT; 1293e220edf2SJack Xiao 1294e220edf2SJack Xiao /* 1295e220edf2SJack Xiao * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped 1296e220edf2SJack Xiao * doorbell handling. This is a reserved CP internal register can 1297e220edf2SJack Xiao * not be accesss by others 1298e220edf2SJack Xiao */ 129977385a2dSLikun Gao mqd->cp_hqd_gfx_control = BIT(15); 1300e220edf2SJack Xiao 1301e220edf2SJack Xiao return 0; 1302e220edf2SJack Xiao } 1303e220edf2SJack Xiao 1304e220edf2SJack Xiao static void mes_v12_1_queue_init_register(struct amdgpu_ring *ring, 1305e220edf2SJack Xiao int xcc_id) 1306e220edf2SJack Xiao { 1307e220edf2SJack Xiao struct v12_1_mes_mqd *mqd = ring->mqd_ptr; 1308e220edf2SJack Xiao struct amdgpu_device *adev = ring->adev; 1309e220edf2SJack Xiao uint32_t data = 0; 1310e220edf2SJack Xiao 1311e220edf2SJack Xiao mutex_lock(&adev->srbm_mutex); 1312e220edf2SJack Xiao soc_v1_0_grbm_select(adev, 3, ring->pipe, 0, 0, GET_INST(GC, xcc_id)); 1313e220edf2SJack Xiao 1314e220edf2SJack Xiao /* set CP_HQD_VMID.VMID = 0. */ 1315e220edf2SJack Xiao data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID); 1316e220edf2SJack Xiao data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0); 1317e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, data); 1318e220edf2SJack Xiao 1319e220edf2SJack Xiao /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */ 1320e220edf2SJack Xiao data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL); 1321e220edf2SJack Xiao data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, 1322e220edf2SJack Xiao DOORBELL_EN, 0); 1323e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data); 1324e220edf2SJack Xiao 1325e220edf2SJack Xiao /* set CP_MQD_BASE_ADDR/HI with the MQD base address */ 1326e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); 1327e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); 1328e220edf2SJack Xiao 1329e220edf2SJack Xiao /* set CP_MQD_CONTROL.VMID=0 */ 1330e220edf2SJack Xiao data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL); 1331e220edf2SJack Xiao data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0); 1332e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL, 0); 1333e220edf2SJack Xiao 1334e220edf2SJack Xiao /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */ 1335e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); 1336e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); 1337e220edf2SJack Xiao 1338e220edf2SJack Xiao /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */ 1339e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR, 1340e220edf2SJack Xiao mqd->cp_hqd_pq_rptr_report_addr_lo); 1341e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 1342e220edf2SJack Xiao mqd->cp_hqd_pq_rptr_report_addr_hi); 1343e220edf2SJack Xiao 1344e220edf2SJack Xiao /* set CP_HQD_PQ_CONTROL */ 1345e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); 1346e220edf2SJack Xiao 1347e220edf2SJack Xiao /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */ 1348e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR, 1349e220edf2SJack Xiao mqd->cp_hqd_pq_wptr_poll_addr_lo); 1350e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 1351e220edf2SJack Xiao mqd->cp_hqd_pq_wptr_poll_addr_hi); 1352e220edf2SJack Xiao 1353e220edf2SJack Xiao /* set CP_HQD_PQ_DOORBELL_CONTROL */ 1354e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 1355e220edf2SJack Xiao mqd->cp_hqd_pq_doorbell_control); 1356e220edf2SJack Xiao 1357e220edf2SJack Xiao /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */ 1358e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); 1359e220edf2SJack Xiao 1360e220edf2SJack Xiao /* set CP_HQD_ACTIVE.ACTIVE=1 */ 1361e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, mqd->cp_hqd_active); 1362e220edf2SJack Xiao 1363e220edf2SJack Xiao soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1364e220edf2SJack Xiao mutex_unlock(&adev->srbm_mutex); 1365e220edf2SJack Xiao } 1366e220edf2SJack Xiao 1367e220edf2SJack Xiao static int mes_v12_1_kiq_enable_queue(struct amdgpu_device *adev, int xcc_id) 1368e220edf2SJack Xiao { 1369e220edf2SJack Xiao struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 1370e220edf2SJack Xiao struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[xcc_id].ring; 1371e220edf2SJack Xiao int r, inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 1372e220edf2SJack Xiao 1373e220edf2SJack Xiao if (!kiq->pmf || !kiq->pmf->kiq_map_queues) 1374e220edf2SJack Xiao return -EINVAL; 1375e220edf2SJack Xiao 1376e220edf2SJack Xiao r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); 1377e220edf2SJack Xiao if (r) { 1378e220edf2SJack Xiao DRM_ERROR("Failed to lock KIQ (%d).\n", r); 1379e220edf2SJack Xiao return r; 1380e220edf2SJack Xiao } 1381e220edf2SJack Xiao 1382e220edf2SJack Xiao kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[inst]); 1383e220edf2SJack Xiao 1384e220edf2SJack Xiao r = amdgpu_ring_test_ring(kiq_ring); 1385e220edf2SJack Xiao if (r) { 1386e220edf2SJack Xiao DRM_ERROR("kfq enable failed\n"); 1387e220edf2SJack Xiao kiq_ring->sched.ready = false; 1388e220edf2SJack Xiao } 1389e220edf2SJack Xiao return r; 1390e220edf2SJack Xiao } 1391e220edf2SJack Xiao 1392e220edf2SJack Xiao static int mes_v12_1_queue_init(struct amdgpu_device *adev, 1393e220edf2SJack Xiao enum amdgpu_mes_pipe pipe, 1394e220edf2SJack Xiao int xcc_id) 1395e220edf2SJack Xiao { 1396e220edf2SJack Xiao struct amdgpu_ring *ring; 1397e220edf2SJack Xiao int r; 1398e220edf2SJack Xiao 1399e220edf2SJack Xiao if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) 1400e220edf2SJack Xiao ring = &adev->gfx.kiq[xcc_id].ring; 1401e220edf2SJack Xiao else 1402e220edf2SJack Xiao ring = &adev->mes.ring[MES_PIPE_INST(xcc_id, pipe)]; 1403e220edf2SJack Xiao 1404e220edf2SJack Xiao if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) && 1405e220edf2SJack Xiao (amdgpu_in_reset(adev) || adev->in_suspend)) { 1406e220edf2SJack Xiao *(ring->wptr_cpu_addr) = 0; 1407e220edf2SJack Xiao *(ring->rptr_cpu_addr) = 0; 1408e220edf2SJack Xiao amdgpu_ring_clear_ring(ring); 1409e220edf2SJack Xiao } 1410e220edf2SJack Xiao 1411e220edf2SJack Xiao r = mes_v12_1_mqd_init(ring); 1412e220edf2SJack Xiao if (r) 1413e220edf2SJack Xiao return r; 1414e220edf2SJack Xiao 1415e220edf2SJack Xiao if (pipe == AMDGPU_MES_SCHED_PIPE) { 1416e220edf2SJack Xiao if (adev->enable_uni_mes) 1417e220edf2SJack Xiao r = amdgpu_mes_map_legacy_queue(adev, ring, xcc_id); 1418e220edf2SJack Xiao else 1419e220edf2SJack Xiao r = mes_v12_1_kiq_enable_queue(adev, xcc_id); 1420e220edf2SJack Xiao if (r) 1421e220edf2SJack Xiao return r; 1422e220edf2SJack Xiao } else { 1423e220edf2SJack Xiao mes_v12_1_queue_init_register(ring, xcc_id); 1424e220edf2SJack Xiao } 1425e220edf2SJack Xiao 1426e220edf2SJack Xiao /* get MES scheduler/KIQ versions */ 1427e220edf2SJack Xiao mutex_lock(&adev->srbm_mutex); 1428e220edf2SJack Xiao soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id)); 1429e220edf2SJack Xiao 1430e220edf2SJack Xiao if (pipe == AMDGPU_MES_SCHED_PIPE) 1431e220edf2SJack Xiao adev->mes.sched_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO); 1432e220edf2SJack Xiao else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) 1433e220edf2SJack Xiao adev->mes.kiq_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO); 1434e220edf2SJack Xiao 1435e220edf2SJack Xiao soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1436e220edf2SJack Xiao mutex_unlock(&adev->srbm_mutex); 1437e220edf2SJack Xiao 1438e220edf2SJack Xiao return 0; 1439e220edf2SJack Xiao } 1440e220edf2SJack Xiao 1441e220edf2SJack Xiao static int mes_v12_1_ring_init(struct amdgpu_device *adev, 1442e220edf2SJack Xiao int xcc_id, int pipe) 1443e220edf2SJack Xiao { 1444e220edf2SJack Xiao struct amdgpu_ring *ring; 1445e220edf2SJack Xiao int inst = MES_PIPE_INST(xcc_id, pipe); 1446e220edf2SJack Xiao 1447e220edf2SJack Xiao ring = &adev->mes.ring[inst]; 1448e220edf2SJack Xiao 1449e220edf2SJack Xiao ring->funcs = &mes_v12_1_ring_funcs; 1450e220edf2SJack Xiao 1451e220edf2SJack Xiao ring->me = 3; 1452e220edf2SJack Xiao ring->pipe = pipe; 1453e220edf2SJack Xiao ring->queue = 0; 1454e220edf2SJack Xiao ring->xcc_id = xcc_id; 1455e220edf2SJack Xiao ring->vm_hub = AMDGPU_GFXHUB(xcc_id); 1456e220edf2SJack Xiao 1457e220edf2SJack Xiao ring->ring_obj = NULL; 1458e220edf2SJack Xiao ring->use_doorbell = true; 1459e220edf2SJack Xiao ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst]; 1460e220edf2SJack Xiao ring->no_scheduler = true; 1461e220edf2SJack Xiao snprintf(ring->name, sizeof(ring->name), "mes_%hhu.%hhu.%hhu.%hhu", 1462e220edf2SJack Xiao (unsigned char)xcc_id, (unsigned char)ring->me, 1463e220edf2SJack Xiao (unsigned char)ring->pipe, (unsigned char)ring->queue); 1464e220edf2SJack Xiao 1465e220edf2SJack Xiao if (pipe == AMDGPU_MES_SCHED_PIPE) 1466e220edf2SJack Xiao ring->doorbell_index = 1467e220edf2SJack Xiao (adev->doorbell_index.mes_ring0 + 1468e220edf2SJack Xiao xcc_id * adev->doorbell_index.xcc_doorbell_range) 1469e220edf2SJack Xiao << 1; 1470e220edf2SJack Xiao else 1471e220edf2SJack Xiao ring->doorbell_index = 1472e220edf2SJack Xiao (adev->doorbell_index.mes_ring1 + 1473e220edf2SJack Xiao xcc_id * adev->doorbell_index.xcc_doorbell_range) 1474e220edf2SJack Xiao << 1; 1475e220edf2SJack Xiao 1476e220edf2SJack Xiao return amdgpu_ring_init(adev, ring, 1024, NULL, 0, 1477e220edf2SJack Xiao AMDGPU_RING_PRIO_DEFAULT, NULL); 1478e220edf2SJack Xiao } 1479e220edf2SJack Xiao 1480e220edf2SJack Xiao static int mes_v12_1_kiq_ring_init(struct amdgpu_device *adev, int xcc_id) 1481e220edf2SJack Xiao { 1482e220edf2SJack Xiao struct amdgpu_ring *ring; 1483e220edf2SJack Xiao int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE); 1484e220edf2SJack Xiao 1485e220edf2SJack Xiao spin_lock_init(&adev->gfx.kiq[xcc_id].ring_lock); 1486e220edf2SJack Xiao 1487e220edf2SJack Xiao ring = &adev->gfx.kiq[xcc_id].ring; 1488e220edf2SJack Xiao 1489e220edf2SJack Xiao ring->me = 3; 1490e220edf2SJack Xiao ring->pipe = 1; 1491e220edf2SJack Xiao ring->queue = 0; 1492e220edf2SJack Xiao ring->xcc_id = xcc_id; 1493e220edf2SJack Xiao ring->vm_hub = AMDGPU_GFXHUB(xcc_id); 1494e220edf2SJack Xiao 1495e220edf2SJack Xiao ring->adev = NULL; 1496e220edf2SJack Xiao ring->ring_obj = NULL; 1497e220edf2SJack Xiao ring->use_doorbell = true; 1498e220edf2SJack Xiao ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst]; 1499e220edf2SJack Xiao ring->no_scheduler = true; 1500e220edf2SJack Xiao ring->doorbell_index = 1501e220edf2SJack Xiao (adev->doorbell_index.mes_ring1 + 1502e220edf2SJack Xiao xcc_id * adev->doorbell_index.xcc_doorbell_range) 1503e220edf2SJack Xiao << 1; 1504e220edf2SJack Xiao 1505e220edf2SJack Xiao snprintf(ring->name, sizeof(ring->name), "mes_kiq_%hhu.%hhu.%hhu.%hhu", 1506e220edf2SJack Xiao (unsigned char)xcc_id, (unsigned char)ring->me, 1507e220edf2SJack Xiao (unsigned char)ring->pipe, (unsigned char)ring->queue); 1508e220edf2SJack Xiao 1509e220edf2SJack Xiao return amdgpu_ring_init(adev, ring, 1024, NULL, 0, 1510e220edf2SJack Xiao AMDGPU_RING_PRIO_DEFAULT, NULL); 1511e220edf2SJack Xiao } 1512e220edf2SJack Xiao 1513e220edf2SJack Xiao static int mes_v12_1_mqd_sw_init(struct amdgpu_device *adev, 1514e220edf2SJack Xiao enum amdgpu_mes_pipe pipe, 1515e220edf2SJack Xiao int xcc_id) 1516e220edf2SJack Xiao { 1517e220edf2SJack Xiao int r, mqd_size = sizeof(struct v12_1_mes_mqd); 1518e220edf2SJack Xiao struct amdgpu_ring *ring; 1519e220edf2SJack Xiao int inst = MES_PIPE_INST(xcc_id, pipe); 1520e220edf2SJack Xiao 1521e220edf2SJack Xiao if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) 1522e220edf2SJack Xiao ring = &adev->gfx.kiq[xcc_id].ring; 1523e220edf2SJack Xiao else 1524e220edf2SJack Xiao ring = &adev->mes.ring[inst]; 1525e220edf2SJack Xiao 1526e220edf2SJack Xiao if (ring->mqd_obj) 1527e220edf2SJack Xiao return 0; 1528e220edf2SJack Xiao 1529e220edf2SJack Xiao r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, 1530e220edf2SJack Xiao AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, 1531e220edf2SJack Xiao &ring->mqd_gpu_addr, &ring->mqd_ptr); 1532e220edf2SJack Xiao if (r) { 1533e220edf2SJack Xiao dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); 1534e220edf2SJack Xiao return r; 1535e220edf2SJack Xiao } 1536e220edf2SJack Xiao 1537e220edf2SJack Xiao memset(ring->mqd_ptr, 0, mqd_size); 1538e220edf2SJack Xiao 1539e220edf2SJack Xiao /* prepare MQD backup */ 1540e220edf2SJack Xiao adev->mes.mqd_backup[inst] = kmalloc(mqd_size, GFP_KERNEL); 1541e220edf2SJack Xiao if (!adev->mes.mqd_backup[inst]) 1542e220edf2SJack Xiao dev_warn(adev->dev, 1543e220edf2SJack Xiao "no memory to create MQD backup for ring %s\n", 1544e220edf2SJack Xiao ring->name); 1545e220edf2SJack Xiao 1546e220edf2SJack Xiao return 0; 1547e220edf2SJack Xiao } 1548e220edf2SJack Xiao 1549e220edf2SJack Xiao static int mes_v12_1_sw_init(struct amdgpu_ip_block *ip_block) 1550e220edf2SJack Xiao { 1551e220edf2SJack Xiao struct amdgpu_device *adev = ip_block->adev; 1552e220edf2SJack Xiao int pipe, r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1553e220edf2SJack Xiao 1554e220edf2SJack Xiao adev->mes.funcs = &mes_v12_1_funcs; 1555e220edf2SJack Xiao adev->mes.kiq_hw_init = &mes_v12_1_kiq_hw_init; 1556e220edf2SJack Xiao adev->mes.kiq_hw_fini = &mes_v12_1_kiq_hw_fini; 1557e220edf2SJack Xiao adev->mes.enable_legacy_queue_map = true; 1558e220edf2SJack Xiao 1559e220edf2SJack Xiao adev->mes.event_log_size = 1560e220edf2SJack Xiao adev->enable_uni_mes ? (AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE * num_xcc) : AMDGPU_MES_LOG_BUFFER_SIZE; 1561e220edf2SJack Xiao 1562e220edf2SJack Xiao r = amdgpu_mes_init(adev); 1563e220edf2SJack Xiao if (r) 1564e220edf2SJack Xiao return r; 1565e220edf2SJack Xiao 1566e220edf2SJack Xiao for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1567e220edf2SJack Xiao for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1568e220edf2SJack Xiao r = mes_v12_1_allocate_eop_buf(adev, pipe, xcc_id); 1569e220edf2SJack Xiao if (r) 1570e220edf2SJack Xiao return r; 1571e220edf2SJack Xiao 1572e220edf2SJack Xiao r = mes_v12_1_mqd_sw_init(adev, pipe, xcc_id); 1573e220edf2SJack Xiao if (r) 1574e220edf2SJack Xiao return r; 1575e220edf2SJack Xiao 1576e220edf2SJack Xiao if (!adev->enable_uni_mes && pipe == 1577e220edf2SJack Xiao AMDGPU_MES_KIQ_PIPE) 1578e220edf2SJack Xiao r = mes_v12_1_kiq_ring_init(adev, xcc_id); 1579e220edf2SJack Xiao else 1580e220edf2SJack Xiao r = mes_v12_1_ring_init(adev, xcc_id, pipe); 1581e220edf2SJack Xiao if (r) 1582e220edf2SJack Xiao return r; 158375053887SJack Xiao 1584aa0f09f9SMukul Joshi if (adev->enable_uni_mes && num_xcc > 1) { 158575053887SJack Xiao r = mes_v12_1_allocate_shared_cmd_buf(adev, 158675053887SJack Xiao pipe, xcc_id); 158775053887SJack Xiao if (r) 158875053887SJack Xiao return r; 158975053887SJack Xiao } 1590e220edf2SJack Xiao } 1591e220edf2SJack Xiao } 1592e220edf2SJack Xiao 1593e220edf2SJack Xiao return 0; 1594e220edf2SJack Xiao } 1595e220edf2SJack Xiao 1596e220edf2SJack Xiao static int mes_v12_1_sw_fini(struct amdgpu_ip_block *ip_block) 1597e220edf2SJack Xiao { 1598e220edf2SJack Xiao struct amdgpu_device *adev = ip_block->adev; 1599e220edf2SJack Xiao int pipe, inst, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1600e220edf2SJack Xiao 1601e220edf2SJack Xiao for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1602e220edf2SJack Xiao for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1603e220edf2SJack Xiao inst = MES_PIPE_INST(xcc_id, pipe); 1604e220edf2SJack Xiao 160575053887SJack Xiao amdgpu_bo_free_kernel(&adev->mes.shared_cmd_buf_obj[inst], 160675053887SJack Xiao &adev->mes.shared_cmd_buf_gpu_addr[inst], 160775053887SJack Xiao NULL); 160875053887SJack Xiao 1609e220edf2SJack Xiao kfree(adev->mes.mqd_backup[inst]); 1610e220edf2SJack Xiao 1611e220edf2SJack Xiao amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[inst], 1612e220edf2SJack Xiao &adev->mes.eop_gpu_addr[inst], 1613e220edf2SJack Xiao NULL); 1614e220edf2SJack Xiao amdgpu_ucode_release(&adev->mes.fw[inst]); 1615e220edf2SJack Xiao 1616e220edf2SJack Xiao if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) { 1617e220edf2SJack Xiao amdgpu_bo_free_kernel(&adev->mes.ring[inst].mqd_obj, 1618e220edf2SJack Xiao &adev->mes.ring[inst].mqd_gpu_addr, 1619e220edf2SJack Xiao &adev->mes.ring[inst].mqd_ptr); 1620e220edf2SJack Xiao amdgpu_ring_fini(&adev->mes.ring[inst]); 1621e220edf2SJack Xiao } 1622e220edf2SJack Xiao } 1623e220edf2SJack Xiao } 1624e220edf2SJack Xiao 1625e220edf2SJack Xiao for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1626e220edf2SJack Xiao if (!adev->enable_uni_mes) { 1627e220edf2SJack Xiao amdgpu_bo_free_kernel(&adev->gfx.kiq[xcc_id].ring.mqd_obj, 1628e220edf2SJack Xiao &adev->gfx.kiq[xcc_id].ring.mqd_gpu_addr, 1629e220edf2SJack Xiao &adev->gfx.kiq[xcc_id].ring.mqd_ptr); 1630e220edf2SJack Xiao amdgpu_ring_fini(&adev->gfx.kiq[xcc_id].ring); 1631e220edf2SJack Xiao } 1632e220edf2SJack Xiao 1633e220edf2SJack Xiao if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1634e220edf2SJack Xiao mes_v12_1_free_ucode_buffers(adev, 1635e220edf2SJack Xiao AMDGPU_MES_KIQ_PIPE, xcc_id); 1636e220edf2SJack Xiao mes_v12_1_free_ucode_buffers(adev, 1637e220edf2SJack Xiao AMDGPU_MES_SCHED_PIPE, xcc_id); 1638e220edf2SJack Xiao } 1639e220edf2SJack Xiao } 1640e220edf2SJack Xiao 1641e220edf2SJack Xiao amdgpu_mes_fini(adev); 1642e220edf2SJack Xiao return 0; 1643e220edf2SJack Xiao } 1644e220edf2SJack Xiao 1645e220edf2SJack Xiao static void mes_v12_1_kiq_dequeue_sched(struct amdgpu_device *adev, 1646e220edf2SJack Xiao int xcc_id) 1647e220edf2SJack Xiao { 1648e220edf2SJack Xiao uint32_t data; 1649e220edf2SJack Xiao int i; 1650e220edf2SJack Xiao 1651e220edf2SJack Xiao mutex_lock(&adev->srbm_mutex); 1652e220edf2SJack Xiao soc_v1_0_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0, 1653e220edf2SJack Xiao GET_INST(GC, xcc_id)); 1654e220edf2SJack Xiao 1655e220edf2SJack Xiao /* disable the queue if it's active */ 1656e220edf2SJack Xiao if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) { 1657e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1); 1658e220edf2SJack Xiao for (i = 0; i < adev->usec_timeout; i++) { 1659e220edf2SJack Xiao if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) 1660e220edf2SJack Xiao break; 1661e220edf2SJack Xiao udelay(1); 1662e220edf2SJack Xiao } 1663e220edf2SJack Xiao } 1664e220edf2SJack Xiao data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL); 1665e220edf2SJack Xiao data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, 1666e220edf2SJack Xiao DOORBELL_EN, 0); 1667e220edf2SJack Xiao data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, 1668e220edf2SJack Xiao DOORBELL_HIT, 1); 1669e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data); 1670e220edf2SJack Xiao 1671e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0); 1672e220edf2SJack Xiao 1673e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0); 1674e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0); 1675e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0); 1676e220edf2SJack Xiao 1677e220edf2SJack Xiao soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1678e220edf2SJack Xiao mutex_unlock(&adev->srbm_mutex); 1679e220edf2SJack Xiao 1680e220edf2SJack Xiao adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = false; 1681e220edf2SJack Xiao } 1682e220edf2SJack Xiao 1683e220edf2SJack Xiao static void mes_v12_1_kiq_setting(struct amdgpu_ring *ring, int xcc_id) 1684e220edf2SJack Xiao { 1685e220edf2SJack Xiao uint32_t tmp; 1686e220edf2SJack Xiao struct amdgpu_device *adev = ring->adev; 1687e220edf2SJack Xiao 1688e220edf2SJack Xiao /* tell RLC which is KIQ queue */ 1689e220edf2SJack Xiao tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS); 1690e220edf2SJack Xiao tmp &= 0xffffff00; 1691e220edf2SJack Xiao tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 1692e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); 1693e220edf2SJack Xiao tmp |= 0x80; 1694e220edf2SJack Xiao WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); 1695e220edf2SJack Xiao } 1696e220edf2SJack Xiao 1697e220edf2SJack Xiao static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id) 1698e220edf2SJack Xiao { 1699e220edf2SJack Xiao int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE); 1700e220edf2SJack Xiao int r = 0; 1701e220edf2SJack Xiao struct amdgpu_ip_block *ip_block; 1702e220edf2SJack Xiao 1703e220edf2SJack Xiao if (adev->enable_uni_mes) 1704e220edf2SJack Xiao mes_v12_1_kiq_setting(&adev->mes.ring[inst], xcc_id); 1705e220edf2SJack Xiao else 1706e220edf2SJack Xiao mes_v12_1_kiq_setting(&adev->gfx.kiq[xcc_id].ring, xcc_id); 1707e220edf2SJack Xiao 1708e220edf2SJack Xiao if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1709e220edf2SJack Xiao 1710e220edf2SJack Xiao r = mes_v12_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, 1711e220edf2SJack Xiao false, xcc_id); 1712e220edf2SJack Xiao if (r) { 1713e220edf2SJack Xiao DRM_ERROR("failed to load MES fw, r=%d\n", r); 1714e220edf2SJack Xiao return r; 1715e220edf2SJack Xiao } 1716e220edf2SJack Xiao 1717e220edf2SJack Xiao r = mes_v12_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, 1718e220edf2SJack Xiao true, xcc_id); 1719e220edf2SJack Xiao if (r) { 1720e220edf2SJack Xiao DRM_ERROR("failed to load MES kiq fw, r=%d\n", r); 1721e220edf2SJack Xiao return r; 1722e220edf2SJack Xiao } 1723e220edf2SJack Xiao 1724e220edf2SJack Xiao mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1725e220edf2SJack Xiao 1726e220edf2SJack Xiao } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1727e220edf2SJack Xiao mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1728e220edf2SJack Xiao 1729e220edf2SJack Xiao mes_v12_1_enable(adev, true, xcc_id); 1730e220edf2SJack Xiao 1731e220edf2SJack Xiao ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES); 1732e220edf2SJack Xiao if (unlikely(!ip_block)) { 1733e220edf2SJack Xiao dev_err(adev->dev, "Failed to get MES handle\n"); 1734e220edf2SJack Xiao return -EINVAL; 1735e220edf2SJack Xiao } 1736e220edf2SJack Xiao 1737e220edf2SJack Xiao r = mes_v12_1_queue_init(adev, AMDGPU_MES_KIQ_PIPE, xcc_id); 1738e220edf2SJack Xiao if (r) 1739e220edf2SJack Xiao goto failure; 1740e220edf2SJack Xiao 1741e220edf2SJack Xiao if (adev->enable_uni_mes) { 1742e220edf2SJack Xiao r = mes_v12_1_set_hw_resources(&adev->mes, 1743e220edf2SJack Xiao AMDGPU_MES_KIQ_PIPE, xcc_id); 1744e220edf2SJack Xiao if (r) 1745e220edf2SJack Xiao goto failure; 1746e220edf2SJack Xiao 1747e220edf2SJack Xiao mes_v12_1_set_hw_resources_1(&adev->mes, 1748e220edf2SJack Xiao AMDGPU_MES_KIQ_PIPE, xcc_id); 1749e220edf2SJack Xiao } 1750e220edf2SJack Xiao 1751e220edf2SJack Xiao if (adev->mes.enable_legacy_queue_map) { 1752a5192fbbSLikun Gao r = mes_v12_1_xcc_hw_init(ip_block, xcc_id); 1753e220edf2SJack Xiao if (r) 1754e220edf2SJack Xiao goto failure; 1755e220edf2SJack Xiao } 1756e220edf2SJack Xiao 1757e220edf2SJack Xiao return r; 1758e220edf2SJack Xiao 1759e220edf2SJack Xiao failure: 1760e220edf2SJack Xiao mes_v12_1_hw_fini(ip_block); 1761e220edf2SJack Xiao return r; 1762e220edf2SJack Xiao } 1763e220edf2SJack Xiao 1764e220edf2SJack Xiao static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id) 1765e220edf2SJack Xiao { 1766e220edf2SJack Xiao int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE); 1767e220edf2SJack Xiao 1768e220edf2SJack Xiao if (adev->mes.ring[inst].sched.ready) { 1769e220edf2SJack Xiao if (adev->enable_uni_mes) 1770e220edf2SJack Xiao amdgpu_mes_unmap_legacy_queue(adev, 1771e220edf2SJack Xiao &adev->mes.ring[inst], 1772e220edf2SJack Xiao RESET_QUEUES, 0, 0, xcc_id); 1773e220edf2SJack Xiao else 1774e220edf2SJack Xiao mes_v12_1_kiq_dequeue_sched(adev, xcc_id); 1775e220edf2SJack Xiao 1776e220edf2SJack Xiao adev->mes.ring[inst].sched.ready = false; 1777e220edf2SJack Xiao } 1778e220edf2SJack Xiao 1779e220edf2SJack Xiao mes_v12_1_enable(adev, false, xcc_id); 1780e220edf2SJack Xiao 1781e220edf2SJack Xiao return 0; 1782e220edf2SJack Xiao } 1783e220edf2SJack Xiao 1784aa0f09f9SMukul Joshi static int mes_v12_1_setup_coop_mode(struct amdgpu_device *adev, int xcc_id) 1785aa0f09f9SMukul Joshi { 1786aa0f09f9SMukul Joshi u32 num_xcc_per_xcp, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1787aa0f09f9SMukul Joshi int r = 0; 1788aa0f09f9SMukul Joshi 1789aa0f09f9SMukul Joshi if (num_xcc == 1) 1790aa0f09f9SMukul Joshi return r; 1791aa0f09f9SMukul Joshi 1792aa0f09f9SMukul Joshi if (adev->gfx.funcs && 1793aa0f09f9SMukul Joshi adev->gfx.funcs->get_xccs_per_xcp) 1794aa0f09f9SMukul Joshi num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev); 1795aa0f09f9SMukul Joshi else 1796aa0f09f9SMukul Joshi return -EINVAL; 1797aa0f09f9SMukul Joshi 1798aa0f09f9SMukul Joshi switch (adev->xcp_mgr->mode) { 1799aa0f09f9SMukul Joshi case AMDGPU_SPX_PARTITION_MODE: 1800aa0f09f9SMukul Joshi adev->mes.enable_coop_mode = 1; 1801aa0f09f9SMukul Joshi adev->mes.master_xcc_ids[xcc_id] = 0; 1802aa0f09f9SMukul Joshi break; 1803aa0f09f9SMukul Joshi case AMDGPU_DPX_PARTITION_MODE: 1804aa0f09f9SMukul Joshi adev->mes.enable_coop_mode = 1; 1805aa0f09f9SMukul Joshi adev->mes.master_xcc_ids[xcc_id] = 1806aa0f09f9SMukul Joshi (xcc_id/num_xcc_per_xcp) * (num_xcc / 2); 1807aa0f09f9SMukul Joshi break; 1808aa0f09f9SMukul Joshi case AMDGPU_QPX_PARTITION_MODE: 1809aa0f09f9SMukul Joshi adev->mes.enable_coop_mode = 1; 1810aa0f09f9SMukul Joshi adev->mes.master_xcc_ids[xcc_id] = 1811aa0f09f9SMukul Joshi (xcc_id/num_xcc_per_xcp) * (num_xcc / 4); 1812aa0f09f9SMukul Joshi break; 1813aa0f09f9SMukul Joshi case AMDGPU_CPX_PARTITION_MODE: 1814aa0f09f9SMukul Joshi adev->mes.enable_coop_mode = 0; 1815aa0f09f9SMukul Joshi break; 1816aa0f09f9SMukul Joshi default: 1817aa0f09f9SMukul Joshi r = -EINVAL; 1818aa0f09f9SMukul Joshi break; 1819aa0f09f9SMukul Joshi } 1820aa0f09f9SMukul Joshi return r; 1821aa0f09f9SMukul Joshi } 1822aa0f09f9SMukul Joshi 1823e220edf2SJack Xiao static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id) 1824e220edf2SJack Xiao { 1825e220edf2SJack Xiao int r; 1826e220edf2SJack Xiao struct amdgpu_device *adev = ip_block->adev; 1827e220edf2SJack Xiao 1828e220edf2SJack Xiao if (adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready) 1829e220edf2SJack Xiao goto out; 1830e220edf2SJack Xiao 1831e220edf2SJack Xiao if (!adev->enable_mes_kiq) { 1832e220edf2SJack Xiao if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1833e220edf2SJack Xiao r = mes_v12_1_load_microcode(adev, 1834e220edf2SJack Xiao AMDGPU_MES_SCHED_PIPE, true, xcc_id); 1835e220edf2SJack Xiao if (r) { 1836e220edf2SJack Xiao DRM_ERROR("failed to MES fw, r=%d\n", r); 1837e220edf2SJack Xiao return r; 1838e220edf2SJack Xiao } 1839e220edf2SJack Xiao 1840e220edf2SJack Xiao mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1841e220edf2SJack Xiao 1842e220edf2SJack Xiao } else if (adev->firmware.load_type == 1843e220edf2SJack Xiao AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1844e220edf2SJack Xiao 1845e220edf2SJack Xiao mes_v12_1_set_ucode_start_addr(adev, xcc_id); 1846e220edf2SJack Xiao } 1847e220edf2SJack Xiao 1848e220edf2SJack Xiao mes_v12_1_enable(adev, true, xcc_id); 1849e220edf2SJack Xiao } 1850e220edf2SJack Xiao 1851e220edf2SJack Xiao /* Enable the MES to handle doorbell ring on unmapped queue */ 1852e220edf2SJack Xiao mes_v12_1_enable_unmapped_doorbell_handling(&adev->mes, true, xcc_id); 1853e220edf2SJack Xiao 1854e220edf2SJack Xiao r = mes_v12_1_queue_init(adev, AMDGPU_MES_SCHED_PIPE, xcc_id); 1855e220edf2SJack Xiao if (r) 1856e220edf2SJack Xiao goto failure; 1857e220edf2SJack Xiao 1858e220edf2SJack Xiao r = mes_v12_1_set_hw_resources(&adev->mes, 1859e220edf2SJack Xiao AMDGPU_MES_SCHED_PIPE, xcc_id); 1860e220edf2SJack Xiao if (r) 1861e220edf2SJack Xiao goto failure; 1862e220edf2SJack Xiao 1863aa0f09f9SMukul Joshi if (adev->enable_uni_mes) { 1864aa0f09f9SMukul Joshi r = mes_v12_1_setup_coop_mode(adev, xcc_id); 1865aa0f09f9SMukul Joshi if (r) 1866aa0f09f9SMukul Joshi goto failure; 1867e220edf2SJack Xiao mes_v12_1_set_hw_resources_1(&adev->mes, 1868e220edf2SJack Xiao AMDGPU_MES_SCHED_PIPE, xcc_id); 1869aa0f09f9SMukul Joshi } 1870e220edf2SJack Xiao mes_v12_1_init_aggregated_doorbell(&adev->mes, xcc_id); 1871e220edf2SJack Xiao 1872e220edf2SJack Xiao r = mes_v12_1_query_sched_status(&adev->mes, 1873e220edf2SJack Xiao AMDGPU_MES_SCHED_PIPE, xcc_id); 1874e220edf2SJack Xiao if (r) { 1875e220edf2SJack Xiao DRM_ERROR("MES is busy\n"); 1876e220edf2SJack Xiao goto failure; 1877e220edf2SJack Xiao } 1878e220edf2SJack Xiao 1879e220edf2SJack Xiao out: 1880e220edf2SJack Xiao /* 1881e220edf2SJack Xiao * Disable KIQ ring usage from the driver once MES is enabled. 1882e220edf2SJack Xiao * MES uses KIQ ring exclusively so driver cannot access KIQ ring 1883e220edf2SJack Xiao * with MES enabled. 1884e220edf2SJack Xiao */ 1885e220edf2SJack Xiao adev->gfx.kiq[xcc_id].ring.sched.ready = false; 1886e220edf2SJack Xiao adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = true; 1887e220edf2SJack Xiao 1888e220edf2SJack Xiao return 0; 1889e220edf2SJack Xiao 1890e220edf2SJack Xiao failure: 1891e220edf2SJack Xiao mes_v12_1_hw_fini(ip_block); 1892e220edf2SJack Xiao return r; 1893e220edf2SJack Xiao } 1894e220edf2SJack Xiao 1895e220edf2SJack Xiao static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block) 1896e220edf2SJack Xiao { 1897e220edf2SJack Xiao struct amdgpu_device *adev = ip_block->adev; 1898e220edf2SJack Xiao int r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1899e220edf2SJack Xiao 1900e220edf2SJack Xiao for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1901e220edf2SJack Xiao r = mes_v12_1_xcc_hw_init(ip_block, xcc_id); 1902e220edf2SJack Xiao if (r) 1903e220edf2SJack Xiao return r; 1904e220edf2SJack Xiao } 1905e220edf2SJack Xiao 1906e220edf2SJack Xiao return 0; 1907e220edf2SJack Xiao } 1908e220edf2SJack Xiao 1909e220edf2SJack Xiao static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block) 1910e220edf2SJack Xiao { 1911e220edf2SJack Xiao return 0; 1912e220edf2SJack Xiao } 1913e220edf2SJack Xiao 1914e220edf2SJack Xiao static int mes_v12_1_suspend(struct amdgpu_ip_block *ip_block) 1915e220edf2SJack Xiao { 1916e220edf2SJack Xiao int r; 1917e220edf2SJack Xiao 1918e220edf2SJack Xiao r = amdgpu_mes_suspend(ip_block->adev); 1919e220edf2SJack Xiao if (r) 1920e220edf2SJack Xiao return r; 1921e220edf2SJack Xiao 1922e220edf2SJack Xiao return mes_v12_1_hw_fini(ip_block); 1923e220edf2SJack Xiao } 1924e220edf2SJack Xiao 1925e220edf2SJack Xiao static int mes_v12_1_resume(struct amdgpu_ip_block *ip_block) 1926e220edf2SJack Xiao { 1927e220edf2SJack Xiao int r; 1928e220edf2SJack Xiao 1929e220edf2SJack Xiao r = mes_v12_1_hw_init(ip_block); 1930e220edf2SJack Xiao if (r) 1931e220edf2SJack Xiao return r; 1932e220edf2SJack Xiao 1933e220edf2SJack Xiao return amdgpu_mes_resume(ip_block->adev); 1934e220edf2SJack Xiao } 1935e220edf2SJack Xiao 1936e220edf2SJack Xiao static int mes_v12_1_early_init(struct amdgpu_ip_block *ip_block) 1937e220edf2SJack Xiao { 1938e220edf2SJack Xiao struct amdgpu_device *adev = ip_block->adev; 1939e220edf2SJack Xiao int pipe, r; 1940e220edf2SJack Xiao 1941e220edf2SJack Xiao for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { 1942e220edf2SJack Xiao r = amdgpu_mes_init_microcode(adev, pipe); 1943e220edf2SJack Xiao if (r) 1944e220edf2SJack Xiao return r; 1945e220edf2SJack Xiao } 1946e220edf2SJack Xiao 1947e220edf2SJack Xiao return 0; 1948e220edf2SJack Xiao } 1949e220edf2SJack Xiao 1950e220edf2SJack Xiao static const struct amd_ip_funcs mes_v12_1_ip_funcs = { 1951e220edf2SJack Xiao .name = "mes_v12_1", 1952e220edf2SJack Xiao .early_init = mes_v12_1_early_init, 1953e220edf2SJack Xiao .late_init = NULL, 1954e220edf2SJack Xiao .sw_init = mes_v12_1_sw_init, 1955e220edf2SJack Xiao .sw_fini = mes_v12_1_sw_fini, 1956e220edf2SJack Xiao .hw_init = mes_v12_1_hw_init, 1957e220edf2SJack Xiao .hw_fini = mes_v12_1_hw_fini, 1958e220edf2SJack Xiao .suspend = mes_v12_1_suspend, 1959e220edf2SJack Xiao .resume = mes_v12_1_resume, 1960e220edf2SJack Xiao }; 1961e220edf2SJack Xiao 1962e220edf2SJack Xiao const struct amdgpu_ip_block_version mes_v12_1_ip_block = { 1963e220edf2SJack Xiao .type = AMD_IP_BLOCK_TYPE_MES, 1964e220edf2SJack Xiao .major = 12, 1965e220edf2SJack Xiao .minor = 1, 1966e220edf2SJack Xiao .rev = 0, 1967e220edf2SJack Xiao .funcs = &mes_v12_1_ip_funcs, 1968e220edf2SJack Xiao }; 1969