// SPDX-License-Identifier: MIT
/*
 * Copyright 2024 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "mes_userqueue.h"
#include "amdgpu_userq_fence.h"

#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE

static int
mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo)
{
	int ret;

	ret = amdgpu_bo_reserve(bo, true);
	if (ret) {
		DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
		goto err_reserve_bo_failed;
	}

	ret = amdgpu_ttm_alloc_gart(&bo->tbo);
	if (ret) {
		DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
		goto err_map_bo_gart_failed;
	}

	amdgpu_bo_unreserve(bo);
	bo = amdgpu_bo_ref(bo);

	return 0;

err_map_bo_gart_failed:
	amdgpu_bo_unreserve(bo);
err_reserve_bo_failed:
	return ret;
}

static int
mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
			      struct amdgpu_usermode_queue *queue,
			      uint64_t wptr)
{
	struct amdgpu_bo_va_mapping *wptr_mapping;
	struct amdgpu_vm *wptr_vm;
	struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj;
	int ret;

	wptr_vm = queue->vm;
	ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
	if (ret)
		return ret;

	wptr &= AMDGPU_GMC_HOLE_MASK;
	wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT);
	amdgpu_bo_unreserve(wptr_vm->root.bo);
	if (!wptr_mapping) {
		DRM_ERROR("Failed to lookup wptr bo\n");
		return -EINVAL;
	}

	wptr_obj->obj = wptr_mapping->bo_va->base.bo;
	if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
		DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n");
		return -EINVAL;
	}

	ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj);
	if (ret) {
		DRM_ERROR("Failed to map wptr bo to GART\n");
		return ret;
	}

	queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj);
	return 0;
}

static int convert_to_mes_priority(int priority)
{
	switch (priority) {
	case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW:
	default:
		return AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
	case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW:
		return AMDGPU_MES_PRIORITY_LEVEL_LOW;
	case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH:
		return AMDGPU_MES_PRIORITY_LEVEL_MEDIUM;
	case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH:
		return AMDGPU_MES_PRIORITY_LEVEL_HIGH;
	}
}
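/*
 * Map the queue onto the hardware: build a mes_add_queue_input from the
 * queue properties and hand it to the MES firmware via the add_hw_queue()
 * op, taken under the MES lock.
 */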
static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr,
			 struct amdgpu_usermode_queue *queue)
{
	struct amdgpu_device *adev = uq_mgr->adev;
	struct amdgpu_userq_obj *ctx = &queue->fw_obj;
	struct amdgpu_mqd_prop *userq_props = queue->userq_prop;
	struct mes_add_queue_input queue_input;
	int r;

	memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));

	queue_input.process_va_start = 0;
	queue_input.process_va_end = adev->vm_manager.max_pfn - 1;

	/* set process quantum to 10 ms and gang quantum to 1 ms as default (100 ns units) */
	queue_input.process_quantum = 100000;
	queue_input.gang_quantum = 10000;
	queue_input.paging = false;

	queue_input.process_context_addr = ctx->gpu_addr;
	queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
	queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
	queue_input.gang_global_priority_level = convert_to_mes_priority(queue->priority);

	queue_input.process_id = queue->vm->pasid;
	queue_input.queue_type = queue->queue_type;
	queue_input.mqd_addr = queue->mqd.gpu_addr;
	queue_input.wptr_addr = userq_props->wptr_gpu_addr;
	queue_input.queue_size = userq_props->queue_size >> 2;
	queue_input.doorbell_offset = userq_props->doorbell_index;
	queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo);
	queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr;

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
	amdgpu_mes_unlock(&adev->mes);
	if (r) {
		DRM_ERROR("Failed to map queue in HW, err (%d)\n", r);
		return r;
	}

	DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n", userq_props->doorbell_index);
	return 0;
}

static int mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr,
			   struct amdgpu_usermode_queue *queue)
{
	struct amdgpu_device *adev = uq_mgr->adev;
	struct mes_remove_queue_input queue_input;
	struct amdgpu_userq_obj *ctx = &queue->fw_obj;
	int r;

	memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
	queue_input.doorbell_offset = queue->doorbell_index;
	queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
	amdgpu_mes_unlock(&adev->mes);
	if (r)
		DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", r);
	return r;
}

static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
				      struct amdgpu_usermode_queue *queue,
				      struct drm_amdgpu_userq_in *mqd_user)
{
	struct amdgpu_userq_obj *ctx = &queue->fw_obj;
	int r, size;

	/*
	 * The FW expects at least one page each for the process ctx
	 * and the gang ctx. Create a single object covering both.
	 */
	size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ;
	r = amdgpu_userq_create_object(uq_mgr, ctx, size);
	if (r) {
		DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", r);
		return r;
	}

	return 0;
}
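/*
 * Create and initialize the MQD for a user queue: validate the user
 * supplied ring/rptr/wptr addresses, allocate an MQD BO, copy the
 * per-IP MQD data from userspace, initialize the MQD via the generic
 * per-IP init_mqd(), then allocate the FW context space and GART-map
 * the WPTR page.
 */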
static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
				struct drm_amdgpu_userq_in *args_in,
				struct amdgpu_usermode_queue *queue)
{
	struct amdgpu_device *adev = uq_mgr->adev;
	struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type];
	struct drm_amdgpu_userq_in *mqd_user = args_in;
	struct amdgpu_mqd_prop *userq_props;
	int r;

	/* Structure to initialize MQD for userqueue using generic MQD init function */
	userq_props = kzalloc(sizeof(struct amdgpu_mqd_prop), GFP_KERNEL);
	if (!userq_props) {
		DRM_ERROR("Failed to allocate memory for userq_props\n");
		return -ENOMEM;
	}

	if (!mqd_user->wptr_va || !mqd_user->rptr_va ||
	    !mqd_user->queue_va || mqd_user->queue_size == 0) {
		DRM_ERROR("Invalid MQD parameters for userqueue\n");
		r = -EINVAL;
		goto free_props;
	}

	r = amdgpu_userq_create_object(uq_mgr, &queue->mqd, mqd_hw_default->mqd_size);
	if (r) {
		DRM_ERROR("Failed to create MQD object for userqueue\n");
		goto free_props;
	}

	/* Initialize the MQD BO with user given values */
	userq_props->wptr_gpu_addr = mqd_user->wptr_va;
	userq_props->rptr_gpu_addr = mqd_user->rptr_va;
	userq_props->queue_size = mqd_user->queue_size;
	userq_props->hqd_base_gpu_addr = mqd_user->queue_va;
	userq_props->mqd_gpu_addr = queue->mqd.gpu_addr;
	userq_props->use_doorbell = true;
	userq_props->doorbell_index = queue->doorbell_index;
	userq_props->fence_address = queue->fence_drv->gpu_addr;

	if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) {
		struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd;

		if (mqd_user->mqd_size != sizeof(*compute_mqd)) {
			DRM_ERROR("Invalid compute IP MQD size\n");
			r = -EINVAL;
			goto free_mqd;
		}

		compute_mqd = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
		if (IS_ERR(compute_mqd)) {
			DRM_ERROR("Failed to read user MQD\n");
			r = -ENOMEM;
			goto free_mqd;
		}

		userq_props->eop_gpu_addr = compute_mqd->eop_va;
		userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
		userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
		userq_props->hqd_active = false;
		userq_props->tmz_queue =
			mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
		kfree(compute_mqd);
	} else if (queue->queue_type == AMDGPU_HW_IP_GFX) {
		struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11;

		if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) {
			DRM_ERROR("Invalid GFX MQD\n");
			r = -EINVAL;
			goto free_mqd;
		}

		mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
		if (IS_ERR(mqd_gfx_v11)) {
			DRM_ERROR("Failed to read user MQD\n");
			r = -ENOMEM;
			goto free_mqd;
		}

		userq_props->shadow_addr = mqd_gfx_v11->shadow_va;
		userq_props->csa_addr = mqd_gfx_v11->csa_va;
		userq_props->tmz_queue =
			mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
		kfree(mqd_gfx_v11);
	} else if (queue->queue_type == AMDGPU_HW_IP_DMA) {
		struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11;

		if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) {
			DRM_ERROR("Invalid SDMA MQD\n");
			r = -EINVAL;
			goto free_mqd;
		}

		mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
		if (IS_ERR(mqd_sdma_v11)) {
			DRM_ERROR("Failed to read sdma user MQD\n");
			r = -ENOMEM;
			goto free_mqd;
		}

		userq_props->csa_addr = mqd_sdma_v11->csa_va;
		kfree(mqd_sdma_v11);
	}

	queue->userq_prop = userq_props;

	r = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props);
	if (r) {
		DRM_ERROR("Failed to initialize MQD for userqueue\n");
		goto free_mqd;
	}

	/* Create BO for FW operations */
	r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user);
	if (r) {
		DRM_ERROR("Failed to allocate BO for userqueue (%d)\n", r);
		goto free_mqd;
	}

	/* FW expects WPTR BOs to be mapped into GART */
	r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr);
	if (r) {
		DRM_ERROR("Failed to create WPTR mapping\n");
		goto free_ctx;
	}

	return 0;

free_ctx:
	amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj);

free_mqd:
	amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);

free_props:
	kfree(userq_props);

	return r;
}
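/* Tear down everything mqd_create set up: FW ctx space, queue properties and the MQD BO */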
static void
mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
		      struct amdgpu_usermode_queue *queue)
{
	amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj);
	kfree(queue->userq_prop);
	amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
}

const struct amdgpu_userq_funcs userq_mes_funcs = {
	.mqd_create = mes_userq_mqd_create,
	.mqd_destroy = mes_userq_mqd_destroy,
	.unmap = mes_userq_unmap,
	.map = mes_userq_map,
};