1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright 2024 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 */ 24 #include "amdgpu.h" 25 #include "amdgpu_gfx.h" 26 #include "mes_userqueue.h" 27 #include "amdgpu_userq_fence.h" 28 #include "v11_structs.h" 29 #include <linux/pm_runtime.h> 30 31 #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE 32 #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE 33 34 static int 35 mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo) 36 { 37 int ret; 38 39 ret = amdgpu_bo_reserve(bo, true); 40 if (ret) { 41 DRM_ERROR("Failed to reserve bo. ret %d\n", ret); 42 goto err_reserve_bo_failed; 43 } 44 45 ret = amdgpu_ttm_alloc_gart(&bo->tbo); 46 if (ret) { 47 DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); 48 goto err_map_bo_gart_failed; 49 } 50 51 amdgpu_bo_unreserve(bo); 52 bo = amdgpu_bo_ref(bo); 53 54 return 0; 55 56 err_map_bo_gart_failed: 57 amdgpu_bo_unreserve(bo); 58 err_reserve_bo_failed: 59 return ret; 60 } 61 62 static int 63 mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, 64 struct amdgpu_usermode_queue *queue, 65 uint64_t wptr) 66 { 67 struct amdgpu_bo_va_mapping *wptr_mapping; 68 struct amdgpu_vm *wptr_vm; 69 struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj; 70 int ret; 71 72 wptr_vm = queue->vm; 73 ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); 74 if (ret) 75 return ret; 76 77 wptr &= AMDGPU_GMC_HOLE_MASK; 78 wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT); 79 amdgpu_bo_unreserve(wptr_vm->root.bo); 80 if (!wptr_mapping) { 81 DRM_ERROR("Failed to lookup wptr bo\n"); 82 return -EINVAL; 83 } 84 85 wptr_obj->obj = wptr_mapping->bo_va->base.bo; 86 if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { 87 DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n"); 88 return -EINVAL; 89 } 90 91 ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj); 92 if (ret) { 93 DRM_ERROR("Failed to map wptr bo to GART\n"); 94 return ret; 95 } 96 97 queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj); 98 return 0; 99 } 100 101 static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr, 102 struct amdgpu_usermode_queue *queue, 103 struct amdgpu_mqd_prop *userq_props) 104 { 105 struct amdgpu_device *adev = uq_mgr->adev; 106 struct amdgpu_userq_obj *ctx = &queue->fw_obj; 107 struct mes_add_queue_input queue_input; 108 int r; 109 110 memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); 111 112 queue_input.process_va_start = 0; 113 queue_input.process_va_end = adev->vm_manager.max_pfn - 1; 114 115 /* set process quantum to 10 ms and gang quantum to 1 ms as default */ 116 queue_input.process_quantum = 100000; 117 queue_input.gang_quantum = 10000; 118 queue_input.paging = false; 119 120 queue_input.process_context_addr = ctx->gpu_addr; 121 queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; 122 queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; 123 queue_input.gang_global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; 124 125 queue_input.process_id = queue->vm->pasid; 126 queue_input.queue_type = queue->queue_type; 127 queue_input.mqd_addr = queue->mqd.gpu_addr; 128 queue_input.wptr_addr = userq_props->wptr_gpu_addr; 129 queue_input.queue_size = userq_props->queue_size >> 2; 130 queue_input.doorbell_offset = userq_props->doorbell_index; 131 queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo); 132 queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr; 133 134 amdgpu_mes_lock(&adev->mes); 135 r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); 136 amdgpu_mes_unlock(&adev->mes); 137 if (r) { 138 DRM_ERROR("Failed to map queue in HW, err (%d)\n", r); 139 return r; 140 } 141 142 queue->queue_active = true; 143 DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n", userq_props->doorbell_index); 144 return 0; 145 } 146 147 static void mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, 148 struct amdgpu_usermode_queue *queue) 149 { 150 struct amdgpu_device *adev = uq_mgr->adev; 151 struct mes_remove_queue_input queue_input; 152 struct amdgpu_userq_obj *ctx = &queue->fw_obj; 153 int r; 154 155 memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input)); 156 queue_input.doorbell_offset = queue->doorbell_index; 157 queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; 158 159 amdgpu_mes_lock(&adev->mes); 160 r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); 161 amdgpu_mes_unlock(&adev->mes); 162 if (r) 163 DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", r); 164 queue->queue_active = false; 165 } 166 167 static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, 168 struct amdgpu_usermode_queue *queue, 169 struct drm_amdgpu_userq_in *mqd_user) 170 { 171 struct amdgpu_userq_obj *ctx = &queue->fw_obj; 172 int r, size; 173 174 /* 175 * The FW expects at least one page space allocated for 176 * process ctx and gang ctx each. Create an object 177 * for the same. 178 */ 179 size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ; 180 r = amdgpu_userqueue_create_object(uq_mgr, ctx, size); 181 if (r) { 182 DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", r); 183 return r; 184 } 185 186 return 0; 187 } 188 189 static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, 190 struct drm_amdgpu_userq_in *args_in, 191 struct amdgpu_usermode_queue *queue) 192 { 193 struct amdgpu_device *adev = uq_mgr->adev; 194 struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type]; 195 struct drm_amdgpu_userq_in *mqd_user = args_in; 196 struct amdgpu_mqd_prop *userq_props; 197 int r; 198 199 /* Structure to initialize MQD for userqueue using generic MQD init function */ 200 userq_props = kzalloc(sizeof(struct amdgpu_mqd_prop), GFP_KERNEL); 201 if (!userq_props) { 202 DRM_ERROR("Failed to allocate memory for userq_props\n"); 203 return -ENOMEM; 204 } 205 206 if (!mqd_user->wptr_va || !mqd_user->rptr_va || 207 !mqd_user->queue_va || mqd_user->queue_size == 0) { 208 DRM_ERROR("Invalid MQD parameters for userqueue\n"); 209 r = -EINVAL; 210 goto free_props; 211 } 212 213 r = amdgpu_userqueue_create_object(uq_mgr, &queue->mqd, mqd_hw_default->mqd_size); 214 if (r) { 215 DRM_ERROR("Failed to create MQD object for userqueue\n"); 216 goto free_props; 217 } 218 219 /* Initialize the MQD BO with user given values */ 220 userq_props->wptr_gpu_addr = mqd_user->wptr_va; 221 userq_props->rptr_gpu_addr = mqd_user->rptr_va; 222 userq_props->queue_size = mqd_user->queue_size; 223 userq_props->hqd_base_gpu_addr = mqd_user->queue_va; 224 userq_props->mqd_gpu_addr = queue->mqd.gpu_addr; 225 userq_props->use_doorbell = true; 226 userq_props->doorbell_index = queue->doorbell_index; 227 userq_props->fence_address = queue->fence_drv->gpu_addr; 228 229 if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) { 230 struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd; 231 232 if (mqd_user->mqd_size != sizeof(*compute_mqd)) { 233 DRM_ERROR("Invalid compute IP MQD size\n"); 234 r = -EINVAL; 235 goto free_mqd; 236 } 237 238 compute_mqd = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); 239 if (IS_ERR(compute_mqd)) { 240 DRM_ERROR("Failed to read user MQD\n"); 241 r = -ENOMEM; 242 goto free_mqd; 243 } 244 245 userq_props->eop_gpu_addr = compute_mqd->eop_va; 246 userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; 247 userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; 248 userq_props->hqd_active = false; 249 kfree(compute_mqd); 250 } else if (queue->queue_type == AMDGPU_HW_IP_GFX) { 251 struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11; 252 253 if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) { 254 DRM_ERROR("Invalid GFX MQD\n"); 255 r = -EINVAL; 256 goto free_mqd; 257 } 258 259 mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); 260 if (IS_ERR(mqd_gfx_v11)) { 261 DRM_ERROR("Failed to read user MQD\n"); 262 r = -ENOMEM; 263 goto free_mqd; 264 } 265 266 userq_props->shadow_addr = mqd_gfx_v11->shadow_va; 267 userq_props->csa_addr = mqd_gfx_v11->csa_va; 268 kfree(mqd_gfx_v11); 269 } else if (queue->queue_type == AMDGPU_HW_IP_DMA) { 270 struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11; 271 272 if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) { 273 DRM_ERROR("Invalid SDMA MQD\n"); 274 r = -EINVAL; 275 goto free_mqd; 276 } 277 278 mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); 279 if (IS_ERR(mqd_sdma_v11)) { 280 DRM_ERROR("Failed to read sdma user MQD\n"); 281 r = -ENOMEM; 282 goto free_mqd; 283 } 284 285 userq_props->csa_addr = mqd_sdma_v11->csa_va; 286 kfree(mqd_sdma_v11); 287 } 288 289 queue->userq_prop = userq_props; 290 291 r = pm_runtime_get_sync(adev_to_drm(adev)->dev); 292 if (r < 0) { 293 dev_err(adev->dev, "pm_runtime_get_sync() failed for userqueue mqd create\n"); 294 goto deference_pm; 295 } 296 297 r = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props); 298 if (r) { 299 DRM_ERROR("Failed to initialize MQD for userqueue\n"); 300 goto free_mqd; 301 } 302 303 /* Create BO for FW operations */ 304 r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user); 305 if (r) { 306 DRM_ERROR("Failed to allocate BO for userqueue (%d)", r); 307 goto free_mqd; 308 } 309 310 /* FW expects WPTR BOs to be mapped into GART */ 311 r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); 312 if (r) { 313 DRM_ERROR("Failed to create WPTR mapping\n"); 314 goto free_ctx; 315 } 316 317 /* Map userqueue into FW using MES */ 318 r = mes_userq_map(uq_mgr, queue, userq_props); 319 if (r) { 320 DRM_ERROR("Failed to init MQD\n"); 321 goto free_ctx; 322 } 323 324 return 0; 325 326 free_ctx: 327 amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); 328 329 free_mqd: 330 amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); 331 pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); 332 deference_pm: 333 pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); 334 335 free_props: 336 kfree(userq_props); 337 338 return r; 339 } 340 341 static void 342 mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, 343 struct amdgpu_usermode_queue *queue) 344 { 345 struct amdgpu_device *adev = uq_mgr->adev; 346 347 if (queue->queue_active) 348 mes_userq_unmap(uq_mgr, queue); 349 350 amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); 351 kfree(queue->userq_prop); 352 amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); 353 354 pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); 355 pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); 356 } 357 358 static int mes_userq_suspend(struct amdgpu_userq_mgr *uq_mgr, 359 struct amdgpu_usermode_queue *queue) 360 { 361 if (queue->queue_active) 362 mes_userq_unmap(uq_mgr, queue); 363 364 return 0; 365 } 366 367 static int mes_userq_resume(struct amdgpu_userq_mgr *uq_mgr, 368 struct amdgpu_usermode_queue *queue) 369 { 370 int ret; 371 372 if (queue->queue_active) 373 return 0; 374 375 ret = mes_userq_map(uq_mgr, queue, queue->userq_prop); 376 if (ret) { 377 DRM_ERROR("Failed to resume queue\n"); 378 return ret; 379 } 380 return 0; 381 } 382 383 const struct amdgpu_userq_funcs userq_mes_funcs = { 384 .mqd_create = mes_userq_mqd_create, 385 .mqd_destroy = mes_userq_mqd_destroy, 386 .suspend = mes_userq_suspend, 387 .resume = mes_userq_resume, 388 }; 389