1 /* 2 * Copyright 2015 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: monk liu <monk.liu@amd.com> 23 */ 24 25 #include <drm/drm_auth.h> 26 #include <drm/drm_drv.h> 27 #include "amdgpu.h" 28 #include "amdgpu_sched.h" 29 #include "amdgpu_ras.h" 30 #include <linux/nospec.h> 31 32 #define to_amdgpu_ctx_entity(e) \ 33 container_of((e), struct amdgpu_ctx_entity, entity) 34 35 const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = { 36 [AMDGPU_HW_IP_GFX] = 1, 37 [AMDGPU_HW_IP_COMPUTE] = 4, 38 [AMDGPU_HW_IP_DMA] = 2, 39 [AMDGPU_HW_IP_UVD] = 1, 40 [AMDGPU_HW_IP_VCE] = 1, 41 [AMDGPU_HW_IP_UVD_ENC] = 1, 42 [AMDGPU_HW_IP_VCN_DEC] = 1, 43 [AMDGPU_HW_IP_VCN_ENC] = 1, 44 [AMDGPU_HW_IP_VCN_JPEG] = 1, 45 [AMDGPU_HW_IP_VPE] = 1, 46 }; 47 48 bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio) 49 { 50 switch (ctx_prio) { 51 case AMDGPU_CTX_PRIORITY_VERY_LOW: 52 case AMDGPU_CTX_PRIORITY_LOW: 53 case AMDGPU_CTX_PRIORITY_NORMAL: 54 case AMDGPU_CTX_PRIORITY_HIGH: 55 case AMDGPU_CTX_PRIORITY_VERY_HIGH: 56 return true; 57 default: 58 case AMDGPU_CTX_PRIORITY_UNSET: 59 /* UNSET priority is not valid and we don't carry that 60 * around, but set it to NORMAL in the only place this 61 * function is called, amdgpu_ctx_ioctl(). 62 */ 63 return false; 64 } 65 } 66 67 static enum drm_sched_priority 68 amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio) 69 { 70 switch (ctx_prio) { 71 case AMDGPU_CTX_PRIORITY_UNSET: 72 pr_warn_once("AMD-->DRM context priority value UNSET-->NORMAL"); 73 return DRM_SCHED_PRIORITY_NORMAL; 74 75 case AMDGPU_CTX_PRIORITY_VERY_LOW: 76 return DRM_SCHED_PRIORITY_LOW; 77 78 case AMDGPU_CTX_PRIORITY_LOW: 79 return DRM_SCHED_PRIORITY_LOW; 80 81 case AMDGPU_CTX_PRIORITY_NORMAL: 82 return DRM_SCHED_PRIORITY_NORMAL; 83 84 case AMDGPU_CTX_PRIORITY_HIGH: 85 return DRM_SCHED_PRIORITY_HIGH; 86 87 case AMDGPU_CTX_PRIORITY_VERY_HIGH: 88 return DRM_SCHED_PRIORITY_HIGH; 89 90 /* This should not happen as we sanitized userspace provided priority 91 * already, WARN if this happens. 92 */ 93 default: 94 WARN(1, "Invalid context priority %d\n", ctx_prio); 95 return DRM_SCHED_PRIORITY_NORMAL; 96 } 97 98 } 99 100 static int amdgpu_ctx_priority_permit(struct drm_file *filp, 101 int32_t priority) 102 { 103 /* NORMAL and below are accessible by everyone */ 104 if (priority <= AMDGPU_CTX_PRIORITY_NORMAL) 105 return 0; 106 107 if (capable(CAP_SYS_NICE)) 108 return 0; 109 110 if (drm_is_current_master(filp)) 111 return 0; 112 113 return -EACCES; 114 } 115 116 static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio) 117 { 118 switch (prio) { 119 case AMDGPU_CTX_PRIORITY_HIGH: 120 case AMDGPU_CTX_PRIORITY_VERY_HIGH: 121 return AMDGPU_GFX_PIPE_PRIO_HIGH; 122 default: 123 return AMDGPU_GFX_PIPE_PRIO_NORMAL; 124 } 125 } 126 127 static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_t prio) 128 { 129 switch (prio) { 130 case AMDGPU_CTX_PRIORITY_HIGH: 131 return AMDGPU_RING_PRIO_1; 132 case AMDGPU_CTX_PRIORITY_VERY_HIGH: 133 return AMDGPU_RING_PRIO_2; 134 default: 135 return AMDGPU_RING_PRIO_0; 136 } 137 } 138 139 static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip) 140 { 141 struct amdgpu_device *adev = ctx->mgr->adev; 142 unsigned int hw_prio; 143 int32_t ctx_prio; 144 145 ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ? 146 ctx->init_priority : ctx->override_priority; 147 148 switch (hw_ip) { 149 case AMDGPU_HW_IP_GFX: 150 case AMDGPU_HW_IP_COMPUTE: 151 hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio); 152 break; 153 case AMDGPU_HW_IP_VCE: 154 case AMDGPU_HW_IP_VCN_ENC: 155 hw_prio = amdgpu_ctx_sched_prio_to_ring_prio(ctx_prio); 156 break; 157 default: 158 hw_prio = AMDGPU_RING_PRIO_DEFAULT; 159 break; 160 } 161 162 hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM); 163 if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0) 164 hw_prio = AMDGPU_RING_PRIO_DEFAULT; 165 166 return hw_prio; 167 } 168 169 /* Calculate the time spend on the hw */ 170 static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence) 171 { 172 struct drm_sched_fence *s_fence; 173 174 if (!fence) 175 return ns_to_ktime(0); 176 177 /* When the fence is not even scheduled it can't have spend time */ 178 s_fence = to_drm_sched_fence(fence); 179 if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags)) 180 return ns_to_ktime(0); 181 182 /* When it is still running account how much already spend */ 183 if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags)) 184 return ktime_sub(ktime_get(), s_fence->scheduled.timestamp); 185 186 return ktime_sub(s_fence->finished.timestamp, 187 s_fence->scheduled.timestamp); 188 } 189 190 static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx, 191 struct amdgpu_ctx_entity *centity) 192 { 193 ktime_t res = ns_to_ktime(0); 194 uint32_t i; 195 196 spin_lock(&ctx->ring_lock); 197 for (i = 0; i < amdgpu_sched_jobs; i++) { 198 res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i])); 199 } 200 spin_unlock(&ctx->ring_lock); 201 return res; 202 } 203 204 static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, 205 const u32 ring) 206 { 207 struct drm_gpu_scheduler **scheds = NULL, *sched = NULL; 208 struct amdgpu_device *adev = ctx->mgr->adev; 209 struct amdgpu_ctx_entity *entity; 210 enum drm_sched_priority drm_prio; 211 unsigned int hw_prio, num_scheds; 212 int32_t ctx_prio; 213 int r; 214 215 entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs), 216 GFP_KERNEL); 217 if (!entity) 218 return -ENOMEM; 219 220 ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ? 221 ctx->init_priority : ctx->override_priority; 222 entity->hw_ip = hw_ip; 223 entity->sequence = 1; 224 hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip); 225 drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio); 226 227 hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM); 228 229 if (!(adev)->xcp_mgr) { 230 scheds = adev->gpu_sched[hw_ip][hw_prio].sched; 231 num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds; 232 } else { 233 struct amdgpu_fpriv *fpriv; 234 235 fpriv = container_of(ctx->ctx_mgr, struct amdgpu_fpriv, ctx_mgr); 236 r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv, 237 &num_scheds, &scheds); 238 if (r) 239 goto cleanup_entity; 240 } 241 242 /* disable load balance if the hw engine retains context among dependent jobs */ 243 if (hw_ip == AMDGPU_HW_IP_VCN_ENC || 244 hw_ip == AMDGPU_HW_IP_VCN_DEC || 245 hw_ip == AMDGPU_HW_IP_UVD_ENC || 246 hw_ip == AMDGPU_HW_IP_UVD) { 247 sched = drm_sched_pick_best(scheds, num_scheds); 248 scheds = &sched; 249 num_scheds = 1; 250 } 251 252 r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds, 253 &ctx->guilty); 254 if (r) 255 goto error_free_entity; 256 257 /* It's not an error if we fail to install the new entity */ 258 if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity)) 259 goto cleanup_entity; 260 261 return 0; 262 263 cleanup_entity: 264 drm_sched_entity_fini(&entity->entity); 265 266 error_free_entity: 267 kfree(entity); 268 269 return r; 270 } 271 272 static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_device *adev, 273 struct amdgpu_ctx_entity *entity) 274 { 275 ktime_t res = ns_to_ktime(0); 276 int i; 277 278 if (!entity) 279 return res; 280 281 for (i = 0; i < amdgpu_sched_jobs; ++i) { 282 res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i])); 283 dma_fence_put(entity->fences[i]); 284 } 285 286 amdgpu_xcp_release_sched(adev, entity); 287 288 kfree(entity); 289 return res; 290 } 291 292 static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, 293 u32 *stable_pstate) 294 { 295 struct amdgpu_device *adev = ctx->mgr->adev; 296 enum amd_dpm_forced_level current_level; 297 298 current_level = amdgpu_dpm_get_performance_level(adev); 299 300 switch (current_level) { 301 case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: 302 *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD; 303 break; 304 case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: 305 *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK; 306 break; 307 case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: 308 *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK; 309 break; 310 case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: 311 *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK; 312 break; 313 default: 314 *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; 315 break; 316 } 317 return 0; 318 } 319 320 static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, 321 struct drm_file *filp, struct amdgpu_ctx *ctx) 322 { 323 struct amdgpu_fpriv *fpriv = filp->driver_priv; 324 u32 current_stable_pstate; 325 int r; 326 327 r = amdgpu_ctx_priority_permit(filp, priority); 328 if (r) 329 return r; 330 331 memset(ctx, 0, sizeof(*ctx)); 332 333 kref_init(&ctx->refcount); 334 ctx->mgr = mgr; 335 spin_lock_init(&ctx->ring_lock); 336 337 ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); 338 ctx->reset_counter_query = ctx->reset_counter; 339 ctx->generation = amdgpu_vm_generation(mgr->adev, &fpriv->vm); 340 ctx->init_priority = priority; 341 ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; 342 343 r = amdgpu_ctx_get_stable_pstate(ctx, ¤t_stable_pstate); 344 if (r) 345 return r; 346 347 if (mgr->adev->pm.stable_pstate_ctx) 348 ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate; 349 else 350 ctx->stable_pstate = current_stable_pstate; 351 352 ctx->ctx_mgr = &(fpriv->ctx_mgr); 353 return 0; 354 } 355 356 static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, 357 u32 stable_pstate) 358 { 359 struct amdgpu_device *adev = ctx->mgr->adev; 360 enum amd_dpm_forced_level level; 361 u32 current_stable_pstate; 362 int r; 363 364 mutex_lock(&adev->pm.stable_pstate_ctx_lock); 365 if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) { 366 r = -EBUSY; 367 goto done; 368 } 369 370 r = amdgpu_ctx_get_stable_pstate(ctx, ¤t_stable_pstate); 371 if (r || (stable_pstate == current_stable_pstate)) 372 goto done; 373 374 switch (stable_pstate) { 375 case AMDGPU_CTX_STABLE_PSTATE_NONE: 376 level = AMD_DPM_FORCED_LEVEL_AUTO; 377 break; 378 case AMDGPU_CTX_STABLE_PSTATE_STANDARD: 379 level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD; 380 break; 381 case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK: 382 level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK; 383 break; 384 case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK: 385 level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK; 386 break; 387 case AMDGPU_CTX_STABLE_PSTATE_PEAK: 388 level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK; 389 break; 390 default: 391 r = -EINVAL; 392 goto done; 393 } 394 395 r = amdgpu_dpm_force_performance_level(adev, level); 396 397 if (level == AMD_DPM_FORCED_LEVEL_AUTO) 398 adev->pm.stable_pstate_ctx = NULL; 399 else 400 adev->pm.stable_pstate_ctx = ctx; 401 done: 402 mutex_unlock(&adev->pm.stable_pstate_ctx_lock); 403 404 return r; 405 } 406 407 static void amdgpu_ctx_fini(struct kref *ref) 408 { 409 struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); 410 struct amdgpu_ctx_mgr *mgr = ctx->mgr; 411 struct amdgpu_device *adev = mgr->adev; 412 unsigned i, j, idx; 413 414 if (!adev) 415 return; 416 417 for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { 418 for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) { 419 ktime_t spend; 420 421 spend = amdgpu_ctx_fini_entity(adev, ctx->entities[i][j]); 422 atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]); 423 } 424 } 425 426 if (drm_dev_enter(adev_to_drm(adev), &idx)) { 427 amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate); 428 drm_dev_exit(idx); 429 } 430 431 kfree(ctx); 432 } 433 434 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance, 435 u32 ring, struct drm_sched_entity **entity) 436 { 437 int r; 438 struct drm_sched_entity *ctx_entity; 439 440 if (hw_ip >= AMDGPU_HW_IP_NUM) { 441 DRM_ERROR("unknown HW IP type: %d\n", hw_ip); 442 return -EINVAL; 443 } 444 445 /* Right now all IPs have only one instance - multiple rings. */ 446 if (instance != 0) { 447 DRM_DEBUG("invalid ip instance: %d\n", instance); 448 return -EINVAL; 449 } 450 451 if (ring >= amdgpu_ctx_num_entities[hw_ip]) { 452 DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring); 453 return -EINVAL; 454 } 455 456 if (ctx->entities[hw_ip][ring] == NULL) { 457 r = amdgpu_ctx_init_entity(ctx, hw_ip, ring); 458 if (r) 459 return r; 460 } 461 462 ctx_entity = &ctx->entities[hw_ip][ring]->entity; 463 r = drm_sched_entity_error(ctx_entity); 464 if (r) { 465 DRM_DEBUG("error entity %p\n", ctx_entity); 466 return r; 467 } 468 469 *entity = ctx_entity; 470 return 0; 471 } 472 473 static int amdgpu_ctx_alloc(struct amdgpu_device *adev, 474 struct amdgpu_fpriv *fpriv, 475 struct drm_file *filp, 476 int32_t priority, 477 uint32_t *id) 478 { 479 struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; 480 struct amdgpu_ctx *ctx; 481 int r; 482 483 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 484 if (!ctx) 485 return -ENOMEM; 486 487 mutex_lock(&mgr->lock); 488 r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL); 489 if (r < 0) { 490 mutex_unlock(&mgr->lock); 491 kfree(ctx); 492 return r; 493 } 494 495 *id = (uint32_t)r; 496 r = amdgpu_ctx_init(mgr, priority, filp, ctx); 497 if (r) { 498 idr_remove(&mgr->ctx_handles, *id); 499 *id = 0; 500 kfree(ctx); 501 } 502 mutex_unlock(&mgr->lock); 503 return r; 504 } 505 506 static void amdgpu_ctx_do_release(struct kref *ref) 507 { 508 struct amdgpu_ctx *ctx; 509 u32 i, j; 510 511 ctx = container_of(ref, struct amdgpu_ctx, refcount); 512 for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { 513 for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { 514 if (!ctx->entities[i][j]) 515 continue; 516 517 drm_sched_entity_destroy(&ctx->entities[i][j]->entity); 518 } 519 } 520 521 amdgpu_ctx_fini(ref); 522 } 523 524 static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id) 525 { 526 struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; 527 struct amdgpu_ctx *ctx; 528 529 mutex_lock(&mgr->lock); 530 ctx = idr_remove(&mgr->ctx_handles, id); 531 if (ctx) 532 kref_put(&ctx->refcount, amdgpu_ctx_do_release); 533 mutex_unlock(&mgr->lock); 534 return ctx ? 0 : -EINVAL; 535 } 536 537 static int amdgpu_ctx_query(struct amdgpu_device *adev, 538 struct amdgpu_fpriv *fpriv, uint32_t id, 539 union drm_amdgpu_ctx_out *out) 540 { 541 struct amdgpu_ctx *ctx; 542 struct amdgpu_ctx_mgr *mgr; 543 unsigned reset_counter; 544 545 if (!fpriv) 546 return -EINVAL; 547 548 mgr = &fpriv->ctx_mgr; 549 mutex_lock(&mgr->lock); 550 ctx = idr_find(&mgr->ctx_handles, id); 551 if (!ctx) { 552 mutex_unlock(&mgr->lock); 553 return -EINVAL; 554 } 555 556 /* TODO: these two are always zero */ 557 out->state.flags = 0x0; 558 out->state.hangs = 0x0; 559 560 /* determine if a GPU reset has occured since the last call */ 561 reset_counter = atomic_read(&adev->gpu_reset_counter); 562 /* TODO: this should ideally return NO, GUILTY, or INNOCENT. */ 563 if (ctx->reset_counter_query == reset_counter) 564 out->state.reset_status = AMDGPU_CTX_NO_RESET; 565 else 566 out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET; 567 ctx->reset_counter_query = reset_counter; 568 569 mutex_unlock(&mgr->lock); 570 return 0; 571 } 572 573 #define AMDGPU_RAS_COUNTE_DELAY_MS 3000 574 575 static int amdgpu_ctx_query2(struct amdgpu_device *adev, 576 struct amdgpu_fpriv *fpriv, uint32_t id, 577 union drm_amdgpu_ctx_out *out) 578 { 579 struct amdgpu_ras *con = amdgpu_ras_get_context(adev); 580 struct amdgpu_ctx *ctx; 581 struct amdgpu_ctx_mgr *mgr; 582 583 if (!fpriv) 584 return -EINVAL; 585 586 mgr = &fpriv->ctx_mgr; 587 mutex_lock(&mgr->lock); 588 ctx = idr_find(&mgr->ctx_handles, id); 589 if (!ctx) { 590 mutex_unlock(&mgr->lock); 591 return -EINVAL; 592 } 593 594 out->state.flags = 0x0; 595 out->state.hangs = 0x0; 596 597 if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter)) 598 out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET; 599 600 if (ctx->generation != amdgpu_vm_generation(adev, &fpriv->vm)) 601 out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST; 602 603 if (atomic_read(&ctx->guilty)) 604 out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY; 605 606 if (amdgpu_in_reset(adev)) 607 out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS; 608 609 if (adev->ras_enabled && con) { 610 /* Return the cached values in O(1), 611 * and schedule delayed work to cache 612 * new vaues. 613 */ 614 int ce_count, ue_count; 615 616 ce_count = atomic_read(&con->ras_ce_count); 617 ue_count = atomic_read(&con->ras_ue_count); 618 619 if (ce_count != ctx->ras_counter_ce) { 620 ctx->ras_counter_ce = ce_count; 621 out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE; 622 } 623 624 if (ue_count != ctx->ras_counter_ue) { 625 ctx->ras_counter_ue = ue_count; 626 out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE; 627 } 628 629 schedule_delayed_work(&con->ras_counte_delay_work, 630 msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS)); 631 } 632 633 mutex_unlock(&mgr->lock); 634 return 0; 635 } 636 637 static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev, 638 struct amdgpu_fpriv *fpriv, uint32_t id, 639 bool set, u32 *stable_pstate) 640 { 641 struct amdgpu_ctx *ctx; 642 struct amdgpu_ctx_mgr *mgr; 643 int r; 644 645 if (!fpriv) 646 return -EINVAL; 647 648 mgr = &fpriv->ctx_mgr; 649 mutex_lock(&mgr->lock); 650 ctx = idr_find(&mgr->ctx_handles, id); 651 if (!ctx) { 652 mutex_unlock(&mgr->lock); 653 return -EINVAL; 654 } 655 656 if (set) 657 r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate); 658 else 659 r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate); 660 661 mutex_unlock(&mgr->lock); 662 return r; 663 } 664 665 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, 666 struct drm_file *filp) 667 { 668 int r; 669 uint32_t id, stable_pstate; 670 int32_t priority; 671 672 union drm_amdgpu_ctx *args = data; 673 struct amdgpu_device *adev = drm_to_adev(dev); 674 struct amdgpu_fpriv *fpriv = filp->driver_priv; 675 676 id = args->in.ctx_id; 677 priority = args->in.priority; 678 679 /* For backwards compatibility, we need to accept ioctls with garbage 680 * in the priority field. Garbage values in the priority field, result 681 * in the priority being set to NORMAL. 682 */ 683 if (!amdgpu_ctx_priority_is_valid(priority)) 684 priority = AMDGPU_CTX_PRIORITY_NORMAL; 685 686 switch (args->in.op) { 687 case AMDGPU_CTX_OP_ALLOC_CTX: 688 r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id); 689 args->out.alloc.ctx_id = id; 690 break; 691 case AMDGPU_CTX_OP_FREE_CTX: 692 r = amdgpu_ctx_free(fpriv, id); 693 break; 694 case AMDGPU_CTX_OP_QUERY_STATE: 695 r = amdgpu_ctx_query(adev, fpriv, id, &args->out); 696 break; 697 case AMDGPU_CTX_OP_QUERY_STATE2: 698 r = amdgpu_ctx_query2(adev, fpriv, id, &args->out); 699 break; 700 case AMDGPU_CTX_OP_GET_STABLE_PSTATE: 701 if (args->in.flags) 702 return -EINVAL; 703 r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate); 704 if (!r) 705 args->out.pstate.flags = stable_pstate; 706 break; 707 case AMDGPU_CTX_OP_SET_STABLE_PSTATE: 708 if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK) 709 return -EINVAL; 710 stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK; 711 if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK) 712 return -EINVAL; 713 r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate); 714 break; 715 default: 716 return -EINVAL; 717 } 718 719 return r; 720 } 721 722 struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id) 723 { 724 struct amdgpu_ctx *ctx; 725 struct amdgpu_ctx_mgr *mgr; 726 727 if (!fpriv) 728 return NULL; 729 730 mgr = &fpriv->ctx_mgr; 731 732 mutex_lock(&mgr->lock); 733 ctx = idr_find(&mgr->ctx_handles, id); 734 if (ctx) 735 kref_get(&ctx->refcount); 736 mutex_unlock(&mgr->lock); 737 return ctx; 738 } 739 740 int amdgpu_ctx_put(struct amdgpu_ctx *ctx) 741 { 742 if (ctx == NULL) 743 return -EINVAL; 744 745 kref_put(&ctx->refcount, amdgpu_ctx_do_release); 746 return 0; 747 } 748 749 uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, 750 struct drm_sched_entity *entity, 751 struct dma_fence *fence) 752 { 753 struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); 754 uint64_t seq = centity->sequence; 755 struct dma_fence *other = NULL; 756 unsigned idx = 0; 757 758 idx = seq & (amdgpu_sched_jobs - 1); 759 other = centity->fences[idx]; 760 WARN_ON(other && !dma_fence_is_signaled(other)); 761 762 dma_fence_get(fence); 763 764 spin_lock(&ctx->ring_lock); 765 centity->fences[idx] = fence; 766 centity->sequence++; 767 spin_unlock(&ctx->ring_lock); 768 769 atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)), 770 &ctx->mgr->time_spend[centity->hw_ip]); 771 772 dma_fence_put(other); 773 return seq; 774 } 775 776 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, 777 struct drm_sched_entity *entity, 778 uint64_t seq) 779 { 780 struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); 781 struct dma_fence *fence; 782 783 spin_lock(&ctx->ring_lock); 784 785 if (seq == ~0ull) 786 seq = centity->sequence - 1; 787 788 if (seq >= centity->sequence) { 789 spin_unlock(&ctx->ring_lock); 790 return ERR_PTR(-EINVAL); 791 } 792 793 794 if (seq + amdgpu_sched_jobs < centity->sequence) { 795 spin_unlock(&ctx->ring_lock); 796 return NULL; 797 } 798 799 fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]); 800 spin_unlock(&ctx->ring_lock); 801 802 return fence; 803 } 804 805 static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx, 806 struct amdgpu_ctx_entity *aentity, 807 int hw_ip, 808 int32_t priority) 809 { 810 struct amdgpu_device *adev = ctx->mgr->adev; 811 unsigned int hw_prio; 812 struct drm_gpu_scheduler **scheds = NULL; 813 unsigned num_scheds; 814 815 /* set sw priority */ 816 drm_sched_entity_set_priority(&aentity->entity, 817 amdgpu_ctx_to_drm_sched_prio(priority)); 818 819 /* set hw priority */ 820 if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) { 821 hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip); 822 hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX); 823 scheds = adev->gpu_sched[hw_ip][hw_prio].sched; 824 num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds; 825 drm_sched_entity_modify_sched(&aentity->entity, scheds, 826 num_scheds); 827 } 828 } 829 830 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, 831 int32_t priority) 832 { 833 int32_t ctx_prio; 834 unsigned i, j; 835 836 ctx->override_priority = priority; 837 838 ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ? 839 ctx->init_priority : ctx->override_priority; 840 for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { 841 for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { 842 if (!ctx->entities[i][j]) 843 continue; 844 845 amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j], 846 i, ctx_prio); 847 } 848 } 849 } 850 851 int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, 852 struct drm_sched_entity *entity) 853 { 854 struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); 855 struct dma_fence *other; 856 unsigned idx; 857 long r; 858 859 spin_lock(&ctx->ring_lock); 860 idx = centity->sequence & (amdgpu_sched_jobs - 1); 861 other = dma_fence_get(centity->fences[idx]); 862 spin_unlock(&ctx->ring_lock); 863 864 if (!other) 865 return 0; 866 867 r = dma_fence_wait(other, true); 868 if (r < 0 && r != -ERESTARTSYS) 869 DRM_ERROR("Error (%ld) waiting for fence!\n", r); 870 871 dma_fence_put(other); 872 return r; 873 } 874 875 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr, 876 struct amdgpu_device *adev) 877 { 878 unsigned int i; 879 880 mgr->adev = adev; 881 mutex_init(&mgr->lock); 882 idr_init_base(&mgr->ctx_handles, 1); 883 884 for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) 885 atomic64_set(&mgr->time_spend[i], 0); 886 } 887 888 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout) 889 { 890 struct amdgpu_ctx *ctx; 891 struct idr *idp; 892 uint32_t id, i, j; 893 894 idp = &mgr->ctx_handles; 895 896 mutex_lock(&mgr->lock); 897 idr_for_each_entry(idp, ctx, id) { 898 for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { 899 for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { 900 struct drm_sched_entity *entity; 901 902 if (!ctx->entities[i][j]) 903 continue; 904 905 entity = &ctx->entities[i][j]->entity; 906 timeout = drm_sched_entity_flush(entity, timeout); 907 } 908 } 909 } 910 mutex_unlock(&mgr->lock); 911 return timeout; 912 } 913 914 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) 915 { 916 struct amdgpu_ctx *ctx; 917 struct idr *idp; 918 uint32_t id, i, j; 919 920 idp = &mgr->ctx_handles; 921 922 idr_for_each_entry(idp, ctx, id) { 923 if (kref_read(&ctx->refcount) != 1) { 924 DRM_ERROR("ctx %p is still alive\n", ctx); 925 continue; 926 } 927 928 for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { 929 for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { 930 struct drm_sched_entity *entity; 931 932 if (!ctx->entities[i][j]) 933 continue; 934 935 entity = &ctx->entities[i][j]->entity; 936 drm_sched_entity_fini(entity); 937 } 938 } 939 } 940 } 941 942 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr) 943 { 944 struct amdgpu_ctx *ctx; 945 struct idr *idp; 946 uint32_t id; 947 948 amdgpu_ctx_mgr_entity_fini(mgr); 949 950 idp = &mgr->ctx_handles; 951 952 idr_for_each_entry(idp, ctx, id) { 953 if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1) 954 DRM_ERROR("ctx %p is still alive\n", ctx); 955 } 956 957 idr_destroy(&mgr->ctx_handles); 958 mutex_destroy(&mgr->lock); 959 } 960 961 void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr, 962 ktime_t usage[AMDGPU_HW_IP_NUM]) 963 { 964 struct amdgpu_ctx *ctx; 965 unsigned int hw_ip, i; 966 uint32_t id; 967 968 /* 969 * This is a little bit racy because it can be that a ctx or a fence are 970 * destroyed just in the moment we try to account them. But that is ok 971 * since exactly that case is explicitely allowed by the interface. 972 */ 973 mutex_lock(&mgr->lock); 974 for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) { 975 uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]); 976 977 usage[hw_ip] = ns_to_ktime(ns); 978 } 979 980 idr_for_each_entry(&mgr->ctx_handles, ctx, id) { 981 for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) { 982 for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) { 983 struct amdgpu_ctx_entity *centity; 984 ktime_t spend; 985 986 centity = ctx->entities[hw_ip][i]; 987 if (!centity) 988 continue; 989 spend = amdgpu_ctx_entity_time(ctx, centity); 990 usage[hw_ip] = ktime_add(usage[hw_ip], spend); 991 } 992 } 993 } 994 mutex_unlock(&mgr->lock); 995 } 996