1 /* 2 * Copyright 2017 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 #include "amdgpu_ids.h" 24 25 #include <linux/idr.h> 26 #include <linux/dma-fence-array.h> 27 28 29 #include "amdgpu.h" 30 #include "amdgpu_trace.h" 31 32 /* 33 * PASID manager 34 * 35 * PASIDs are global address space identifiers that can be shared 36 * between the GPU, an IOMMU and the driver. VMs on different devices 37 * may use the same PASID if they share the same address 38 * space. Therefore PASIDs are allocated using IDR cyclic allocator 39 * (similar to kernel PID allocation) which naturally delays reuse. 40 * VMs are looked up from the PASID per amdgpu_device. 
41 */ 42 43 static DEFINE_IDR(amdgpu_pasid_idr); 44 static DEFINE_SPINLOCK(amdgpu_pasid_idr_lock); 45 46 /* Helper to free pasid from a fence callback */ 47 struct amdgpu_pasid_cb { 48 struct dma_fence_cb cb; 49 u32 pasid; 50 }; 51 52 /** 53 * amdgpu_pasid_alloc - Allocate a PASID 54 * @bits: Maximum width of the PASID in bits, must be at least 1 55 * 56 * Uses kernel's IDR cyclic allocator (same as PID allocation). 57 * Allocates sequentially with automatic wrap-around. 58 * 59 * Returns a positive integer on success. Returns %-EINVAL if bits==0. 60 * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on 61 * memory allocation failure. 62 */ 63 int amdgpu_pasid_alloc(unsigned int bits) 64 { 65 int pasid; 66 67 if (bits == 0) 68 return -EINVAL; 69 70 spin_lock(&amdgpu_pasid_idr_lock); 71 /* TODO: Need to replace the idr with an xarry, and then 72 * handle the internal locking with ATOMIC safe paths. 73 */ 74 pasid = idr_alloc_cyclic(&amdgpu_pasid_idr, NULL, 1, 75 1U << bits, GFP_ATOMIC); 76 spin_unlock(&amdgpu_pasid_idr_lock); 77 78 if (pasid >= 0) 79 trace_amdgpu_pasid_allocated(pasid); 80 81 return pasid; 82 } 83 84 /** 85 * amdgpu_pasid_free - Free a PASID 86 * @pasid: PASID to free 87 */ 88 void amdgpu_pasid_free(u32 pasid) 89 { 90 trace_amdgpu_pasid_freed(pasid); 91 92 spin_lock(&amdgpu_pasid_idr_lock); 93 idr_remove(&amdgpu_pasid_idr, pasid); 94 spin_unlock(&amdgpu_pasid_idr_lock); 95 } 96 97 static void amdgpu_pasid_free_cb(struct dma_fence *fence, 98 struct dma_fence_cb *_cb) 99 { 100 struct amdgpu_pasid_cb *cb = 101 container_of(_cb, struct amdgpu_pasid_cb, cb); 102 103 amdgpu_pasid_free(cb->pasid); 104 dma_fence_put(fence); 105 kfree(cb); 106 } 107 108 /** 109 * amdgpu_pasid_free_delayed - free pasid when fences signal 110 * 111 * @resv: reservation object with the fences to wait for 112 * @pasid: pasid to free 113 * 114 * Free the pasid only after all the fences in resv are signaled. 
115 */ 116 void amdgpu_pasid_free_delayed(struct dma_resv *resv, 117 u32 pasid) 118 { 119 struct amdgpu_pasid_cb *cb; 120 struct dma_fence *fence; 121 int r; 122 123 r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_BOOKKEEP, &fence); 124 if (r) 125 goto fallback; 126 127 if (!fence) { 128 amdgpu_pasid_free(pasid); 129 return; 130 } 131 132 cb = kmalloc_obj(*cb); 133 if (!cb) { 134 /* Last resort when we are OOM */ 135 dma_fence_wait(fence, false); 136 dma_fence_put(fence); 137 amdgpu_pasid_free(pasid); 138 } else { 139 cb->pasid = pasid; 140 if (dma_fence_add_callback(fence, &cb->cb, 141 amdgpu_pasid_free_cb)) 142 amdgpu_pasid_free_cb(fence, &cb->cb); 143 } 144 145 return; 146 147 fallback: 148 /* Not enough memory for the delayed delete, as last resort 149 * block for all the fences to complete. 150 */ 151 dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, 152 false, MAX_SCHEDULE_TIMEOUT); 153 amdgpu_pasid_free(pasid); 154 } 155 156 /* 157 * VMID manager 158 * 159 * VMIDs are a per VMHUB identifier for page tables handling. 160 */ 161 162 /** 163 * amdgpu_vmid_had_gpu_reset - check if reset occured since last use 164 * 165 * @adev: amdgpu_device pointer 166 * @id: VMID structure 167 * 168 * Check if GPU reset occured since last use of the VMID. 
 */
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
			       struct amdgpu_vmid *id)
{
	/* True when the counter saved in the VMID differs from the device's */
	return id->current_gpu_reset_count !=
		atomic_read(&adev->gpu_reset_counter);
}

/*
 * Check if we need to switch to another set of resources.
 *
 * Returns true when any of the GDS, GWS or OA base/size values stored in
 * the VMID differs from what the job requests.
 */
static bool amdgpu_vmid_gds_switch_needed(struct amdgpu_vmid *id,
					  struct amdgpu_job *job)
{
	return id->gds_base != job->gds_base ||
		id->gds_size != job->gds_size ||
		id->gws_base != job->gws_base ||
		id->gws_size != job->gws_size ||
		id->oa_base != job->oa_base ||
		id->oa_size != job->oa_size;
}

/*
 * Check if the id is compatible with the job: same page directory address
 * and no GDS/GWS/OA switch required.
 */
static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id,
				   struct amdgpu_job *job)
{
	return id->pd_gpu_addr == job->vm_pd_addr &&
		!amdgpu_vmid_gds_switch_needed(id, job);
}

/**
 * amdgpu_vmid_grab_idle - grab idle VMID
 *
 * @ring: ring we want to submit job to
 * @idle: resulting idle VMID
 * @fence: fence to wait for if no id could be grabbed
 *
 * Try to find an idle VMID. If none is idle, hand back a fence to wait for
 * in @fence and remember it as the ring's vmid_wait fence.
 *
 * Three outcomes are possible:
 *  - the ring's vmid_wait fence has not signaled yet: @fence gets a
 *    reference to it and @idle is left untouched;
 *  - an idle VMID was found: @idle points to it and @fence is NULL;
 *  - no VMID is idle: @idle is set to NULL and @fence gets a reference to
 *    the fence peeked from the last VMID visited.
 *
 * Always returns 0.
 */
static int amdgpu_vmid_grab_idle(struct amdgpu_ring *ring,
				 struct amdgpu_vmid **idle,
				 struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->vm_hub;
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];

	/* If anybody is waiting for a VMID let everybody wait for fairness */
	if (!dma_fence_is_signaled(ring->vmid_wait)) {
		*fence = dma_fence_get(ring->vmid_wait);
		return 0;
	}

	/* Check if we have an idle VMID */
	list_for_each_entry_reverse((*idle), &id_mgr->ids_lru, list) {
		/* Don't use per engine and per process VMID at the same time */
		struct amdgpu_ring *r = adev->vm_manager.concurrent_flush ?
			NULL : ring;

		/* No fence left to wait for means this VMID is idle */
		*fence = amdgpu_sync_peek_fence(&(*idle)->active, r);
		if (!(*fence))
			return 0;
	}

	/*
	 * If we can't find an idle VMID to use, wait on a fence from the least
	 * recently used in the hope that it will be available soon.
	 */
	*idle = NULL;
	/* Replace the ring's wait fence; this takes one reference ... */
	dma_fence_put(ring->vmid_wait);
	ring->vmid_wait = dma_fence_get(*fence);

	/* This is the reference we return */
	dma_fence_get(*fence);
	return 0;
}

/**
 * amdgpu_vmid_grab_reserved - try to assign reserved VMID
 *
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 * @job: job who wants to use the VMID
 * @id: resulting VMID
 * @fence: fence to wait for if no id could be grabbed
 *
 * Try to assign a reserved VMID.
 *
 * On success with a usable VMID, @id points to the VM's reserved VMID,
 * job->vm_needs_flush is set as computed here and job->spm_update_needed
 * is set to true. When the reserved VMID is still busy, @id is set to NULL,
 * @fence gets a reference to the fence to wait for and 0 is returned.
 *
 * Returns 0 in both of those cases, or the error from amdgpu_sync_fence().
 */
static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
				     struct amdgpu_ring *ring,
				     struct amdgpu_job *job,
				     struct amdgpu_vmid **id,
				     struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->vm_hub;
	/* Computed up front: ring may be set to NULL further down */
	uint64_t fence_context = adev->fence_context + ring->idx;
	bool needs_flush = vm->use_cpu_for_update;
	uint64_t updates = amdgpu_vm_tlb_seq(vm);
	int r;

	*id = vm->reserved_vmid[vmhub];
	/*
	 * A flush is needed when the VMID changed owner, is incompatible
	 * with the job, has not seen the latest TLB sequence, or has no
	 * completed/same-context last flush.
	 */
	if ((*id)->owner != vm->immediate.fence_context ||
	    !amdgpu_vmid_compatible(*id, job) ||
	    (*id)->flushed_updates < updates ||
	    !(*id)->last_flush ||
	    ((*id)->last_flush->context != fence_context &&
	     !dma_fence_is_signaled((*id)->last_flush)))
		needs_flush = true;

	if ((*id)->owner != vm->immediate.fence_context ||
	    (!adev->vm_manager.concurrent_flush && needs_flush)) {
		struct dma_fence *tmp;

		/* Don't use per engine and per process VMID at the
		 * same time
		 */
		if (adev->vm_manager.concurrent_flush)
			ring = NULL;

		/* to prevent one context starved by another context */
		(*id)->pd_gpu_addr = 0;
		tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
		if (tmp) {
			/* Still in use: report the fence instead of the VMID */
			*id = NULL;
			*fence = dma_fence_get(tmp);
			return 0;
		}
	}

	/* Good we can use this VMID. Remember this submission as
	 * user of the VMID.
	 */
	r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished,
			      GFP_ATOMIC);
	if (r)
		return r;

	job->vm_needs_flush = needs_flush;
	job->spm_update_needed = true;
	return 0;
}

/**
 * amdgpu_vmid_grab_used - try to reuse a VMID
 *
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 * @job: job who wants to use the VMID
 * @id: resulting VMID
 *
 * Try to reuse a VMID for this submission.
 *
 * Walks the LRU list in reverse looking for a VMID owned by @vm that is
 * compatible with @job. @id is set to the VMID found, or to NULL when
 * none qualifies. job->vm_needs_flush is initialized from
 * vm->use_cpu_for_update and additionally set when the chosen VMID needs
 * a flush.
 *
 * Returns 0 whether or not a VMID was found, or the error from
 * amdgpu_sync_fence().
 */
static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
				 struct amdgpu_ring *ring,
				 struct amdgpu_job *job,
				 struct amdgpu_vmid **id)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->vm_hub;
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	uint64_t fence_context = adev->fence_context + ring->idx;
	uint64_t updates = amdgpu_vm_tlb_seq(vm);
	int r;

	job->vm_needs_flush = vm->use_cpu_for_update;

	/* Check if we can use a VMID already assigned to this VM */
	list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) {
		bool needs_flush = vm->use_cpu_for_update;

		/* Check all the prerequisites to using this VMID */
		if ((*id)->owner != vm->immediate.fence_context)
			continue;

		if (!amdgpu_vmid_compatible(*id, job))
			continue;

		/* No last flush, or one from another context still pending */
		if (!(*id)->last_flush ||
		    ((*id)->last_flush->context != fence_context &&
		     !dma_fence_is_signaled((*id)->last_flush)))
			needs_flush = true;

		/* The VMID has not seen the VM's latest TLB sequence yet */
		if ((*id)->flushed_updates < updates)
			needs_flush = true;

		/* Without concurrent flush a VMID needing a flush is skipped */
		if (needs_flush && !adev->vm_manager.concurrent_flush)
			continue;

		/* Good, we can use this VMID. Remember this submission as
		 * user of the VMID.
		 */
		r = amdgpu_sync_fence(&(*id)->active,
				      &job->base.s_fence->finished,
				      GFP_ATOMIC);
		if (r)
			return r;

		job->vm_needs_flush |= needs_flush;
		return 0;
	}

	/* Nothing reusable found */
	*id = NULL;
	return 0;
}

/**
 * amdgpu_vmid_grab - allocate the next free VMID
 *
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 * @job: job who wants to use the VMID
 * @fence: fence to wait for if no id could be grabbed
 *
 * Allocate an id for the vm, adding fences to the sync obj as necessary.
 *
 * Runs under the VMID manager lock of the ring's VMHUB. A return value of
 * 0 does not by itself mean a VMID was assigned: when no idle VMID exists,
 * or the reserved VMID is still busy, the helpers store a fence in @fence
 * and this function returns 0 without touching job->vmid.
 *
 * Returns 0 on success or when the caller has to wait on @fence, or the
 * error returned by the helpers.
 */
int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
		     struct amdgpu_job *job, struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->vm_hub;
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct amdgpu_vmid *idle = NULL;
	struct amdgpu_vmid *id = NULL;
	int r = 0;

	mutex_lock(&id_mgr->lock);
	r = amdgpu_vmid_grab_idle(ring, &idle, fence);
	/* Without an idle VMID nothing is assigned at all */
	if (r || !idle)
		goto error;

	if (amdgpu_vmid_uses_reserved(vm, vmhub)) {
		r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
		if (r || !id)
			goto error;
	} else {
		r = amdgpu_vmid_grab_used(vm, ring, job, &id);
		if (r)
			goto error;

		if (!id) {
			/* Still no ID to use? Then use the idle one found earlier */
			id = idle;

			/* Remember this submission as user of the VMID */
			r = amdgpu_sync_fence(&id->active,
					      &job->base.s_fence->finished,
					      GFP_ATOMIC);
			if (r)
				goto error;

			job->vm_needs_flush = true;
		}

		/* Mark the VMID as most recently used */
		list_move_tail(&id->list, &id_mgr->ids_lru);
	}

	/* Must be evaluated before the VMID's GDS/GWS/OA state is updated */
	job->gds_switch_needed = amdgpu_vmid_gds_switch_needed(id, job);
	if (job->vm_needs_flush) {
		id->flushed_updates = amdgpu_vm_tlb_seq(vm);
		dma_fence_put(id->last_flush);
		id->last_flush = NULL;
	}
	/* The VMID number is the index into the manager's ids array */
	job->vmid = id - id_mgr->ids;
	job->pasid = vm->pasid;

	/* Record what this VMID is now set up for */
	id->gds_base = job->gds_base;
	id->gds_size = job->gds_size;
	id->gws_base = job->gws_base;
	id->gws_size = job->gws_size;
	id->oa_base = job->oa_base;
	id->oa_size = job->oa_size;
	id->pd_gpu_addr = job->vm_pd_addr;
	id->owner = vm->immediate.fence_context;

	trace_amdgpu_vm_grab_id(vm, ring, job);

error:
	mutex_unlock(&id_mgr->lock);
	return r;
}

/*
 * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID
 * @vm: the VM to check
 * @vmhub: the VMHUB which will be used
 *
 * Returns: True if the VM will use a reserved VMID, i.e. when
 * vm->reserved_vmid[vmhub] is set.
 */
bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub)
{
	return vm->reserved_vmid[vmhub];
}

/*
 * amdgpu_vmid_alloc_reserved - reserve a specific VMID for this vm
 * @adev: amdgpu device structure
 * @vm: the VM to reserve an ID for
 * @vmhub: the VMHUB which should be used
 *
 * Mostly used to have a reserved VMID for debugging and SPM.
 *
 * Returns: 0 for success, -ENOENT if an ID is already reserved.
471 */ 472 int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm, 473 unsigned vmhub) 474 { 475 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 476 struct amdgpu_vmid *id; 477 int r = 0; 478 479 mutex_lock(&id_mgr->lock); 480 if (vm->reserved_vmid[vmhub]) 481 goto unlock; 482 if (id_mgr->reserved_vmid) { 483 r = -ENOENT; 484 goto unlock; 485 } 486 /* Remove from normal round robin handling */ 487 id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list); 488 list_del_init(&id->list); 489 vm->reserved_vmid[vmhub] = id; 490 id_mgr->reserved_vmid = true; 491 mutex_unlock(&id_mgr->lock); 492 493 return 0; 494 unlock: 495 mutex_unlock(&id_mgr->lock); 496 return r; 497 } 498 499 /* 500 * amdgpu_vmid_free_reserved - free up a reserved VMID again 501 * @adev: amdgpu device structure 502 * @vm: the VM with the reserved ID 503 * @vmhub: the VMHUB which should be used 504 */ 505 void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm, 506 unsigned vmhub) 507 { 508 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 509 510 mutex_lock(&id_mgr->lock); 511 if (vm->reserved_vmid[vmhub]) { 512 list_add(&vm->reserved_vmid[vmhub]->list, 513 &id_mgr->ids_lru); 514 vm->reserved_vmid[vmhub] = NULL; 515 id_mgr->reserved_vmid = false; 516 } 517 mutex_unlock(&id_mgr->lock); 518 } 519 520 /** 521 * amdgpu_vmid_reset - reset VMID to zero 522 * 523 * @adev: amdgpu device structure 524 * @vmhub: vmhub type 525 * @vmid: vmid number to use 526 * 527 * Reset saved GDW, GWS and OA to force switch on next flush. 
528 */ 529 void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub, 530 unsigned vmid) 531 { 532 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 533 struct amdgpu_vmid *id = &id_mgr->ids[vmid]; 534 535 mutex_lock(&id_mgr->lock); 536 id->owner = 0; 537 id->gds_base = 0; 538 id->gds_size = 0; 539 id->gws_base = 0; 540 id->gws_size = 0; 541 id->oa_base = 0; 542 id->oa_size = 0; 543 mutex_unlock(&id_mgr->lock); 544 } 545 546 /** 547 * amdgpu_vmid_reset_all - reset VMID to zero 548 * 549 * @adev: amdgpu device structure 550 * 551 * Reset VMID to force flush on next use 552 */ 553 void amdgpu_vmid_reset_all(struct amdgpu_device *adev) 554 { 555 unsigned i, j; 556 557 for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { 558 struct amdgpu_vmid_mgr *id_mgr = 559 &adev->vm_manager.id_mgr[i]; 560 561 for (j = 1; j < id_mgr->num_ids; ++j) 562 amdgpu_vmid_reset(adev, i, j); 563 } 564 } 565 566 /** 567 * amdgpu_vmid_mgr_init - init the VMID manager 568 * 569 * @adev: amdgpu_device pointer 570 * 571 * Initialize the VM manager structures 572 */ 573 void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) 574 { 575 unsigned i, j; 576 577 for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { 578 struct amdgpu_vmid_mgr *id_mgr = 579 &adev->vm_manager.id_mgr[i]; 580 581 mutex_init(&id_mgr->lock); 582 INIT_LIST_HEAD(&id_mgr->ids_lru); 583 584 /* for GC <10, SDMA uses MMHUB so use first_kfd_vmid for both GC and MM */ 585 if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) 586 /* manage only VMIDs not used by KFD */ 587 id_mgr->num_ids = adev->vm_manager.first_kfd_vmid; 588 else if (AMDGPU_IS_MMHUB0(i) || 589 AMDGPU_IS_MMHUB1(i)) 590 id_mgr->num_ids = 16; 591 else 592 /* manage only VMIDs not used by KFD */ 593 id_mgr->num_ids = adev->vm_manager.first_kfd_vmid; 594 595 /* skip over VMID 0, since it is the system VM */ 596 for (j = 1; j < id_mgr->num_ids; ++j) { 597 amdgpu_vmid_reset(adev, i, j); 598 amdgpu_sync_create(&id_mgr->ids[j].active); 599 
list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru); 600 } 601 } 602 } 603 604 /** 605 * amdgpu_vmid_mgr_fini - cleanup VM manager 606 * 607 * @adev: amdgpu_device pointer 608 * 609 * Cleanup the VM manager and free resources. 610 */ 611 void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev) 612 { 613 unsigned i, j; 614 615 for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { 616 struct amdgpu_vmid_mgr *id_mgr = 617 &adev->vm_manager.id_mgr[i]; 618 619 mutex_destroy(&id_mgr->lock); 620 for (j = 0; j < AMDGPU_NUM_VMID; ++j) { 621 struct amdgpu_vmid *id = &id_mgr->ids[j]; 622 623 amdgpu_sync_free(&id->active); 624 dma_fence_put(id->last_flush); 625 dma_fence_put(id->pasid_mapping); 626 } 627 } 628 } 629 630 /** 631 * amdgpu_pasid_mgr_cleanup - cleanup PASID manager 632 * 633 * Cleanup the IDR allocator. 634 */ 635 void amdgpu_pasid_mgr_cleanup(void) 636 { 637 spin_lock(&amdgpu_pasid_idr_lock); 638 idr_destroy(&amdgpu_pasid_idr); 639 spin_unlock(&amdgpu_pasid_idr_lock); 640 } 641