/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Dave Airlie
 */
#include <linux/seq_file.h>
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

/*
 * Fences
 * Fences mark an event in the GPU's pipeline and are used
 * for GPU/CPU synchronization.  When the fence is written,
 * it is expected that all buffers associated with that fence
 * are no longer in use by the associated ring on the GPU and
 * that the relevant GPU caches have been flushed.
 */

/**
 * amdgpu_fence_write - write a fence value
 *
 * @ring: ring the fence is associated with
 * @seq: sequence number to write
 *
 * Writes a fence value to memory (all asics).
 */
static void amdgpu_fence_write(struct amdgpu_ring *ring, u32 seq)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;

	if (drv->cpu_addr)
		*drv->cpu_addr = cpu_to_le32(seq);
}

/**
 * amdgpu_fence_read - read a fence value
 *
 * @ring: ring the fence is associated with
 *
 * Reads a fence value from memory (all asics).
 * Returns the value of the fence read from memory.
 */
static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	u32 seq = 0;

	if (drv->cpu_addr)
		seq = le32_to_cpu(*drv->cpu_addr);
	else
		seq = lower_32_bits(atomic64_read(&drv->last_seq));

	return seq;
}

/**
 * amdgpu_fence_schedule_check - schedule lockup check
 *
 * @ring: pointer to struct amdgpu_ring
 *
 * Queues a delayed work item to check for lockups.
 */
static void amdgpu_fence_schedule_check(struct amdgpu_ring *ring)
{
	/*
	 * Do not reset the timer here with mod_delayed_work,
	 * this can livelock in an interaction with TTM delayed destroy.
	 */
	queue_delayed_work(system_power_efficient_wq,
			   &ring->fence_drv.lockup_work,
			   AMDGPU_FENCE_JIFFIES_TIMEOUT);
}

/**
 * amdgpu_fence_emit - emit a fence on the requested ring
 *
 * @ring: ring the fence is associated with
 * @owner: creator of the fence
 * @fence: amdgpu fence object
 *
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
		      struct amdgpu_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;

	/* we are protected by the ring emission mutex */
	*fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
	if ((*fence) == NULL) {
		return -ENOMEM;
	}
	(*fence)->seq = ++ring->fence_drv.sync_seq[ring->idx];
	(*fence)->ring = ring;
	(*fence)->owner = owner;
	fence_init(&(*fence)->base, &amdgpu_fence_ops,
		   &adev->fence_queue.lock, adev->fence_context + ring->idx,
		   (*fence)->seq);
	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
			       (*fence)->seq,
			       AMDGPU_FENCE_FLAG_INT);
	trace_amdgpu_fence_emit(ring->adev->ddev, ring->idx, (*fence)->seq);
	return 0;
}

/**
 * amdgpu_fence_check_signaled - callback from fence_queue
 *
 * This function is called with the fence_queue lock held, which is also used
 * for the fence locking itself, so unlocked variants are used for
 * fence_signal and remove_wait_queue.
 */
static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
{
	struct amdgpu_fence *fence;
	struct amdgpu_device *adev;
	u64 seq;
	int ret;

	fence = container_of(wait, struct amdgpu_fence, fence_wake);
	adev = fence->ring->adev;

	/*
	 * We cannot use amdgpu_fence_process here because we're already
	 * in the waitqueue, in a call from wake_up_all.
	 */
	seq = atomic64_read(&fence->ring->fence_drv.last_seq);
	if (seq >= fence->seq) {
		ret = fence_signal_locked(&fence->base);
		if (!ret)
			FENCE_TRACE(&fence->base, "signaled from irq context\n");
		else
			FENCE_TRACE(&fence->base, "was already signaled\n");

		amdgpu_irq_put(adev, fence->ring->fence_drv.irq_src,
			       fence->ring->fence_drv.irq_type);
		__remove_wait_queue(&adev->fence_queue, &fence->fence_wake);
		fence_put(&fence->base);
	} else
		FENCE_TRACE(&fence->base, "pending\n");
	return 0;
}

/**
 * amdgpu_fence_activity - check for fence activity
 *
 * @ring: pointer to struct amdgpu_ring
 *
 * Checks the current fence value and calculates the last
 * signaled fence value.  Returns true if activity occurred
 * on the ring and the fence_queue should be woken up.
 */
static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
{
	uint64_t seq, last_seq, last_emitted;
	unsigned count_loop = 0;
	bool wake = false;

	/* Note there is a scenario here for an infinite loop but it's
	 * very unlikely to happen.  For it to happen, the current polling
	 * process needs to be interrupted by another process, and that other
	 * process needs to update last_seq between the atomic read and
	 * xchg of the current process.
	 *
	 * Moreover, for this to become an infinite loop there need to be
	 * continuously new fences signaled, i.e. amdgpu_fence_read needs
	 * to return a different value each time for both the currently
	 * polling process and the other process that xchg's last_seq
	 * between the atomic read and xchg of the current process.  And the
	 * value the other process sets as last seq must be higher than
	 * the seq value we just read.  Which means the current process
	 * needs to be interrupted after amdgpu_fence_read and before
	 * the atomic xchg.
	 *
	 * To be even more safe we count the number of times we loop and
	 * we bail after 10 loops, accepting the fact that we might
	 * have temporarily set last_seq not to the true last signaled
	 * seq but to an older one.
	 */
	last_seq = atomic64_read(&ring->fence_drv.last_seq);
	do {
		last_emitted = ring->fence_drv.sync_seq[ring->idx];
		seq = amdgpu_fence_read(ring);
		seq |= last_seq & 0xffffffff00000000LL;
		if (seq < last_seq) {
			seq &= 0xffffffff;
			seq |= last_emitted & 0xffffffff00000000LL;
		}

		if (seq <= last_seq || seq > last_emitted) {
			break;
		}
		/* If we loop over we don't want to return without
		 * checking if a fence is signaled, as it means that the
		 * seq we just read is different from the previous one.
		 */
		wake = true;
		last_seq = seq;
		if ((count_loop++) > 10) {
			/* We looped over too many times, leave with the
			 * fact that we might have set an older fence
			 * seq than the current real last seq as signaled
			 * by the hw.
			 */
			break;
		}
	} while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);

	if (seq < last_emitted)
		amdgpu_fence_schedule_check(ring);

	return wake;
}

/**
 * amdgpu_fence_check_lockup - check for hardware lockup
 *
 * @work: delayed work item
 *
 * Checks for fence activity and, if there is none, probes
 * the hardware to see if a lockup occurred.
 */
static void amdgpu_fence_check_lockup(struct work_struct *work)
{
	struct amdgpu_fence_driver *fence_drv;
	struct amdgpu_ring *ring;

	fence_drv = container_of(work, struct amdgpu_fence_driver,
				 lockup_work.work);
	ring = fence_drv->ring;

	if (!down_read_trylock(&ring->adev->exclusive_lock)) {
		/* just reschedule the check if a reset is going on */
		amdgpu_fence_schedule_check(ring);
		return;
	}

	if (fence_drv->delayed_irq && ring->adev->ddev->irq_enabled) {
		fence_drv->delayed_irq = false;
		amdgpu_irq_update(ring->adev, fence_drv->irq_src,
				  fence_drv->irq_type);
	}

	if (amdgpu_fence_activity(ring))
		wake_up_all(&ring->adev->fence_queue);
	else if (amdgpu_ring_is_lockup(ring)) {
		/* good news we believe it's a lockup */
		dev_warn(ring->adev->dev, "GPU lockup (current fence id "
			 "0x%016llx last fence id 0x%016llx on ring %d)\n",
			 (uint64_t)atomic64_read(&fence_drv->last_seq),
			 fence_drv->sync_seq[ring->idx], ring->idx);

		/* remember that we need a reset */
		ring->adev->needs_reset = true;
		wake_up_all(&ring->adev->fence_queue);
	}
	up_read(&ring->adev->exclusive_lock);
}

/**
 * amdgpu_fence_process - process a fence
 *
 * @ring: ring the fence is associated with
 *
 * Checks the current fence value and wakes the fence queue
 * if the sequence number has increased (all asics).
 */
void amdgpu_fence_process(struct amdgpu_ring *ring)
{
	uint64_t seq, last_seq, last_emitted;
	unsigned count_loop = 0;
	bool wake = false;

	/* Note there is a scenario here for an infinite loop but it's
	 * very unlikely to happen.  For it to happen, the current polling
	 * process needs to be interrupted by another process, and that other
	 * process needs to update last_seq between the atomic read and
	 * xchg of the current process.
	 *
	 * Moreover, for this to become an infinite loop there need to be
	 * continuously new fences signaled, i.e. amdgpu_fence_read needs
	 * to return a different value each time for both the currently
	 * polling process and the other process that xchg's last_seq
	 * between the atomic read and xchg of the current process.  And the
	 * value the other process sets as last seq must be higher than
	 * the seq value we just read.  Which means the current process
	 * needs to be interrupted after amdgpu_fence_read and before
	 * the atomic xchg.
	 *
	 * To be even more safe we count the number of times we loop and
	 * we bail after 10 loops, accepting the fact that we might
	 * have temporarily set last_seq not to the true last signaled
	 * seq but to an older one.
	 */
	last_seq = atomic64_read(&ring->fence_drv.last_seq);
	do {
		last_emitted = ring->fence_drv.sync_seq[ring->idx];
		seq = amdgpu_fence_read(ring);
		seq |= last_seq & 0xffffffff00000000LL;
		if (seq < last_seq) {
			seq &= 0xffffffff;
			seq |= last_emitted & 0xffffffff00000000LL;
		}

		if (seq <= last_seq || seq > last_emitted) {
			break;
		}
		/* If we loop over we don't want to return without
		 * checking if a fence is signaled, as it means that the
		 * seq we just read is different from the previous one.
		 */
		wake = true;
		last_seq = seq;
		if ((count_loop++) > 10) {
			/* We looped over too many times, leave with the
			 * fact that we might have set an older fence
			 * seq than the current real last seq as signaled
			 * by the hw.
			 */
			break;
		}
	} while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);

	if (wake)
		wake_up_all(&ring->adev->fence_queue);
}

/**
 * amdgpu_fence_seq_signaled - check if a fence sequence number has signaled
 *
 * @ring: ring the fence is associated with
 * @seq: sequence number
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if the fence has signaled (current fence value
 * is >= requested value) or false if it has not (current fence
 * value is < the requested value).  Helper function for
 * amdgpu_fence_signaled().
 */
static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
{
	if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
		return true;

	/* poll new last sequence at least once */
	amdgpu_fence_process(ring);
	if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
		return true;

	return false;
}

static bool amdgpu_fence_is_signaled(struct fence *f)
{
	struct amdgpu_fence *fence = to_amdgpu_fence(f);
	struct amdgpu_ring *ring = fence->ring;
	struct amdgpu_device *adev = ring->adev;

	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
		return true;

	if (down_read_trylock(&adev->exclusive_lock)) {
		amdgpu_fence_process(ring);
		up_read(&adev->exclusive_lock);

		if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
			return true;
	}
	return false;
}

/**
 * amdgpu_fence_enable_signaling - enable signalling on fence
 * @fence: fence
 *
 * This function is called with the fence_queue lock held, and adds a callback
 * to fence_queue that checks if this fence is signaled, and if so it
 * signals the fence and removes itself.
 */
static bool amdgpu_fence_enable_signaling(struct fence *f)
{
	struct amdgpu_fence *fence = to_amdgpu_fence(f);
	struct amdgpu_ring *ring = fence->ring;
	struct amdgpu_device *adev = ring->adev;

	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
		return false;

	if (down_read_trylock(&adev->exclusive_lock)) {
		amdgpu_irq_get(adev, ring->fence_drv.irq_src,
			       ring->fence_drv.irq_type);
		if (amdgpu_fence_activity(ring))
			wake_up_all_locked(&adev->fence_queue);

		/* did fence get signaled after we enabled the sw irq? */
		if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq) {
			amdgpu_irq_put(adev, ring->fence_drv.irq_src,
				       ring->fence_drv.irq_type);
			up_read(&adev->exclusive_lock);
			return false;
		}

		up_read(&adev->exclusive_lock);
	} else {
		/* we're probably in a lockup, let's not fiddle too much */
		if (amdgpu_irq_get_delayed(adev, ring->fence_drv.irq_src,
					   ring->fence_drv.irq_type))
			ring->fence_drv.delayed_irq = true;
		amdgpu_fence_schedule_check(ring);
	}

	fence->fence_wake.flags = 0;
	fence->fence_wake.private = NULL;
	fence->fence_wake.func = amdgpu_fence_check_signaled;
	__add_wait_queue(&adev->fence_queue, &fence->fence_wake);
	fence_get(f);
	FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
	return true;
}

/**
 * amdgpu_fence_signaled - check if a fence has signaled
 *
 * @fence: amdgpu fence object
 *
 * Check if the requested fence has signaled (all asics).
 * Returns true if the fence has signaled or false if it has not.
 */
bool amdgpu_fence_signaled(struct amdgpu_fence *fence)
{
	if (!fence)
		return true;

	if (amdgpu_fence_seq_signaled(fence->ring, fence->seq)) {
		if (!fence_signal(&fence->base))
			FENCE_TRACE(&fence->base, "signaled from amdgpu_fence_signaled\n");
		return true;
	}

	return false;
}

/**
 * amdgpu_fence_any_seq_signaled - check if any sequence number is signaled
 *
 * @adev: amdgpu device pointer
 * @seq: sequence numbers
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if any has signaled (current value is >= requested value)
 * or false if it has not.
 * Helper function for amdgpu_fence_wait_seq.
 */
static bool amdgpu_fence_any_seq_signaled(struct amdgpu_device *adev, u64 *seq)
{
	unsigned i;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		if (!adev->rings[i] || !seq[i])
			continue;

		if (amdgpu_fence_seq_signaled(adev->rings[i], seq[i]))
			return true;
	}

	return false;
}

/**
 * amdgpu_fence_wait_seq_timeout - wait for specific sequence number(s)
 *
 * @adev: amdgpu device pointer
 * @target_seq: sequence number(s) we want to wait for
 * @intr: use interruptible sleep
 * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
 *
 * Wait for the requested sequence number(s) to be written by any ring
 * (all asics).  The sequence number array is indexed by ring id.
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the sequence number.  Helper function
 * for amdgpu_fence_wait_*().
 * Returns the remaining time if the sequence number has passed, 0 when
 * the wait timed out, or an error for all other cases.
 * -EDEADLK is returned when a GPU lockup has been detected.
 */
long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev, u64 *target_seq,
				   bool intr, long timeout)
{
	uint64_t last_seq[AMDGPU_MAX_RINGS];
	bool signaled;
	int i, r;

	if (timeout == 0) {
		return amdgpu_fence_any_seq_signaled(adev, target_seq);
	}

	while (!amdgpu_fence_any_seq_signaled(adev, target_seq)) {

		/* Save current sequence values, used to check for GPU lockups */
		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = adev->rings[i];

			if (!ring || !target_seq[i])
				continue;

			last_seq[i] = atomic64_read(&ring->fence_drv.last_seq);
			trace_amdgpu_fence_wait_begin(adev->ddev, i, target_seq[i]);
			amdgpu_irq_get(adev, ring->fence_drv.irq_src,
				       ring->fence_drv.irq_type);
		}

		if (intr) {
			r = wait_event_interruptible_timeout(adev->fence_queue, (
				(signaled = amdgpu_fence_any_seq_signaled(adev, target_seq))
				|| adev->needs_reset), AMDGPU_FENCE_JIFFIES_TIMEOUT);
		} else {
			r = wait_event_timeout(adev->fence_queue, (
				(signaled = amdgpu_fence_any_seq_signaled(adev, target_seq))
				|| adev->needs_reset), AMDGPU_FENCE_JIFFIES_TIMEOUT);
		}

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = adev->rings[i];

			if (!ring || !target_seq[i])
				continue;

			amdgpu_irq_put(adev, ring->fence_drv.irq_src,
				       ring->fence_drv.irq_type);
			trace_amdgpu_fence_wait_end(adev->ddev, i, target_seq[i]);
		}

		if (unlikely(r < 0))
			return r;

		if (unlikely(!signaled)) {

			if (adev->needs_reset)
				return -EDEADLK;

			/* we were interrupted for some reason and fence
			 * isn't signaled yet, resume waiting */
			if (r)
				continue;

			for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
				struct amdgpu_ring *ring = adev->rings[i];

				if (!ring || !target_seq[i])
					continue;

				if (last_seq[i] != atomic64_read(&ring->fence_drv.last_seq))
					break;
			}

			if (i != AMDGPU_MAX_RINGS)
				continue;

			for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
				if (!adev->rings[i] || !target_seq[i])
					continue;

				if (amdgpu_ring_is_lockup(adev->rings[i]))
					break;
			}

			if (i < AMDGPU_MAX_RINGS) {
				/* good news we believe it's a lockup */
				dev_warn(adev->dev, "GPU lockup (waiting for "
					 "0x%016llx last fence id 0x%016llx on"
					 " ring %d)\n",
					 target_seq[i], last_seq[i], i);

				/* remember that we need a reset */
				adev->needs_reset = true;
				wake_up_all(&adev->fence_queue);
				return -EDEADLK;
			}

			if (timeout < MAX_SCHEDULE_TIMEOUT) {
				timeout -= AMDGPU_FENCE_JIFFIES_TIMEOUT;
				if (timeout <= 0) {
					return 0;
				}
			}
		}
	}
	return timeout;
}

/**
 * amdgpu_fence_wait - wait for a fence to signal
 *
 * @fence: amdgpu fence object
 * @intr: use interruptible sleep
 *
 * Wait for the requested fence to signal (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the fence.
 * Returns 0 if the fence has passed, error for all other cases.
 */
int amdgpu_fence_wait(struct amdgpu_fence *fence, bool intr)
{
	uint64_t seq[AMDGPU_MAX_RINGS] = {};
	long r;

	seq[fence->ring->idx] = fence->seq;
	r = amdgpu_fence_wait_seq_timeout(fence->ring->adev, seq, intr, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		return r;
	}

	r = fence_signal(&fence->base);
	if (!r)
		FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
	return 0;
}

/**
 * amdgpu_fence_wait_any - wait for a fence to signal on any ring
 *
 * @adev: amdgpu device pointer
 * @fences: amdgpu fence object(s)
 * @intr: use interruptible sleep
 *
 * Wait for any requested fence to signal (all asics).  The fence
 * array is indexed by ring id.  @intr selects whether to use
 * interruptible (true) or non-interruptible (false) sleep when
 * waiting for the fences.  Used by the suballocator.
 * Returns 0 if any fence has passed, error for all other cases.
 */
int amdgpu_fence_wait_any(struct amdgpu_device *adev,
			  struct amdgpu_fence **fences,
			  bool intr)
{
	uint64_t seq[AMDGPU_MAX_RINGS];
	unsigned i, num_rings = 0;
	long r;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		seq[i] = 0;

		if (!fences[i]) {
			continue;
		}

		seq[i] = fences[i]->seq;
		++num_rings;
	}

	/* nothing to wait for ? */
	if (num_rings == 0)
		return -ENOENT;

	r = amdgpu_fence_wait_seq_timeout(adev, seq, intr, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		return r;
	}
	return 0;
}

/**
 * amdgpu_fence_wait_next - wait for the next fence to signal
 *
 * @ring: ring the fence is associated with
 *
 * Wait for the next fence on the requested ring to signal (all asics).
 * Returns 0 if the next fence has passed, error for all other cases.
 * Caller must hold the ring lock.
 */
int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
{
	uint64_t seq[AMDGPU_MAX_RINGS] = {};
	long r;

	seq[ring->idx] = atomic64_read(&ring->fence_drv.last_seq) + 1ULL;
	if (seq[ring->idx] >= ring->fence_drv.sync_seq[ring->idx]) {
		/* nothing to wait for, last_seq is already
		   the last emitted fence */
		return -ENOENT;
	}
	r = amdgpu_fence_wait_seq_timeout(ring->adev, seq, false, MAX_SCHEDULE_TIMEOUT);
	if (r < 0)
		return r;
	return 0;
}

/**
 * amdgpu_fence_wait_empty - wait for all fences to signal
 *
 * @ring: ring the fences are associated with
 *
 * Wait for all fences on the requested ring to signal (all asics).
 * Returns 0 if the fences have passed, error for all other cases.
 * Caller must hold the ring lock.
 */
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint64_t seq[AMDGPU_MAX_RINGS] = {};
	long r;

	seq[ring->idx] = ring->fence_drv.sync_seq[ring->idx];
	if (!seq[ring->idx])
		return 0;

	r = amdgpu_fence_wait_seq_timeout(adev, seq, false, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		if (r == -EDEADLK)
			return -EDEADLK;

		dev_err(adev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
			ring->idx, r);
	}
	return 0;
}

/**
 * amdgpu_fence_ref - take a ref on a fence
 *
 * @fence: amdgpu fence object
 *
 * Take a reference on a fence (all asics).
 * Returns the fence.
 */
struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence)
{
	fence_get(&fence->base);
	return fence;
}

/**
 * amdgpu_fence_unref - remove a ref on a fence
 *
 * @fence: amdgpu fence object
 *
 * Remove a reference on a fence (all asics).
 */
void amdgpu_fence_unref(struct amdgpu_fence **fence)
{
	struct amdgpu_fence *tmp = *fence;

	*fence = NULL;
	if (tmp)
		fence_put(&tmp->base);
}

/**
 * amdgpu_fence_count_emitted - get the count of emitted fences
 *
 * @ring: ring the fence is associated with
 *
 * Get the number of fences emitted on the requested ring (all asics).
 * Returns the number of emitted fences on the ring.  Used by the
 * dynpm code to track ring activity.
 */
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
{
	uint64_t emitted;

	/* We are not protected by the ring lock when reading the last sequence,
	 * but it's ok to report a slightly wrong fence count here.
	 */
	amdgpu_fence_process(ring);
	emitted = ring->fence_drv.sync_seq[ring->idx]
		- atomic64_read(&ring->fence_drv.last_seq);
	/* to avoid 32-bit wrap around */
	if (emitted > 0x10000000)
		emitted = 0x10000000;

	return (unsigned)emitted;
}

/**
 * amdgpu_fence_need_sync - do we need a semaphore
 *
 * @fence: amdgpu fence object
 * @dst_ring: which ring to check against
 *
 * Check if the fence needs to be synced against another ring
 * (all asics).  If so, we need to emit a semaphore.
 * Returns true if we need to sync with another ring, false if
 * not.
 */
bool amdgpu_fence_need_sync(struct amdgpu_fence *fence,
			    struct amdgpu_ring *dst_ring)
{
	struct amdgpu_fence_driver *fdrv;

	if (!fence)
		return false;

	if (fence->ring == dst_ring)
		return false;

	/* we are protected by the ring mutex */
	fdrv = &dst_ring->fence_drv;
	if (fence->seq <= fdrv->sync_seq[fence->ring->idx])
		return false;

	return true;
}

/**
 * amdgpu_fence_note_sync - record the sync point
 *
 * @fence: amdgpu fence object
 * @dst_ring: which ring to check against
 *
 * Note the sequence number at which point the fence will
 * be synced with the requested ring (all asics).
 */
void amdgpu_fence_note_sync(struct amdgpu_fence *fence,
			    struct amdgpu_ring *dst_ring)
{
	struct amdgpu_fence_driver *dst, *src;
	unsigned i;

	if (!fence)
		return;

	if (fence->ring == dst_ring)
		return;

	/* we are protected by the ring mutex */
	src = &fence->ring->fence_drv;
	dst = &dst_ring->fence_drv;
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		if (i == dst_ring->idx)
			continue;

		dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
	}
}

/**
 * amdgpu_fence_driver_start_ring - make the fence driver
 * ready for use on the requested ring.
 *
 * @ring: ring to start the fence driver on
 * @irq_src: interrupt source to use for this ring
 * @irq_type: interrupt type to use for this ring
 *
 * Make the fence driver ready for processing (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has.
 * Returns 0 for success, errors for failure.
 */
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq_src,
				   unsigned irq_type)
{
	struct amdgpu_device *adev = ring->adev;
	uint64_t index;

	if (ring != &adev->uvd.ring) {
		ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs];
		ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4);
	} else {
		/* put fence directly behind firmware */
		index = ALIGN(adev->uvd.fw->size, 8);
		ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
		ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
	}
	amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq));
	ring->fence_drv.initialized = true;
	ring->fence_drv.irq_src = irq_src;
	ring->fence_drv.irq_type = irq_type;
	dev_info(adev->dev, "fence driver on ring %d use gpu addr 0x%016llx, "
		 "cpu addr 0x%p\n", ring->idx,
		 ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr);
	return 0;
}

/**
 * amdgpu_fence_driver_init_ring - init the fence driver
 * for the requested ring.
 *
 * @ring: ring to init the fence driver on
 *
 * Init the fence driver for the requested ring (all asics).
 * Helper function for amdgpu_fence_driver_init().
 */
void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
{
	int i;

	ring->fence_drv.cpu_addr = NULL;
	ring->fence_drv.gpu_addr = 0;
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
		ring->fence_drv.sync_seq[i] = 0;

	atomic64_set(&ring->fence_drv.last_seq, 0);
	ring->fence_drv.initialized = false;

	INIT_DELAYED_WORK(&ring->fence_drv.lockup_work,
			  amdgpu_fence_check_lockup);
	ring->fence_drv.ring = ring;
}

/**
 * amdgpu_fence_driver_init - init the fence driver
 * for all possible rings.
 *
 * @adev: amdgpu device pointer
 *
 * Init the fence driver for all possible rings (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has using
 * amdgpu_fence_driver_start_ring().
 * Returns 0 for success.
 */
int amdgpu_fence_driver_init(struct amdgpu_device *adev)
{
	init_waitqueue_head(&adev->fence_queue);
	if (amdgpu_debugfs_fence_init(adev))
		dev_err(adev->dev, "fence debugfs file creation failed\n");

	return 0;
}

/**
 * amdgpu_fence_driver_fini - tear down the fence driver
 * for all possible rings.
 *
 * @adev: amdgpu device pointer
 *
 * Tear down the fence driver for all possible rings (all asics).
 */
void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
{
	int i, r;

	mutex_lock(&adev->ring_lock);
	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];
		if (!ring || !ring->fence_drv.initialized)
			continue;
		r = amdgpu_fence_wait_empty(ring);
		if (r) {
			/* no need to trigger GPU reset as we are unloading */
			amdgpu_fence_driver_force_completion(adev);
		}
		wake_up_all(&adev->fence_queue);
		ring->fence_drv.initialized = false;
	}
	mutex_unlock(&adev->ring_lock);
}

/**
 * amdgpu_fence_driver_force_completion - force all fence waiters to complete
 *
 * @adev: amdgpu device pointer
 *
 * In case of GPU reset failure make sure no process keeps waiting on a fence
 * that will never complete.
 */
void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];
		if (!ring || !ring->fence_drv.initialized)
			continue;

		amdgpu_fence_write(ring, ring->fence_drv.sync_seq[i]);
	}
}


/*
 * Fence debugfs
 */
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct drm_device *dev = node->minor->dev;
	struct amdgpu_device *adev = dev->dev_private;
	int i, j;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];
		if (!ring || !ring->fence_drv.initialized)
			continue;

		amdgpu_fence_process(ring);

		seq_printf(m, "--- ring %d ---\n", i);
		seq_printf(m, "Last signaled fence 0x%016llx\n",
			   (unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
		seq_printf(m, "Last emitted 0x%016llx\n",
			   ring->fence_drv.sync_seq[i]);

		for (j = 0; j < AMDGPU_MAX_RINGS; ++j) {
			struct amdgpu_ring *other = adev->rings[j];
			if (i != j && other && other->fence_drv.initialized)
				seq_printf(m, "Last sync to ring %d 0x%016llx\n",
					   j, ring->fence_drv.sync_seq[j]);
		}
	}
	return 0;
}

static struct drm_info_list amdgpu_debugfs_fence_list[] = {
	{"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
};
#endif

int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 1);
#else
	return 0;
#endif
}

static const char *amdgpu_fence_get_driver_name(struct fence *fence)
{
	return "amdgpu";
}

static const char *amdgpu_fence_get_timeline_name(struct fence *f)
{
	struct amdgpu_fence *fence = to_amdgpu_fence(f);
	return (const char *)fence->ring->name;
}

static inline bool amdgpu_test_signaled(struct amdgpu_fence *fence)
{
	return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
}

struct amdgpu_wait_cb {
	struct fence_cb base;
	struct task_struct *task;
};

static void amdgpu_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
{
	struct amdgpu_wait_cb *wait =
		container_of(cb, struct amdgpu_wait_cb, base);
	wake_up_process(wait->task);
}

static signed long amdgpu_fence_default_wait(struct fence *f, bool intr,
					     signed long t)
{
	struct amdgpu_fence *fence = to_amdgpu_fence(f);
	struct amdgpu_device *adev = fence->ring->adev;
	struct amdgpu_wait_cb cb;

	cb.task = current;

	if (fence_add_callback(f, &cb.base, amdgpu_fence_wait_cb))
		return t;

	while (t > 0) {
		if (intr)
			set_current_state(TASK_INTERRUPTIBLE);
		else
			set_current_state(TASK_UNINTERRUPTIBLE);

		/*
		 * amdgpu_test_signaled must be called after
		 * set_current_state to prevent a race with wake_up_process
		 */
		if (amdgpu_test_signaled(fence))
			break;

		if (adev->needs_reset) {
			t = -EDEADLK;
			break;
		}

		t = schedule_timeout(t);

		if (t > 0 && intr && signal_pending(current))
			t = -ERESTARTSYS;
	}

	__set_current_state(TASK_RUNNING);
	fence_remove_callback(f, &cb.base);

	return t;
}

const struct fence_ops amdgpu_fence_ops = {
	.get_driver_name = amdgpu_fence_get_driver_name,
	.get_timeline_name = amdgpu_fence_get_timeline_name,
	.enable_signaling = amdgpu_fence_enable_signaling,
	.signaled = amdgpu_fence_is_signaled,
	.wait = amdgpu_fence_default_wait,
	.release = NULL,
};