/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Dave Airlie
 */
#include <linux/seq_file.h>
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/list.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include "drmP.h"
#include "drm.h"
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

/*
 * Fences
 * Fences mark an event in the GPU's pipeline and are used
 * for GPU/CPU synchronization.  When the fence is written,
 * it is expected that all buffers associated with that fence
 * are no longer in use by the associated ring on the GPU and
 * that the relevant GPU caches have been flushed.  Whether
 * we use a scratch register or memory location depends on the asic
 * and whether writeback is enabled.
 */

/**
 * radeon_fence_write - write a fence value
 *
 * @rdev: radeon_device pointer
 * @seq: sequence number to write
 * @ring: ring index the fence is associated with
 *
 * Writes a fence value to memory or a scratch register (all asics).
 */
static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
{
	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
	if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
		*drv->cpu_addr = cpu_to_le32(seq);
	} else {
		WREG32(drv->scratch_reg, seq);
	}
}

/**
 * radeon_fence_read - read a fence value
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Reads a fence value from memory or a scratch register (all asics).
 * Returns the value of the fence read from memory or register.
 */
static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
{
	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
	u32 seq = 0;

	if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
		seq = le32_to_cpu(*drv->cpu_addr);
	} else {
		seq = RREG32(drv->scratch_reg);
	}
	return seq;
}

/**
 * radeon_fence_emit - emit a fence on the requested ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 * @ring: ring index the fence is associated with
 *
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
int radeon_fence_emit(struct radeon_device *rdev,
		      struct radeon_fence **fence,
		      int ring)
{
	/* we are protected by the ring emission mutex */
	*fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
	if ((*fence) == NULL) {
		return -ENOMEM;
	}
	kref_init(&((*fence)->kref));
	(*fence)->rdev = rdev;
	(*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring];
	(*fence)->ring = ring;
	radeon_fence_ring_emit(rdev, ring, *fence);
	trace_radeon_fence_emit(rdev->ddev, (*fence)->seq);
	return 0;
}

/**
 * radeon_fence_process - process a fence
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Checks the current fence value and wakes the fence queue
 * if the sequence number has increased (all asics).
 */
void radeon_fence_process(struct radeon_device *rdev, int ring)
{
	uint64_t seq, last_seq, last_emitted;
	unsigned count_loop = 0;
	bool wake = false;

	/* Note there is a scenario here for an infinite loop but it's
	 * very unlikely to happen.  For it to happen, the current polling
	 * process needs to be interrupted by another process, and that
	 * other process needs to update last_seq between the atomic read
	 * and the xchg of the current process.
	 *
	 * Moreover, for this to turn into an infinite loop, new fences
	 * have to be signaled continuously, i.e. radeon_fence_read needs
	 * to return a different value each time for both the currently
	 * polling process and the other process that updates last_seq
	 * between our atomic read and xchg.  And the value the other
	 * process sets as last_seq must be higher than the seq value we
	 * just read, which means the current process must be interrupted
	 * after radeon_fence_read and before the atomic xchg.
	 *
	 * To be even safer we count the number of times we loop and
	 * bail out after 10 iterations, accepting the fact that we might
	 * have temporarily set last_seq not to the true last signaled
	 * seq but to an older one.
	 */
	last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
	do {
		last_emitted = rdev->fence_drv[ring].sync_seq[ring];
		seq = radeon_fence_read(rdev, ring);
		seq |= last_seq & 0xffffffff00000000LL;
		if (seq < last_seq) {
			seq &= 0xffffffff;
			seq |= last_emitted & 0xffffffff00000000LL;
		}

		if (seq <= last_seq || seq > last_emitted) {
			break;
		}
		/* If we loop again we don't want to return without
		 * checking if a fence is signaled, as it means that the
		 * seq we just read is different from the previous one.
		 */
		wake = true;
		last_seq = seq;
		if ((count_loop++) > 10) {
			/* We have looped too many times; leave, accepting
			 * that we might have set an older fence seq than
			 * the current real last seq signaled by the hw.
			 */
			break;
		}
	} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);

	if (wake) {
		rdev->fence_drv[ring].last_activity = jiffies;
		wake_up_all(&rdev->fence_queue);
	}
}
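
/*
 * Worked example of the 32->64 bit extension in radeon_fence_process()
 * above (illustrative values only, not taken from real hardware): with
 * last_seq = 0x00000001fffffff0, last_emitted = 0x0000000200000012 and a
 * hardware fence value of 0x00000010, OR-ing in the upper 32 bits of
 * last_seq yields 0x0000000100000010.  That is below last_seq, so the
 * lower 32 bits must have wrapped, and taking the upper 32 bits from
 * last_emitted instead gives the correct value 0x0000000200000010.
 */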

/**
 * radeon_fence_destroy - destroy a fence
 *
 * @kref: fence kref
 *
 * Frees the fence object (all asics).
 */
static void radeon_fence_destroy(struct kref *kref)
{
	struct radeon_fence *fence;

	fence = container_of(kref, struct radeon_fence, kref);
	kfree(fence);
}

/**
 * radeon_fence_seq_signaled - check if a fence sequence number has signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence number
 * @ring: ring index the fence is associated with
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if the fence has signaled (current fence value
 * is >= requested value) or false if it has not (current fence
 * value is < the requested value).  Helper function for
 * radeon_fence_signaled().
 */
static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
				      u64 seq, unsigned ring)
{
	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}
	/* poll new last sequence at least once */
	radeon_fence_process(rdev, ring);
	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}
	return false;
}

/**
 * radeon_fence_signaled - check if a fence has signaled
 *
 * @fence: radeon fence object
 *
 * Check if the requested fence has signaled (all asics).
 * Returns true if the fence has signaled or false if it has not.
 */
bool radeon_fence_signaled(struct radeon_fence *fence)
{
	if (!fence) {
		return true;
	}
	if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) {
		return true;
	}
	if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
		fence->seq = RADEON_FENCE_SIGNALED_SEQ;
		return true;
	}
	return false;
}

/**
 * radeon_fence_wait_seq - wait for a specific sequence number
 *
 * @rdev: radeon device pointer
 * @target_seq: sequence number we want to wait for
 * @ring: ring index the fence is associated with
 * @intr: use interruptible sleep
 * @lock_ring: whether the ring should be locked or not
 *
 * Wait for the requested sequence number to be written (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the sequence number.  Helper function
 * for radeon_fence_wait(), et al.
 * Returns 0 if the sequence number has passed, error for all other cases.
 * -EDEADLK is returned when a GPU lockup has been detected and the ring is
 * marked as not ready so no further jobs get scheduled until a successful
 * reset.
 */
static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq,
				 unsigned ring, bool intr, bool lock_ring)
{
	unsigned long timeout, last_activity;
	uint64_t seq;
	unsigned i;
	bool signaled;
	int r;

	while (target_seq > atomic64_read(&rdev->fence_drv[ring].last_seq)) {
		if (!rdev->ring[ring].ready) {
			return -EBUSY;
		}

		timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
		if (time_after(rdev->fence_drv[ring].last_activity, timeout)) {
			/* the normal case, timeout is somewhere before last_activity */
			timeout = rdev->fence_drv[ring].last_activity - timeout;
		} else {
			/* either jiffies wrapped around, or no fence was signaled
			 * in the last 500ms; either way we just wait for the minimum
			 * amount and then check for a lockup
			 */
			timeout = 1;
		}
		seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
		/* Save the current last activity value, used to check for GPU lockups */
		last_activity = rdev->fence_drv[ring].last_activity;

		trace_radeon_fence_wait_begin(rdev->ddev, seq);
		radeon_irq_kms_sw_irq_get(rdev, ring);
		if (intr) {
			r = wait_event_interruptible_timeout(rdev->fence_queue,
				(signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
				timeout);
		} else {
			r = wait_event_timeout(rdev->fence_queue,
				(signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
				timeout);
		}
		radeon_irq_kms_sw_irq_put(rdev, ring);
		if (unlikely(r < 0)) {
			return r;
		}
		trace_radeon_fence_wait_end(rdev->ddev, seq);

		if (unlikely(!signaled)) {
			/* we were interrupted for some reason and the fence
			 * isn't signaled yet, resume waiting */
			if (r) {
				continue;
			}

			/* check if the sequence value has changed since last_activity */
			if (seq != atomic64_read(&rdev->fence_drv[ring].last_seq)) {
				continue;
			}

			if (lock_ring) {
				mutex_lock(&rdev->ring_lock);
			}

			/* test if somebody else has already decided that this is a lockup */
			if (last_activity != rdev->fence_drv[ring].last_activity) {
				if (lock_ring) {
					mutex_unlock(&rdev->ring_lock);
				}
				continue;
			}

			if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
				/* good news we believe it's a lockup */
				dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx last fence id 0x%016llx)\n",
					 target_seq, seq);

				/* change last activity so nobody else thinks there is a lockup */
				for (i = 0; i < RADEON_NUM_RINGS; ++i) {
					rdev->fence_drv[i].last_activity = jiffies;
				}

				/* mark the ring as not ready any more */
				rdev->ring[ring].ready = false;
				if (lock_ring) {
					mutex_unlock(&rdev->ring_lock);
				}
				return -EDEADLK;
			}

			if (lock_ring) {
				mutex_unlock(&rdev->ring_lock);
			}
		}
	}
	return 0;
}
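
/*
 * Illustrative usage sketch, not taken from this file: a caller that
 * submits work on a ring and then blocks until the GPU is done with it
 * is assumed to look roughly like this (error handling trimmed, ring
 * lock held around emission as noted in radeon_fence_emit()):
 *
 *	struct radeon_fence *fence;
 *	int r;
 *
 *	r = radeon_fence_emit(rdev, &fence, ring);
 *	if (r)
 *		return r;
 *	...
 *	r = radeon_fence_wait(fence, false);	(uninterruptible wait)
 *	radeon_fence_unref(&fence);
 */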

/**
 * radeon_fence_wait - wait for a fence to signal
 *
 * @fence: radeon fence object
 * @intr: use interruptible sleep
 *
 * Wait for the requested fence to signal (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the fence.
 * Returns 0 if the fence has passed, error for all other cases.
 */
int radeon_fence_wait(struct radeon_fence *fence, bool intr)
{
	int r;

	if (fence == NULL) {
		WARN(1, "Querying an invalid fence : %p !\n", fence);
		return -EINVAL;
	}

	r = radeon_fence_wait_seq(fence->rdev, fence->seq,
				  fence->ring, intr, true);
	if (r) {
		return r;
	}
	fence->seq = RADEON_FENCE_SIGNALED_SEQ;
	return 0;
}

/**
 * radeon_fence_any_seq_signaled - check if any sequence number has signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence number(s) to check, indexed by ring id (0 = skip that ring)
 *
 * Check, for every ring with a non-zero entry, whether the last signaled
 * fence sequence number is >= the requested sequence number (all asics).
 * Returns true if any of the requested sequence numbers has signaled.
 */
bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
{
	unsigned i;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i)) {
			return true;
		}
	}
	return false;
}

/**
 * radeon_fence_wait_any_seq - wait for a sequence number on any ring
 *
 * @rdev: radeon device pointer
 * @target_seq: sequence number(s) we want to wait for
 * @intr: use interruptible sleep
 *
 * Wait for the requested sequence number(s) to be written by any ring
 * (all asics).  The sequence number array is indexed by ring id.
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the sequence number.  Helper function
 * for radeon_fence_wait_any(), et al.
 * Returns 0 if the sequence number has passed, error for all other cases.
 */
static int radeon_fence_wait_any_seq(struct radeon_device *rdev,
				     u64 *target_seq, bool intr)
{
	unsigned long timeout, last_activity, tmp;
	unsigned i, ring = RADEON_NUM_RINGS;
	bool signaled;
	int r;

	for (i = 0, last_activity = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!target_seq[i]) {
			continue;
		}

		/* use the most recent one as indicator */
		if (time_after(rdev->fence_drv[i].last_activity, last_activity)) {
			last_activity = rdev->fence_drv[i].last_activity;
		}

		/* For lockup detection just pick the lowest ring we are
		 * actively waiting for
		 */
		if (i < ring) {
			ring = i;
		}
	}

	/* nothing to wait for ? */
	if (ring == RADEON_NUM_RINGS) {
		return -ENOENT;
	}

	while (!radeon_fence_any_seq_signaled(rdev, target_seq)) {
		timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
		if (time_after(last_activity, timeout)) {
			/* the normal case, timeout is somewhere before last_activity */
			timeout = last_activity - timeout;
		} else {
			/* either jiffies wrapped around, or no fence was signaled
			 * in the last 500ms; either way we just wait for the minimum
			 * amount and then check for a lockup
			 */
			timeout = 1;
		}

		trace_radeon_fence_wait_begin(rdev->ddev, target_seq[ring]);
		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
			if (target_seq[i]) {
				radeon_irq_kms_sw_irq_get(rdev, i);
			}
		}
		if (intr) {
			r = wait_event_interruptible_timeout(rdev->fence_queue,
				(signaled = radeon_fence_any_seq_signaled(rdev, target_seq)),
				timeout);
		} else {
			r = wait_event_timeout(rdev->fence_queue,
				(signaled = radeon_fence_any_seq_signaled(rdev, target_seq)),
				timeout);
		}
		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
			if (target_seq[i]) {
				radeon_irq_kms_sw_irq_put(rdev, i);
			}
		}
		if (unlikely(r < 0)) {
			return r;
		}
		trace_radeon_fence_wait_end(rdev->ddev, target_seq[ring]);

		if (unlikely(!signaled)) {
			/* we were interrupted for some reason and the fence
			 * isn't signaled yet, resume waiting */
			if (r) {
				continue;
			}

			mutex_lock(&rdev->ring_lock);
			for (i = 0, tmp = 0; i < RADEON_NUM_RINGS; ++i) {
				if (time_after(rdev->fence_drv[i].last_activity, tmp)) {
					tmp = rdev->fence_drv[i].last_activity;
				}
			}
			/* test if somebody else has already decided that this is a lockup */
			if (last_activity != tmp) {
				last_activity = tmp;
				mutex_unlock(&rdev->ring_lock);
				continue;
			}

			if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
				/* good news we believe it's a lockup */
				dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx)\n",
					 target_seq[ring]);

				/* change last activity so nobody else thinks there is a lockup */
				for (i = 0; i < RADEON_NUM_RINGS; ++i) {
					rdev->fence_drv[i].last_activity = jiffies;
				}

				/* mark the ring as not ready any more */
				rdev->ring[ring].ready = false;
				mutex_unlock(&rdev->ring_lock);
				return -EDEADLK;
			}
			mutex_unlock(&rdev->ring_lock);
		}
	}
	return 0;
}
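
/*
 * Illustrative sketch, not taken from this file: waiting for whichever of
 * several per-ring fences signals first, the way the suballocator is said
 * to use radeon_fence_wait_any() below.  The array is indexed by ring id
 * and unused slots stay NULL; gfx_fence and the gfx ring index macro are
 * assumed here purely for illustration:
 *
 *	struct radeon_fence *fences[RADEON_NUM_RINGS] = { NULL };
 *	int r;
 *
 *	fences[RADEON_RING_TYPE_GFX_INDEX] = gfx_fence;
 *	r = radeon_fence_wait_any(rdev, fences, true);
 */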

/**
 * radeon_fence_wait_any - wait for a fence to signal on any ring
 *
 * @rdev: radeon device pointer
 * @fences: radeon fence object(s)
 * @intr: use interruptible sleep
 *
 * Wait for any requested fence to signal (all asics).  The fence
 * array is indexed by ring id.  @intr selects whether to use
 * interruptible (true) or non-interruptible (false) sleep when
 * waiting for the fences.  Used by the suballocator.
 * Returns 0 if any fence has passed, error for all other cases.
 */
int radeon_fence_wait_any(struct radeon_device *rdev,
			  struct radeon_fence **fences,
			  bool intr)
{
	uint64_t seq[RADEON_NUM_RINGS];
	unsigned i;
	int r;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		seq[i] = 0;

		if (!fences[i]) {
			continue;
		}

		if (fences[i]->seq == RADEON_FENCE_SIGNALED_SEQ) {
			/* something was already signaled */
			return 0;
		}

		seq[i] = fences[i]->seq;
	}

	r = radeon_fence_wait_any_seq(rdev, seq, intr);
	if (r) {
		return r;
	}
	return 0;
}

/**
 * radeon_fence_wait_next_locked - wait for the next fence to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for the next fence on the requested ring to signal (all asics).
 * Returns 0 if the next fence has passed, error for all other cases.
 * Caller must hold the ring lock.
 */
int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
{
	uint64_t seq;

	seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
	if (seq >= rdev->fence_drv[ring].sync_seq[ring]) {
		/* nothing to wait for, last_seq is
		   already the last emitted fence */
		return -ENOENT;
	}
	return radeon_fence_wait_seq(rdev, seq, ring, false, false);
}

/**
 * radeon_fence_wait_empty_locked - wait for all fences to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for all fences on the requested ring to signal (all asics).
 * If a GPU lockup is detected, the GPU is reset and the wait is retried;
 * other errors are reported with dev_err().
 * Caller must hold the ring lock.
 */
void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
{
	uint64_t seq = rdev->fence_drv[ring].sync_seq[ring];

	while (1) {
		int r;
		r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
		if (r == -EDEADLK) {
			mutex_unlock(&rdev->ring_lock);
			r = radeon_gpu_reset(rdev);
			mutex_lock(&rdev->ring_lock);
			if (!r)
				continue;
		}
		if (r) {
			dev_err(rdev->dev, "error waiting for ring to become"
				" idle (%d)\n", r);
		}
		return;
	}
}

/**
 * radeon_fence_ref - take a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Take a reference on a fence (all asics).
 * Returns the fence.
 */
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
{
	kref_get(&fence->kref);
	return fence;
}

/**
 * radeon_fence_unref - remove a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Remove a reference on a fence (all asics).
 */
void radeon_fence_unref(struct radeon_fence **fence)
{
	struct radeon_fence *tmp = *fence;

	*fence = NULL;
	if (tmp) {
		kref_put(&tmp->kref, radeon_fence_destroy);
	}
}

/**
 * radeon_fence_count_emitted - get the count of emitted fences
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Get the number of fences emitted on the requested ring (all asics).
 * Returns the number of emitted fences on the ring.  Used by the
 * dynpm code to track ring activity.
 */
unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
{
	uint64_t emitted;

	/* We are not protected by the ring lock when reading the last
	 * sequence, but it's ok to report a slightly wrong fence count here.
	 */
	radeon_fence_process(rdev, ring);
	emitted = rdev->fence_drv[ring].sync_seq[ring]
		- atomic64_read(&rdev->fence_drv[ring].last_seq);
	/* to avoid a 32-bit wrap-around */
	if (emitted > 0x10000000) {
		emitted = 0x10000000;
	}
	return (unsigned)emitted;
}

/**
 * radeon_fence_need_sync - do we need a semaphore
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Check if the fence needs to be synced against another ring
 * (all asics).  If so, we need to emit a semaphore.
 * Returns true if we need to sync with another ring, false if
 * not.
 */
bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
{
	struct radeon_fence_driver *fdrv;

	if (!fence) {
		return false;
	}

	if (fence->ring == dst_ring) {
		return false;
	}

	/* we are protected by the ring mutex */
	fdrv = &fence->rdev->fence_drv[dst_ring];
	if (fence->seq <= fdrv->sync_seq[fence->ring]) {
		return false;
	}

	return true;
}

/**
 * radeon_fence_note_sync - record the sync point
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Note the sequence number at which point the fence will
 * be synced with the requested ring (all asics).
 */
void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
{
	struct radeon_fence_driver *dst, *src;
	unsigned i;

	if (!fence) {
		return;
	}

	if (fence->ring == dst_ring) {
		return;
	}

	/* we are protected by the ring mutex */
	src = &fence->rdev->fence_drv[fence->ring];
	dst = &fence->rdev->fence_drv[dst_ring];
	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (i == dst_ring) {
			continue;
		}
		dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
	}
}

/**
 * radeon_fence_driver_start_ring - make the fence driver
 * ready for use on the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Make the fence driver ready for processing (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has.
 * Returns 0 for success, errors for failure.
 */
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
{
	uint64_t index;
	int r;

	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
	if (rdev->wb.use_event) {
		rdev->fence_drv[ring].scratch_reg = 0;
		index = R600_WB_EVENT_OFFSET + ring * 4;
	} else {
		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
		if (r) {
			dev_err(rdev->dev, "fence failed to get scratch register\n");
			return r;
		}
		index = RADEON_WB_SCRATCH_OFFSET +
			rdev->fence_drv[ring].scratch_reg -
			rdev->scratch.reg_base;
	}
	rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
	rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
	radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
	rdev->fence_drv[ring].initialized = true;
	dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
		 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
	return 0;
}
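
/*
 * Rough init-order sketch (an assumption drawn from the surrounding
 * kernel-doc, not from asic code in this file): radeon_fence_driver_init()
 * is expected to run once at device init, and
 * radeon_fence_driver_start_ring() once per ring the asic actually has,
 * after writeback has been set up:
 *
 *	r = radeon_fence_driver_init(rdev);
 *	...
 *	r = radeon_fence_driver_start_ring(rdev, ring);
 */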

/**
 * radeon_fence_driver_init_ring - init the fence driver
 * for the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Init the fence driver for the requested ring (all asics).
 * Helper function for radeon_fence_driver_init().
 */
static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
{
	int i;

	rdev->fence_drv[ring].scratch_reg = -1;
	rdev->fence_drv[ring].cpu_addr = NULL;
	rdev->fence_drv[ring].gpu_addr = 0;
	for (i = 0; i < RADEON_NUM_RINGS; ++i)
		rdev->fence_drv[ring].sync_seq[i] = 0;
	atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
	rdev->fence_drv[ring].last_activity = jiffies;
	rdev->fence_drv[ring].initialized = false;
}

/**
 * radeon_fence_driver_init - init the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Init the fence driver for all possible rings (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has using
 * radeon_fence_driver_start_ring().
 * Returns 0 for success.
 */
int radeon_fence_driver_init(struct radeon_device *rdev)
{
	int ring;

	init_waitqueue_head(&rdev->fence_queue);
	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
		radeon_fence_driver_init_ring(rdev, ring);
	}
	if (radeon_debugfs_fence_init(rdev)) {
		dev_err(rdev->dev, "fence debugfs file creation failed\n");
	}
	return 0;
}

/**
 * radeon_fence_driver_fini - tear down the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Tear down the fence driver for all possible rings (all asics).
 */
void radeon_fence_driver_fini(struct radeon_device *rdev)
{
	int ring;

	mutex_lock(&rdev->ring_lock);
	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
		if (!rdev->fence_drv[ring].initialized)
			continue;
		radeon_fence_wait_empty_locked(rdev, ring);
		wake_up_all(&rdev->fence_queue);
		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
		rdev->fence_drv[ring].initialized = false;
	}
	mutex_unlock(&rdev->ring_lock);
}


/*
 * Fence debugfs
 */
#if defined(CONFIG_DEBUG_FS)
/*
 * radeon_debugfs_fence_info - dump fence state to debugfs
 *
 * Prints the last signaled and last emitted sequence numbers, plus the
 * last sync points to the other rings, for every initialized ring.
 */
static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	int i, j;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!rdev->fence_drv[i].initialized)
			continue;

		seq_printf(m, "--- ring %d ---\n", i);
		seq_printf(m, "Last signaled fence 0x%016llx\n",
			   (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
		seq_printf(m, "Last emitted 0x%016llx\n",
			   rdev->fence_drv[i].sync_seq[i]);

		for (j = 0; j < RADEON_NUM_RINGS; ++j) {
			if (i != j && rdev->fence_drv[j].initialized)
				seq_printf(m, "Last sync to ring %d 0x%016llx\n",
					   j, rdev->fence_drv[i].sync_seq[j]);
		}
	}
	return 0;
}

static struct drm_info_list radeon_debugfs_fence_list[] = {
	{"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
};
#endif

/*
 * radeon_debugfs_fence_init - register the radeon_fence_info debugfs file
 *
 * Returns 0 on success, error on failure; always 0 when debugfs is not
 * compiled in.
 */
int radeon_debugfs_fence_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 1);
#else
	return 0;
#endif
}