1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Fence mechanism for dma-buf and to allow for asynchronous dma access 4 * 5 * Copyright (C) 2012 Canonical Ltd 6 * Copyright (C) 2012 Texas Instruments 7 * 8 * Authors: 9 * Rob Clark <robdclark@gmail.com> 10 * Maarten Lankhorst <maarten.lankhorst@canonical.com> 11 */ 12 13 #include <linux/slab.h> 14 #include <linux/export.h> 15 #include <linux/atomic.h> 16 #include <linux/dma-fence.h> 17 #include <linux/sched/signal.h> 18 #include <linux/seq_file.h> 19 20 #define CREATE_TRACE_POINTS 21 #include <trace/events/dma_fence.h> 22 23 EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit); 24 EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal); 25 EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled); 26 27 static struct dma_fence dma_fence_stub; 28 29 /* 30 * fence context counter: each execution context should have its own 31 * fence context, this allows checking if fences belong to the same 32 * context or not. One device can have multiple separate contexts, 33 * and they're used if some engine can run independently of another. 34 */ 35 static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(1); 36 37 /** 38 * DOC: DMA fences overview 39 * 40 * DMA fences, represented by &struct dma_fence, are the kernel internal 41 * synchronization primitive for DMA operations like GPU rendering, video 42 * encoding/decoding, or displaying buffers on a screen. 43 * 44 * A fence is initialized using dma_fence_init() and completed using 45 * dma_fence_signal(). Fences are associated with a context, allocated through 46 * dma_fence_context_alloc(), and all fences on the same context are 47 * fully ordered. 48 * 49 * Since the purposes of fences is to facilitate cross-device and 50 * cross-application synchronization, there's multiple ways to use one: 51 * 52 * - Individual fences can be exposed as a &sync_file, accessed as a file 53 * descriptor from userspace, created by calling sync_file_create(). 
This is 54 * called explicit fencing, since userspace passes around explicit 55 * synchronization points. 56 * 57 * - Some subsystems also have their own explicit fencing primitives, like 58 * &drm_syncobj. Compared to &sync_file, a &drm_syncobj allows the underlying 59 * fence to be updated. 60 * 61 * - Then there's also implicit fencing, where the synchronization points are 62 * implicitly passed around as part of shared &dma_buf instances. Such 63 * implicit fences are stored in &struct dma_resv through the 64 * &dma_buf.resv pointer. 65 */ 66 67 /** 68 * DOC: fence cross-driver contract 69 * 70 * Since &dma_fence provide a cross driver contract, all drivers must follow the 71 * same rules: 72 * 73 * * Fences must complete in a reasonable time. Fences which represent kernels 74 * and shaders submitted by userspace, which could run forever, must be backed 75 * up by timeout and gpu hang recovery code. Minimally that code must prevent 76 * further command submission and force complete all in-flight fences, e.g. 77 * when the driver or hardware do not support gpu reset, or if the gpu reset 78 * failed for some reason. Ideally the driver supports gpu recovery which only 79 * affects the offending userspace context, and no other userspace 80 * submissions. 81 * 82 * * Drivers may have different ideas of what completion within a reasonable 83 * time means. Some hang recovery code uses a fixed timeout, others a mix 84 * between observing forward progress and increasingly strict timeouts. 85 * Drivers should not try to second guess timeout handling of fences from 86 * other drivers. 87 * 88 * * To ensure there's no deadlocks of dma_fence_wait() against other locks 89 * drivers should annotate all code required to reach dma_fence_signal(), 90 * which completes the fences, with dma_fence_begin_signalling() and 91 * dma_fence_end_signalling(). 92 * 93 * * Drivers are allowed to call dma_fence_wait() while holding dma_resv_lock(). 
94 * This means any code required for fence completion cannot acquire a 95 * &dma_resv lock. Note that this also pulls in the entire established 96 * locking hierarchy around dma_resv_lock() and dma_resv_unlock(). 97 * 98 * * Drivers are allowed to call dma_fence_wait() from their &shrinker 99 * callbacks. This means any code required for fence completion cannot 100 * allocate memory with GFP_KERNEL. 101 * 102 * * Drivers are allowed to call dma_fence_wait() from their &mmu_notifier 103 * respectively &mmu_interval_notifier callbacks. This means any code required 104 * for fence completion cannot allocate memory with GFP_NOFS or GFP_NOIO. 105 * Only GFP_ATOMIC is permissible, which might fail. 106 * 107 * Note that only GPU drivers have a reasonable excuse for both requiring 108 * &mmu_interval_notifier and &shrinker callbacks at the same time as having to 109 * track asynchronous compute work using &dma_fence. No driver outside of 110 * drivers/gpu should ever call dma_fence_wait() in such contexts. 111 */ 112 113 static const char *dma_fence_stub_get_name(struct dma_fence *fence) 114 { 115 return "stub"; 116 } 117 118 static const struct dma_fence_ops dma_fence_stub_ops = { 119 .get_driver_name = dma_fence_stub_get_name, 120 .get_timeline_name = dma_fence_stub_get_name, 121 }; 122 123 static int __init dma_fence_init_stub(void) 124 { 125 dma_fence_init(&dma_fence_stub, &dma_fence_stub_ops, NULL, 0, 0); 126 set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 127 &dma_fence_stub.flags); 128 dma_fence_signal(&dma_fence_stub); 129 return 0; 130 } 131 subsys_initcall(dma_fence_init_stub); 132 133 /** 134 * dma_fence_get_stub - return a signaled fence 135 * 136 * Return a stub fence which is already signaled. The fence's timestamp 137 * corresponds to the initialisation time of the linux kernel. 
138 */ 139 struct dma_fence *dma_fence_get_stub(void) 140 { 141 return dma_fence_get(&dma_fence_stub); 142 } 143 EXPORT_SYMBOL(dma_fence_get_stub); 144 145 /** 146 * dma_fence_allocate_private_stub - return a private, signaled fence 147 * @timestamp: timestamp when the fence was signaled 148 * 149 * Return a newly allocated and signaled stub fence. 150 */ 151 struct dma_fence *dma_fence_allocate_private_stub(ktime_t timestamp) 152 { 153 struct dma_fence *fence; 154 155 fence = kzalloc_obj(*fence); 156 if (fence == NULL) 157 return NULL; 158 159 dma_fence_init(fence, &dma_fence_stub_ops, NULL, 0, 0); 160 set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 161 &fence->flags); 162 163 dma_fence_signal_timestamp(fence, timestamp); 164 165 return fence; 166 } 167 EXPORT_SYMBOL(dma_fence_allocate_private_stub); 168 169 /** 170 * dma_fence_context_alloc - allocate an array of fence contexts 171 * @num: amount of contexts to allocate 172 * 173 * This function will return the first index of the number of fence contexts 174 * allocated. The fence context is used for setting &dma_fence.context to a 175 * unique number by passing the context to dma_fence_init(). 176 */ 177 u64 dma_fence_context_alloc(unsigned num) 178 { 179 WARN_ON(!num); 180 return atomic64_fetch_add(num, &dma_fence_context_counter); 181 } 182 EXPORT_SYMBOL(dma_fence_context_alloc); 183 184 /** 185 * DOC: fence signalling annotation 186 * 187 * Proving correctness of all the kernel code around &dma_fence through code 188 * review and testing is tricky for a few reasons: 189 * 190 * * It is a cross-driver contract, and therefore all drivers must follow the 191 * same rules for lock nesting order, calling contexts for various functions 192 * and anything else significant for in-kernel interfaces. But it is also 193 * impossible to test all drivers in a single machine, hence brute-force N vs. 194 * N testing of all combinations is impossible. Even just limiting to the 195 * possible combinations is infeasible. 
 *
 * * There is an enormous amount of driver code involved. For render drivers
 * there's the tail of command submission, after fences are published,
 * scheduler code, interrupt and workers to process job completion,
 * and timeout, gpu reset and gpu hang recovery code. Plus for integration
 * with core mm we have &mmu_notifier, respectively &mmu_interval_notifier,
 * and &shrinker. For modesetting drivers there's the commit tail functions
 * between when fences for an atomic modeset are published, and when the
 * corresponding vblank completes, including any interrupt processing and
 * related workers. Auditing all that code, across all drivers, is not
 * feasible.
 *
 * * Due to how many other subsystems are involved and the locking hierarchies
 * this pulls in there is extremely thin wiggle-room for driver-specific
 * differences. &dma_fence interacts with almost all of the core memory
 * handling through page fault handlers via &dma_resv, dma_resv_lock() and
 * dma_resv_unlock(). On the other side it also interacts through all
 * allocation sites through &mmu_notifier and &shrinker.
 *
 * Furthermore lockdep does not handle cross-release dependencies, which means
 * any deadlocks between dma_fence_wait() and dma_fence_signal() can't be caught
 * at runtime with some quick testing.
The simplest example is one thread 218 * waiting on a &dma_fence while holding a lock:: 219 * 220 * lock(A); 221 * dma_fence_wait(B); 222 * unlock(A); 223 * 224 * while the other thread is stuck trying to acquire the same lock, which 225 * prevents it from signalling the fence the previous thread is stuck waiting 226 * on:: 227 * 228 * lock(A); 229 * unlock(A); 230 * dma_fence_signal(B); 231 * 232 * By manually annotating all code relevant to signalling a &dma_fence we can 233 * teach lockdep about these dependencies, which also helps with the validation 234 * headache since now lockdep can check all the rules for us:: 235 * 236 * cookie = dma_fence_begin_signalling(); 237 * lock(A); 238 * unlock(A); 239 * dma_fence_signal(B); 240 * dma_fence_end_signalling(cookie); 241 * 242 * For using dma_fence_begin_signalling() and dma_fence_end_signalling() to 243 * annotate critical sections the following rules need to be observed: 244 * 245 * * All code necessary to complete a &dma_fence must be annotated, from the 246 * point where a fence is accessible to other threads, to the point where 247 * dma_fence_signal() is called. Un-annotated code can contain deadlock issues, 248 * and due to the very strict rules and many corner cases it is infeasible to 249 * catch these just with review or normal stress testing. 250 * 251 * * &struct dma_resv deserves a special note, since the readers are only 252 * protected by rcu. This means the signalling critical section starts as soon 253 * as the new fences are installed, even before dma_resv_unlock() is called. 254 * 255 * * The only exception are fast paths and opportunistic signalling code, which 256 * calls dma_fence_signal() purely as an optimization, but is not required to 257 * guarantee completion of a &dma_fence. The usual example is a wait IOCTL 258 * which calls dma_fence_signal(), while the mandatory completion path goes 259 * through a hardware interrupt and possible job completion worker. 
260 * 261 * * To aid composability of code, the annotations can be freely nested, as long 262 * as the overall locking hierarchy is consistent. The annotations also work 263 * both in interrupt and process context. Due to implementation details this 264 * requires that callers pass an opaque cookie from 265 * dma_fence_begin_signalling() to dma_fence_end_signalling(). 266 * 267 * * Validation against the cross driver contract is implemented by priming 268 * lockdep with the relevant hierarchy at boot-up. This means even just 269 * testing with a single device is enough to validate a driver, at least as 270 * far as deadlocks with dma_fence_wait() against dma_fence_signal() are 271 * concerned. 272 */ 273 #ifdef CONFIG_LOCKDEP 274 static struct lockdep_map dma_fence_lockdep_map = { 275 .name = "dma_fence_map" 276 }; 277 278 /** 279 * dma_fence_begin_signalling - begin a critical DMA fence signalling section 280 * 281 * Drivers should use this to annotate the beginning of any code section 282 * required to eventually complete &dma_fence by calling dma_fence_signal(). 283 * 284 * The end of these critical sections are annotated with 285 * dma_fence_end_signalling(). 286 * 287 * Returns: 288 * 289 * Opaque cookie needed by the implementation, which needs to be passed to 290 * dma_fence_end_signalling(). 291 */ 292 bool dma_fence_begin_signalling(void) 293 { 294 /* explicitly nesting ... */ 295 if (lock_is_held_type(&dma_fence_lockdep_map, 1)) 296 return true; 297 298 /* rely on might_sleep check for soft/hardirq locks */ 299 if (in_atomic()) 300 return true; 301 302 /* ... 
and non-recursive successful read_trylock */ 303 lock_acquire(&dma_fence_lockdep_map, 0, 1, 1, 1, NULL, _RET_IP_); 304 305 return false; 306 } 307 EXPORT_SYMBOL(dma_fence_begin_signalling); 308 309 /** 310 * dma_fence_end_signalling - end a critical DMA fence signalling section 311 * @cookie: opaque cookie from dma_fence_begin_signalling() 312 * 313 * Closes a critical section annotation opened by dma_fence_begin_signalling(). 314 */ 315 void dma_fence_end_signalling(bool cookie) 316 { 317 if (cookie) 318 return; 319 320 lock_release(&dma_fence_lockdep_map, _RET_IP_); 321 } 322 EXPORT_SYMBOL(dma_fence_end_signalling); 323 324 void __dma_fence_might_wait(void) 325 { 326 bool tmp; 327 328 tmp = lock_is_held_type(&dma_fence_lockdep_map, 1); 329 if (tmp) 330 lock_release(&dma_fence_lockdep_map, _THIS_IP_); 331 lock_map_acquire(&dma_fence_lockdep_map); 332 lock_map_release(&dma_fence_lockdep_map); 333 if (tmp) 334 lock_acquire(&dma_fence_lockdep_map, 0, 1, 1, 1, NULL, _THIS_IP_); 335 } 336 #endif 337 338 /** 339 * dma_fence_signal_timestamp_locked - signal completion of a fence 340 * @fence: the fence to signal 341 * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain 342 * 343 * Signal completion for software callbacks on a fence, this will unblock 344 * dma_fence_wait() calls and run all the callbacks added with 345 * dma_fence_add_callback(). Can be called multiple times, but since a fence 346 * can only go from the unsignaled to the signaled state and not back, it will 347 * only be effective the first time. Set the timestamp provided as the fence 348 * signal timestamp. 349 * 350 * Unlike dma_fence_signal_timestamp(), this function must be called with 351 * &dma_fence.lock held. 
352 */ 353 void dma_fence_signal_timestamp_locked(struct dma_fence *fence, 354 ktime_t timestamp) 355 { 356 const struct dma_fence_ops *ops; 357 struct dma_fence_cb *cur, *tmp; 358 struct list_head cb_list; 359 360 dma_fence_assert_held(fence); 361 362 if (unlikely(test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, 363 &fence->flags))) 364 return; 365 366 /* 367 * When neither a release nor a wait operation is specified set the ops 368 * pointer to NULL to allow the fence structure to become independent 369 * from who originally issued it. 370 */ 371 ops = rcu_dereference_protected(fence->ops, true); 372 if (!ops->release && !ops->wait) 373 RCU_INIT_POINTER(fence->ops, NULL); 374 375 /* Stash the cb_list before replacing it with the timestamp */ 376 list_replace(&fence->cb_list, &cb_list); 377 378 fence->timestamp = timestamp; 379 set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags); 380 trace_dma_fence_signaled(fence); 381 382 list_for_each_entry_safe(cur, tmp, &cb_list, node) { 383 INIT_LIST_HEAD(&cur->node); 384 cur->func(fence, cur); 385 } 386 } 387 EXPORT_SYMBOL(dma_fence_signal_timestamp_locked); 388 389 /** 390 * dma_fence_signal_timestamp - signal completion of a fence 391 * @fence: the fence to signal 392 * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain 393 * 394 * Signal completion for software callbacks on a fence, this will unblock 395 * dma_fence_wait() calls and run all the callbacks added with 396 * dma_fence_add_callback(). Can be called multiple times, but since a fence 397 * can only go from the unsignaled to the signaled state and not back, it will 398 * only be effective the first time. Set the timestamp provided as the fence 399 * signal timestamp. 
400 */ 401 void dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp) 402 { 403 unsigned long flags; 404 405 if (WARN_ON(!fence)) 406 return; 407 408 dma_fence_lock_irqsave(fence, flags); 409 dma_fence_signal_timestamp_locked(fence, timestamp); 410 dma_fence_unlock_irqrestore(fence, flags); 411 } 412 EXPORT_SYMBOL(dma_fence_signal_timestamp); 413 414 /** 415 * dma_fence_signal_locked - signal completion of a fence 416 * @fence: the fence to signal 417 * 418 * Signal completion for software callbacks on a fence, this will unblock 419 * dma_fence_wait() calls and run all the callbacks added with 420 * dma_fence_add_callback(). Can be called multiple times, but since a fence 421 * can only go from the unsignaled to the signaled state and not back, it will 422 * only be effective the first time. 423 * 424 * Unlike dma_fence_signal(), this function must be called with &dma_fence.lock 425 * held. 426 */ 427 void dma_fence_signal_locked(struct dma_fence *fence) 428 { 429 dma_fence_signal_timestamp_locked(fence, ktime_get()); 430 } 431 EXPORT_SYMBOL(dma_fence_signal_locked); 432 433 /** 434 * dma_fence_check_and_signal_locked - signal the fence if it's not yet signaled 435 * @fence: the fence to check and signal 436 * 437 * Checks whether a fence was signaled and signals it if it was not yet signaled. 438 * 439 * Unlike dma_fence_check_and_signal(), this function must be called with 440 * &struct dma_fence.lock being held. 441 * 442 * Return: true if fence has been signaled already, false otherwise. 
443 */ 444 bool dma_fence_check_and_signal_locked(struct dma_fence *fence) 445 { 446 bool ret; 447 448 ret = dma_fence_test_signaled_flag(fence); 449 dma_fence_signal_locked(fence); 450 451 return ret; 452 } 453 EXPORT_SYMBOL(dma_fence_check_and_signal_locked); 454 455 /** 456 * dma_fence_check_and_signal - signal the fence if it's not yet signaled 457 * @fence: the fence to check and signal 458 * 459 * Checks whether a fence was signaled and signals it if it was not yet signaled. 460 * All this is done in a race-free manner. 461 * 462 * Return: true if fence has been signaled already, false otherwise. 463 */ 464 bool dma_fence_check_and_signal(struct dma_fence *fence) 465 { 466 unsigned long flags; 467 bool ret; 468 469 dma_fence_lock_irqsave(fence, flags); 470 ret = dma_fence_check_and_signal_locked(fence); 471 dma_fence_unlock_irqrestore(fence, flags); 472 473 return ret; 474 } 475 EXPORT_SYMBOL(dma_fence_check_and_signal); 476 477 /** 478 * dma_fence_signal - signal completion of a fence 479 * @fence: the fence to signal 480 * 481 * Signal completion for software callbacks on a fence, this will unblock 482 * dma_fence_wait() calls and run all the callbacks added with 483 * dma_fence_add_callback(). Can be called multiple times, but since a fence 484 * can only go from the unsignaled to the signaled state and not back, it will 485 * only be effective the first time. 
486 */ 487 void dma_fence_signal(struct dma_fence *fence) 488 { 489 unsigned long flags; 490 bool tmp; 491 492 if (WARN_ON(!fence)) 493 return; 494 495 tmp = dma_fence_begin_signalling(); 496 497 dma_fence_lock_irqsave(fence, flags); 498 dma_fence_signal_timestamp_locked(fence, ktime_get()); 499 dma_fence_unlock_irqrestore(fence, flags); 500 501 dma_fence_end_signalling(tmp); 502 } 503 EXPORT_SYMBOL(dma_fence_signal); 504 505 /** 506 * dma_fence_wait_timeout - sleep until the fence gets signaled 507 * or until timeout elapses 508 * @fence: the fence to wait on 509 * @intr: if true, do an interruptible wait 510 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT 511 * 512 * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the 513 * remaining timeout in jiffies on success. Other error values may be 514 * returned on custom implementations. 515 * 516 * Performs a synchronous wait on this fence. It is assumed the caller 517 * directly or indirectly (buf-mgr between reservation and committing) 518 * holds a reference to the fence, otherwise the fence might be 519 * freed before return, resulting in undefined behavior. 520 * 521 * See also dma_fence_wait() and dma_fence_wait_any_timeout(). 522 */ 523 signed long 524 dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout) 525 { 526 const struct dma_fence_ops *ops; 527 signed long ret; 528 529 if (WARN_ON(timeout < 0)) 530 return -EINVAL; 531 532 might_sleep(); 533 534 __dma_fence_might_wait(); 535 536 dma_fence_enable_sw_signaling(fence); 537 538 rcu_read_lock(); 539 ops = rcu_dereference(fence->ops); 540 trace_dma_fence_wait_start(fence); 541 if (ops && ops->wait) { 542 /* 543 * Implementing the wait ops is deprecated and not supported for 544 * issuers of fences who need their lifetime to be independent 545 * of their module after they signal, so it is ok to use the 546 * ops outside the RCU protected section. 
547 */ 548 rcu_read_unlock(); 549 ret = ops->wait(fence, intr, timeout); 550 } else { 551 rcu_read_unlock(); 552 ret = dma_fence_default_wait(fence, intr, timeout); 553 } 554 if (trace_dma_fence_wait_end_enabled()) { 555 rcu_read_lock(); 556 trace_dma_fence_wait_end(fence); 557 rcu_read_unlock(); 558 } 559 return ret; 560 } 561 EXPORT_SYMBOL(dma_fence_wait_timeout); 562 563 /** 564 * dma_fence_release - default release function for fences 565 * @kref: &dma_fence.recfount 566 * 567 * This is the default release functions for &dma_fence. Drivers shouldn't call 568 * this directly, but instead call dma_fence_put(). 569 */ 570 void dma_fence_release(struct kref *kref) 571 { 572 struct dma_fence *fence = 573 container_of(kref, struct dma_fence, refcount); 574 const struct dma_fence_ops *ops; 575 576 rcu_read_lock(); 577 trace_dma_fence_destroy(fence); 578 579 if (!list_empty(&fence->cb_list) && 580 !dma_fence_test_signaled_flag(fence)) { 581 const char __rcu *timeline; 582 const char __rcu *driver; 583 unsigned long flags; 584 585 driver = dma_fence_driver_name(fence); 586 timeline = dma_fence_timeline_name(fence); 587 588 WARN(1, 589 "Fence %s:%s:%llx:%llx released with pending signals!\n", 590 rcu_dereference(driver), rcu_dereference(timeline), 591 fence->context, fence->seqno); 592 593 /* 594 * Failed to signal before release, likely a refcounting issue. 595 * 596 * This should never happen, but if it does make sure that we 597 * don't leave chains dangling. We set the error flag first 598 * so that the callbacks know this signal is due to an error. 
599 */ 600 dma_fence_lock_irqsave(fence, flags); 601 fence->error = -EDEADLK; 602 dma_fence_signal_locked(fence); 603 dma_fence_unlock_irqrestore(fence, flags); 604 } 605 606 ops = rcu_dereference(fence->ops); 607 if (ops && ops->release) 608 ops->release(fence); 609 else 610 dma_fence_free(fence); 611 rcu_read_unlock(); 612 } 613 EXPORT_SYMBOL(dma_fence_release); 614 615 /** 616 * dma_fence_free - default release function for &dma_fence. 617 * @fence: fence to release 618 * 619 * This is the default implementation for &dma_fence_ops.release. It calls 620 * kfree_rcu() on @fence. 621 */ 622 void dma_fence_free(struct dma_fence *fence) 623 { 624 kfree_rcu(fence, rcu); 625 } 626 EXPORT_SYMBOL(dma_fence_free); 627 628 static bool __dma_fence_enable_signaling(struct dma_fence *fence) 629 { 630 const struct dma_fence_ops *ops; 631 bool was_set; 632 633 dma_fence_assert_held(fence); 634 635 was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 636 &fence->flags); 637 638 if (dma_fence_test_signaled_flag(fence)) 639 return false; 640 641 rcu_read_lock(); 642 ops = rcu_dereference(fence->ops); 643 if (!was_set && ops && ops->enable_signaling) { 644 trace_dma_fence_enable_signal(fence); 645 646 if (!ops->enable_signaling(fence)) { 647 rcu_read_unlock(); 648 dma_fence_signal_locked(fence); 649 return false; 650 } 651 } 652 rcu_read_unlock(); 653 654 return true; 655 } 656 657 /** 658 * dma_fence_enable_sw_signaling - enable signaling on fence 659 * @fence: the fence to enable 660 * 661 * This will request for sw signaling to be enabled, to make the fence 662 * complete as soon as possible. This calls &dma_fence_ops.enable_signaling 663 * internally. 
664 */ 665 void dma_fence_enable_sw_signaling(struct dma_fence *fence) 666 { 667 unsigned long flags; 668 669 dma_fence_lock_irqsave(fence, flags); 670 __dma_fence_enable_signaling(fence); 671 dma_fence_unlock_irqrestore(fence, flags); 672 } 673 EXPORT_SYMBOL(dma_fence_enable_sw_signaling); 674 675 /** 676 * dma_fence_add_callback - add a callback to be called when the fence 677 * is signaled 678 * @fence: the fence to wait on 679 * @cb: the callback to register 680 * @func: the function to call 681 * 682 * Add a software callback to the fence. The caller should keep a reference to 683 * the fence. 684 * 685 * @cb will be initialized by dma_fence_add_callback(), no initialization 686 * by the caller is required. Any number of callbacks can be registered 687 * to a fence, but a callback can only be registered to one fence at a time. 688 * 689 * If fence is already signaled, this function will return -ENOENT (and 690 * *not* call the callback). 691 * 692 * Note that the callback can be called from an atomic context or irq context. 693 * 694 * Returns 0 in case of success, -ENOENT if the fence is already signaled 695 * and -EINVAL in case of error. 
696 */ 697 int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb, 698 dma_fence_func_t func) 699 { 700 unsigned long flags; 701 int ret = 0; 702 703 if (WARN_ON(!fence || !func)) 704 return -EINVAL; 705 706 if (dma_fence_test_signaled_flag(fence)) { 707 INIT_LIST_HEAD(&cb->node); 708 return -ENOENT; 709 } 710 711 dma_fence_lock_irqsave(fence, flags); 712 if (__dma_fence_enable_signaling(fence)) { 713 cb->func = func; 714 list_add_tail(&cb->node, &fence->cb_list); 715 } else { 716 INIT_LIST_HEAD(&cb->node); 717 ret = -ENOENT; 718 } 719 dma_fence_unlock_irqrestore(fence, flags); 720 721 return ret; 722 } 723 EXPORT_SYMBOL(dma_fence_add_callback); 724 725 /** 726 * dma_fence_get_status - returns the status upon completion 727 * @fence: the dma_fence to query 728 * 729 * This wraps dma_fence_get_status_locked() to return the error status 730 * condition on a signaled fence. See dma_fence_get_status_locked() for more 731 * details. 732 * 733 * Returns 0 if the fence has not yet been signaled, 1 if the fence has 734 * been signaled without an error condition, or a negative error code 735 * if the fence has been completed in err. 736 */ 737 int dma_fence_get_status(struct dma_fence *fence) 738 { 739 unsigned long flags; 740 int status; 741 742 dma_fence_lock_irqsave(fence, flags); 743 status = dma_fence_get_status_locked(fence); 744 dma_fence_unlock_irqrestore(fence, flags); 745 746 return status; 747 } 748 EXPORT_SYMBOL(dma_fence_get_status); 749 750 /** 751 * dma_fence_remove_callback - remove a callback from the signaling list 752 * @fence: the fence to wait on 753 * @cb: the callback to remove 754 * 755 * Remove a previously queued callback from the fence. This function returns 756 * true if the callback is successfully removed, or false if the fence has 757 * already been signaled. 
758 * 759 * *WARNING*: 760 * Cancelling a callback should only be done if you really know what you're 761 * doing, since deadlocks and race conditions could occur all too easily. For 762 * this reason, it should only ever be done on hardware lockup recovery, 763 * with a reference held to the fence. 764 * 765 * Behaviour is undefined if @cb has not been added to @fence using 766 * dma_fence_add_callback() beforehand. 767 */ 768 bool 769 dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb) 770 { 771 unsigned long flags; 772 bool ret; 773 774 dma_fence_lock_irqsave(fence, flags); 775 ret = !list_empty(&cb->node); 776 if (ret) 777 list_del_init(&cb->node); 778 dma_fence_unlock_irqrestore(fence, flags); 779 780 return ret; 781 } 782 EXPORT_SYMBOL(dma_fence_remove_callback); 783 784 struct default_wait_cb { 785 struct dma_fence_cb base; 786 struct task_struct *task; 787 }; 788 789 static void 790 dma_fence_default_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb) 791 { 792 struct default_wait_cb *wait = 793 container_of(cb, struct default_wait_cb, base); 794 795 wake_up_state(wait->task, TASK_NORMAL); 796 } 797 798 /** 799 * dma_fence_default_wait - default sleep until the fence gets signaled 800 * or until timeout elapses 801 * @fence: the fence to wait on 802 * @intr: if true, do an interruptible wait 803 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT 804 * 805 * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the 806 * remaining timeout in jiffies on success. If timeout is zero the value one is 807 * returned if the fence is already signaled for consistency with other 808 * functions taking a jiffies timeout. 809 */ 810 signed long 811 dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) 812 { 813 struct default_wait_cb cb; 814 unsigned long flags; 815 signed long ret = timeout ? 
timeout : 1; 816 817 dma_fence_lock_irqsave(fence, flags); 818 819 if (dma_fence_test_signaled_flag(fence)) 820 goto out; 821 822 if (intr && signal_pending(current)) { 823 ret = -ERESTARTSYS; 824 goto out; 825 } 826 827 if (!timeout) { 828 ret = 0; 829 goto out; 830 } 831 832 cb.base.func = dma_fence_default_wait_cb; 833 cb.task = current; 834 list_add(&cb.base.node, &fence->cb_list); 835 836 while (!dma_fence_test_signaled_flag(fence) && ret > 0) { 837 if (intr) 838 __set_current_state(TASK_INTERRUPTIBLE); 839 else 840 __set_current_state(TASK_UNINTERRUPTIBLE); 841 dma_fence_unlock_irqrestore(fence, flags); 842 843 ret = schedule_timeout(ret); 844 845 dma_fence_lock_irqsave(fence, flags); 846 if (ret > 0 && intr && signal_pending(current)) 847 ret = -ERESTARTSYS; 848 } 849 850 if (!list_empty(&cb.base.node)) 851 list_del(&cb.base.node); 852 __set_current_state(TASK_RUNNING); 853 854 out: 855 dma_fence_unlock_irqrestore(fence, flags); 856 return ret; 857 } 858 EXPORT_SYMBOL(dma_fence_default_wait); 859 860 static bool 861 dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count, 862 uint32_t *idx) 863 { 864 int i; 865 866 for (i = 0; i < count; ++i) { 867 struct dma_fence *fence = fences[i]; 868 if (dma_fence_test_signaled_flag(fence)) { 869 if (idx) 870 *idx = i; 871 return true; 872 } 873 } 874 return false; 875 } 876 877 /** 878 * dma_fence_wait_any_timeout - sleep until any fence gets signaled 879 * or until timeout elapses 880 * @fences: array of fences to wait on 881 * @count: number of fences to wait on 882 * @intr: if true, do an interruptible wait 883 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT 884 * @idx: used to store the first signaled fence index, meaningful only on 885 * positive return 886 * 887 * Returns -EINVAL on custom fence wait implementation, -ERESTARTSYS if 888 * interrupted, 0 if the wait timed out, or the remaining timeout in jiffies 889 * on success. 
890 * 891 * Synchronous waits for the first fence in the array to be signaled. The 892 * caller needs to hold a reference to all fences in the array, otherwise a 893 * fence might be freed before return, resulting in undefined behavior. 894 * 895 * See also dma_fence_wait() and dma_fence_wait_timeout(). 896 */ 897 signed long 898 dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count, 899 bool intr, signed long timeout, uint32_t *idx) 900 { 901 struct default_wait_cb *cb; 902 signed long ret = timeout; 903 unsigned i; 904 905 if (WARN_ON(!fences || !count || timeout < 0)) 906 return -EINVAL; 907 908 if (timeout == 0) { 909 for (i = 0; i < count; ++i) 910 if (dma_fence_is_signaled(fences[i])) { 911 if (idx) 912 *idx = i; 913 return 1; 914 } 915 916 return 0; 917 } 918 919 cb = kzalloc_objs(struct default_wait_cb, count); 920 if (cb == NULL) { 921 ret = -ENOMEM; 922 goto err_free_cb; 923 } 924 925 for (i = 0; i < count; ++i) { 926 struct dma_fence *fence = fences[i]; 927 928 cb[i].task = current; 929 if (dma_fence_add_callback(fence, &cb[i].base, 930 dma_fence_default_wait_cb)) { 931 /* This fence is already signaled */ 932 if (idx) 933 *idx = i; 934 goto fence_rm_cb; 935 } 936 } 937 938 while (ret > 0) { 939 if (intr) 940 set_current_state(TASK_INTERRUPTIBLE); 941 else 942 set_current_state(TASK_UNINTERRUPTIBLE); 943 944 if (dma_fence_test_signaled_any(fences, count, idx)) 945 break; 946 947 ret = schedule_timeout(ret); 948 949 if (ret > 0 && intr && signal_pending(current)) 950 ret = -ERESTARTSYS; 951 } 952 953 __set_current_state(TASK_RUNNING); 954 955 fence_rm_cb: 956 while (i-- > 0) 957 dma_fence_remove_callback(fences[i], &cb[i].base); 958 959 err_free_cb: 960 kfree(cb); 961 962 return ret; 963 } 964 EXPORT_SYMBOL(dma_fence_wait_any_timeout); 965 966 /** 967 * DOC: deadline hints 968 * 969 * In an ideal world, it would be possible to pipeline a workload sufficiently 970 * that a utilization based device frequency governor could arrive at a minimum 
 * frequency that meets the requirements of the use-case, in order to minimize
 * power consumption. But in the real world there are many workloads which
 * defy this ideal. For example, but not limited to:
 *
 * * Workloads that ping-pong between device and CPU, with alternating periods
 *   of CPU waiting for device, and device waiting on CPU. This can result in
 *   devfreq and cpufreq seeing idle time in their respective domains and as a
 *   result reduce frequency.
 *
 * * Workloads that interact with a periodic time based deadline, such as double
 *   buffered GPU rendering vs vblank sync'd page flipping. In this scenario,
 *   missing a vblank deadline results in an *increase* in idle time on the GPU
 *   (since it has to wait an additional vblank period), sending a signal to
 *   the GPU's devfreq to reduce frequency, when in fact the opposite is what is
 *   needed.
 *
 * To this end, deadline hint(s) can be set on a &dma_fence via &dma_fence_set_deadline
 * (or indirectly via userspace facing ioctls like &sync_set_deadline).
 * The deadline hint provides a way for the waiting driver, or userspace, to
 * convey an appropriate sense of urgency to the signaling driver.
 *
 * A deadline hint is given in absolute ktime (CLOCK_MONOTONIC for userspace
 * facing APIs). The time could either be some point in the future (such as
 * the vblank based deadline for page-flipping, or the start of a compositor's
 * composition cycle), or the current time to indicate an immediate deadline
 * hint (i.e. forward progress cannot be made until this fence is signaled).
 *
 * Multiple deadlines may be set on a given fence, even in parallel. See the
 * documentation for &dma_fence_ops.set_deadline.
 *
 * The deadline hint is just that, a hint. The driver that created the fence
 * may react by increasing frequency, making different scheduling choices, etc.
 * Or doing nothing at all.
1004 */ 1005 1006 /** 1007 * dma_fence_set_deadline - set desired fence-wait deadline hint 1008 * @fence: the fence that is to be waited on 1009 * @deadline: the time by which the waiter hopes for the fence to be 1010 * signaled 1011 * 1012 * Give the fence signaler a hint about an upcoming deadline, such as 1013 * vblank, by which point the waiter would prefer the fence to be 1014 * signaled by. This is intended to give feedback to the fence signaler 1015 * to aid in power management decisions, such as boosting GPU frequency 1016 * if a periodic vblank deadline is approaching but the fence is not 1017 * yet signaled.. 1018 */ 1019 void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline) 1020 { 1021 const struct dma_fence_ops *ops; 1022 1023 rcu_read_lock(); 1024 ops = rcu_dereference(fence->ops); 1025 if (ops && ops->set_deadline && !dma_fence_is_signaled(fence)) 1026 ops->set_deadline(fence, deadline); 1027 rcu_read_unlock(); 1028 } 1029 EXPORT_SYMBOL(dma_fence_set_deadline); 1030 1031 /** 1032 * dma_fence_describe - Dump fence description into seq_file 1033 * @fence: the fence to describe 1034 * @seq: the seq_file to put the textual description into 1035 * 1036 * Dump a textual description of the fence and it's state into the seq_file. 
 */
void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq)
{
	/* Defaults for the signaled case: empty names, "signalled". */
	const char __rcu *timeline = (const char __rcu *)"";
	const char __rcu *driver = (const char __rcu *)"";
	const char *signaled = "";

	rcu_read_lock();

	/*
	 * Only query the names while the fence is unsignaled: after signaling
	 * the driver may free the strings backing them (see the IMPORTANT
	 * CONSIDERATION notes on the name accessors).
	 */
	if (!dma_fence_is_signaled(fence)) {
		timeline = dma_fence_timeline_name(fence);
		driver = dma_fence_driver_name(fence);
		signaled = "un";
	}

	seq_printf(seq, "%llu:%llu %s %s %ssignalled\n",
		   fence->context, fence->seqno, timeline, driver,
		   signaled);

	rcu_read_unlock();
}
EXPORT_SYMBOL(dma_fence_describe);

/*
 * Common initialization helper for dma_fence_init() and dma_fence_init64();
 * @flags supplies the initial fence flag bits (e.g. the 64-bit seqno bit).
 */
static void
__dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
		 spinlock_t *lock, u64 context, u64 seqno, unsigned long flags)
{
	/* Every fence must be able to report driver and timeline names. */
	BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name);

	kref_init(&fence->refcount);
	/*
	 * While it is counter intuitive to protect a constant function pointer
	 * table by RCU it allows modules to wait for an RCU grace period
	 * before they unload, to make sure that nobody is executing their
	 * functions any more.
	 */
	RCU_INIT_POINTER(fence->ops, ops);
	INIT_LIST_HEAD(&fence->cb_list);
	fence->context = context;
	fence->seqno = seqno;
	fence->flags = flags | BIT(DMA_FENCE_FLAG_INITIALIZED_BIT);
	/* Use the caller supplied lock if given, otherwise the embedded one. */
	if (lock) {
		fence->extern_lock = lock;
	} else {
		spin_lock_init(&fence->inline_lock);
		fence->flags |= BIT(DMA_FENCE_FLAG_INLINE_LOCK_BIT);
	}
	fence->error = 0;

	/* Trace last, once the fence is fully set up. */
	trace_dma_fence_init(fence);
}

/**
 * dma_fence_init - Initialize a custom fence.
 * @fence: the fence to initialize
 * @ops: the dma_fence_ops for operations on this fence
 * @lock: optional irqsafe spinlock to use for locking this fence
 * @context: the execution context this fence is run on
 * @seqno: a linear increasing sequence number for this context
 *
 * Initializes an allocated fence, the caller doesn't have to keep its
 * refcount after committing with this fence, but it will need to hold a
 * refcount again if &dma_fence_ops.enable_signaling gets called.
 *
 * context and seqno are used for easy comparison between fences, allowing
 * to check which fence is later by simply using dma_fence_later().
 *
 * It is strongly discouraged to provide an external lock because this couples
 * lock and fence life time. This is only allowed for legacy use cases when
 * multiple fences need to be prevented from signaling out of order.
 */
void
dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
	       spinlock_t *lock, u64 context, u64 seqno)
{
	/* No extra flag bits: plain (non 64-bit seqno) fence semantics. */
	__dma_fence_init(fence, ops, lock, context, seqno, 0UL);
}
EXPORT_SYMBOL(dma_fence_init);

/**
 * dma_fence_init64 - Initialize a custom fence with 64-bit seqno support.
 * @fence: the fence to initialize
 * @ops: the dma_fence_ops for operations on this fence
 * @lock: optional irqsafe spinlock to use for locking this fence
 * @context: the execution context this fence is run on
 * @seqno: a linear increasing sequence number for this context
 *
 * Initializes an allocated fence, the caller doesn't have to keep its
 * refcount after committing with this fence, but it will need to hold a
 * refcount again if &dma_fence_ops.enable_signaling gets called.
 *
 * Context and seqno are used for easy comparison between fences, allowing
 * to check which fence is later by simply using dma_fence_later().
 *
 * It is strongly discouraged to provide an external lock because this couples
 * lock and fence life time. This is only allowed for legacy use cases when
 * multiple fences need to be prevented from signaling out of order.
 */
void
dma_fence_init64(struct dma_fence *fence, const struct dma_fence_ops *ops,
		 spinlock_t *lock, u64 context, u64 seqno)
{
	/* Like dma_fence_init(), but sets the 64-bit seqno flag bit. */
	__dma_fence_init(fence, ops, lock, context, seqno,
			 BIT(DMA_FENCE_FLAG_SEQNO64_BIT));
}
EXPORT_SYMBOL(dma_fence_init64);

/**
 * dma_fence_driver_name - Access the driver name
 * @fence: the fence to query
 *
 * Returns a driver name backing the dma-fence implementation.
 *
 * IMPORTANT CONSIDERATION:
 * Dma-fence contract stipulates that access to driver provided data (data not
 * directly embedded into the object itself), such as the &dma_fence.lock and
 * memory potentially accessed by the &dma_fence.ops functions, is forbidden
 * after the fence has been signalled. Drivers are allowed to free that data,
 * and some do.
 *
 * To allow safe access drivers are mandated to guarantee a RCU grace period
 * between signalling the fence and freeing said data.
 *
 * As such access to the driver name is only valid inside a RCU locked section.
 * The pointer MUST be both queried and USED ONLY WITHIN a SINGLE block guarded
 * by the &rcu_read_lock and &rcu_read_unlock pair.
1163 */ 1164 const char __rcu *dma_fence_driver_name(struct dma_fence *fence) 1165 { 1166 const struct dma_fence_ops *ops; 1167 1168 /* RCU protection is required for safe access to returned string */ 1169 ops = rcu_dereference(fence->ops); 1170 if (!dma_fence_test_signaled_flag(fence)) 1171 return (const char __rcu *)ops->get_driver_name(fence); 1172 else 1173 return (const char __rcu *)"detached-driver"; 1174 } 1175 EXPORT_SYMBOL(dma_fence_driver_name); 1176 1177 /** 1178 * dma_fence_timeline_name - Access the timeline name 1179 * @fence: the fence to query 1180 * 1181 * Returns a timeline name provided by the dma-fence implementation. 1182 * 1183 * IMPORTANT CONSIDERATION: 1184 * Dma-fence contract stipulates that access to driver provided data (data not 1185 * directly embedded into the object itself), such as the &dma_fence.lock and 1186 * memory potentially accessed by the &dma_fence.ops functions, is forbidden 1187 * after the fence has been signalled. Drivers are allowed to free that data, 1188 * and some do. 1189 * 1190 * To allow safe access drivers are mandated to guarantee a RCU grace period 1191 * between signalling the fence and freeing said data. 1192 * 1193 * As such access to the driver name is only valid inside a RCU locked section. 1194 * The pointer MUST be both queried and USED ONLY WITHIN a SINGLE block guarded 1195 * by the &rcu_read_lock and &rcu_read_unlock pair. 1196 */ 1197 const char __rcu *dma_fence_timeline_name(struct dma_fence *fence) 1198 { 1199 const struct dma_fence_ops *ops; 1200 1201 /* RCU protection is required for safe access to returned string */ 1202 ops = rcu_dereference(fence->ops); 1203 if (!dma_fence_test_signaled_flag(fence)) 1204 return (const char __rcu *)ops->get_driver_name(fence); 1205 else 1206 return (const char __rcu *)"signaled-timeline"; 1207 } 1208 EXPORT_SYMBOL(dma_fence_timeline_name); 1209