// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_pm.h"

#include <linux/fault-inject.h>
#include <linux/pm_runtime.h>
#include <linux/suspend.h>

#include <drm/drm_managed.h>
#include <drm/ttm/ttm_placement.h>

#include "display/xe_display.h"
#include "xe_bo.h"
#include "xe_bo_evict.h"
#include "xe_device.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_gt_idle.h"
#include "xe_i2c.h"
#include "xe_irq.h"
#include "xe_late_bind_fw.h"
#include "xe_pcode.h"
#include "xe_pxp.h"
#include "xe_sriov_vf_ccs.h"
#include "xe_trace.h"
#include "xe_vm.h"
#include "xe_wa.h"

/**
 * DOC: Xe Power Management
 *
 * Xe PM implements the main routines for both system level suspend states and
 * for the opportunistic runtime suspend states.
 *
 * System Level Suspend (S-States) - In general this is OS initiated suspend
 * driven by ACPI for achieving S0ix (a.k.a. S2idle, freeze), S3 (suspend to ram),
 * S4 (disk). The main functions here are `xe_pm_suspend` and `xe_pm_resume`. They
 * are the main entry points for suspending to and resuming from these states.
 *
 * PCI Device Suspend (D-States) - This is the opportunistic PCIe device low power
 * state D3, controlled by the PCI subsystem and ACPI with help from the
 * runtime_pm infrastructure.
 * PCI D3 is special and can mean D3hot, where Vcc power is on for keeping memory
 * alive and allowing a quicker, lower latency resume, or D3Cold where Vcc power
 * is off for better power savings.
 * Vcc for the PCI hierarchy can only be controlled at the PCI root port level,
 * while the device driver can be behind multiple bridges/switches and
 * paired with other devices. For this reason, the PCI subsystem cannot perform
 * the transition towards D3Cold. The lowest runtime PM possible from the PCI
 * subsystem is D3hot. Then, if all the paired devices in the same root port
 * are in D3hot, ACPI will assist here and run its own methods (_PR3 and _OFF)
 * to perform the transition from D3hot to D3cold. Xe may disallow this
 * transition by calling pci_d3cold_disable(root_pdev) before going to runtime
 * suspend. This decision is based on runtime conditions such as VRAM usage,
 * for instance to guarantee a quick, low latency resume.
 *
 * Runtime PM - This infrastructure provided by the Linux kernel allows the
 * device drivers to indicate when they can be runtime suspended, so the device
 * can be put in D3 (if supported), or allow deeper package sleep states
 * (PC-states), and/or other low level power states. The Xe PM component provides
 * `xe_pm_runtime_suspend` and `xe_pm_runtime_resume` functions that the PCI
 * subsystem will call before transitioning to/from runtime suspend.
 *
 * Also, Xe PM provides get and put functions that the Xe driver will use to
 * indicate activity. In order to avoid locking complications with the memory
 * management, whenever possible, these get and put functions need to be called
 * from the higher/outer levels.
 * The main cases that need to be protected from the outer levels are: IOCTL,
 * sysfs, debugfs, dma-buf sharing, GPU execution.
 *
 * This component is not responsible for GT idleness (RC6) nor GT frequency
 * management (RPS).
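 *
 * As a rough illustration of the outer-level protection described above (a
 * sketch only: example_ioctl(), example_file_to_xe() and example_do_work()
 * are hypothetical, only the xe_pm_runtime_get_ioctl()/xe_pm_runtime_put()
 * pairing is taken from this file), an IOCTL path would bracket the work that
 * needs the device awake with a runtime PM reference, and put the reference
 * back unconditionally since the usage counter is bumped even when the get
 * fails::
 *
 *	long example_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 *	{
 *		struct xe_device *xe = example_file_to_xe(file);
 *		long ret;
 *
 *		ret = xe_pm_runtime_get_ioctl(xe);
 *		if (ret >= 0)
 *			ret = example_do_work(xe, cmd, arg);
 *		xe_pm_runtime_put(xe);
 *
 *		return ret;
 *	}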
 */

#ifdef CONFIG_LOCKDEP
static struct lockdep_map xe_pm_runtime_d3cold_map = {
	.name = "xe_rpm_d3cold_map"
};

static struct lockdep_map xe_pm_runtime_nod3cold_map = {
	.name = "xe_rpm_nod3cold_map"
};

static struct lockdep_map xe_pm_block_lockdep_map = {
	.name = "xe_pm_block_map",
};
#endif

static void xe_pm_block_begin_signalling(void)
{
	lock_acquire_shared_recursive(&xe_pm_block_lockdep_map, 0, 1, NULL, _RET_IP_);
}

static void xe_pm_block_end_signalling(void)
{
	lock_release(&xe_pm_block_lockdep_map, _RET_IP_);
}

/**
 * xe_pm_might_block_on_suspend() - Annotate that the code might block on suspend
 *
 * Annotation to use where the code might block or cease to make
 * progress pending resume completion.
 */
void xe_pm_might_block_on_suspend(void)
{
	lock_map_acquire(&xe_pm_block_lockdep_map);
	lock_map_release(&xe_pm_block_lockdep_map);
}

/**
 * xe_pm_block_on_suspend() - Block pending suspend.
 * @xe: The xe device about to be suspended.
 *
 * Block if the pm notifier has started evicting bos, to avoid
 * racing and validating those bos back. The function is
 * annotated to ensure no locks are held that are also grabbed
 * in the pm notifier or the device suspend / resume.
 * This is intended to be used by freezable tasks only
 * (not freezable workqueues), with the intention that the function
 * returns %-ERESTARTSYS when tasks are frozen during suspend,
 * and allows the task to freeze. The caller must be able to
 * handle the %-ERESTARTSYS.
 *
 * Return: %0 on success, %-ERESTARTSYS if a signal is pending or
 * if freezing is requested.
 */
int xe_pm_block_on_suspend(struct xe_device *xe)
{
	xe_pm_might_block_on_suspend();

	return wait_for_completion_interruptible(&xe->pm_block);
}

/**
 * xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context
 * @xe: The xe device.
 *
 * Return: true if it is safe to runtime resume from reclaim context,
 * false otherwise.
 */
bool xe_rpm_reclaim_safe(const struct xe_device *xe)
{
	return !xe->d3cold.capable;
}

static void xe_rpm_lockmap_acquire(const struct xe_device *xe)
{
	lock_map_acquire(xe_rpm_reclaim_safe(xe) ?
			 &xe_pm_runtime_nod3cold_map :
			 &xe_pm_runtime_d3cold_map);
}

static void xe_rpm_lockmap_release(const struct xe_device *xe)
{
	lock_map_release(xe_rpm_reclaim_safe(xe) ?
			 &xe_pm_runtime_nod3cold_map :
			 &xe_pm_runtime_d3cold_map);
}

/**
 * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_suspend(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err;

	drm_dbg(&xe->drm, "Suspending device\n");
	xe_pm_block_begin_signalling();
	trace_xe_pm_suspend(xe, __builtin_return_address(0));

	err = xe_pxp_pm_suspend(xe->pxp);
	if (err)
		goto err;

	xe_late_bind_wait_for_worker_completion(&xe->late_bind);

	for_each_gt(gt, xe, id)
		xe_gt_suspend_prepare(gt);

	xe_display_pm_suspend(xe);

	/* FIXME: Super racey... */
	err = xe_bo_evict_all(xe);
	if (err)
		goto err_display;

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err)
			goto err_display;
	}

	xe_irq_suspend(xe);

	xe_display_pm_suspend_late(xe);

	xe_i2c_pm_suspend(xe);

	drm_dbg(&xe->drm, "Device suspended\n");
	xe_pm_block_end_signalling();

	return 0;

err_display:
	xe_display_pm_resume(xe);
	xe_pxp_pm_resume(xe->pxp);
err:
	drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
	xe_pm_block_end_signalling();
	return err;
}

/**
 * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_resume(struct xe_device *xe)
{
	struct xe_tile *tile;
	struct xe_gt *gt;
	u8 id;
	int err;

	xe_pm_block_begin_signalling();
	drm_dbg(&xe->drm, "Resuming device\n");
	trace_xe_pm_resume(xe, __builtin_return_address(0));

	for_each_gt(gt, xe, id)
		xe_gt_idle_disable_c6(gt);

	for_each_tile(tile, xe, id)
		xe_wa_apply_tile_workarounds(tile);

	err = xe_pcode_ready(xe, true);
	if (err)
		return err;

	xe_display_pm_resume_early(xe);

	/*
	 * This only restores pinned memory which is the memory required for the
	 * GT(s) to resume.
	 */
	err = xe_bo_restore_early(xe);
	if (err)
		goto err;

	xe_i2c_pm_resume(xe, true);

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id) {
		err = xe_gt_resume(gt);
		if (err)
			break;
	}

	/*
	 * Try to bring up display before bailing from GT resume failure,
	 * so we don't leave the user clueless with a blank screen.
	 */
	xe_display_pm_resume(xe);
	if (err)
		goto err;

	err = xe_bo_restore_late(xe);
	if (err)
		goto err;

	xe_pxp_pm_resume(xe->pxp);

	if (IS_VF_CCS_READY(xe))
		xe_sriov_vf_ccs_register_context(xe);

	xe_late_bind_fw_load(&xe->late_bind);

	drm_dbg(&xe->drm, "Device resumed\n");
	xe_pm_block_end_signalling();
	return 0;
err:
	drm_dbg(&xe->drm, "Device resume failed %d\n", err);
	xe_pm_block_end_signalling();
	return err;
}

static bool xe_pm_pci_d3cold_capable(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *root_pdev;

	root_pdev = pcie_find_root_port(pdev);
	if (!root_pdev)
		return false;

	/* D3Cold requires PME capability */
	if (!pci_pme_capable(root_pdev, PCI_D3cold)) {
		drm_dbg(&xe->drm, "d3cold: PME# not supported\n");
		return false;
	}

	/* D3Cold requires _PR3 power resource */
	if (!pci_pr3_present(root_pdev)) {
		drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n");
		return false;
	}

	return true;
}

static void xe_pm_runtime_init(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/* Our current VFs do not support RPM, so disable it */
	if (IS_SRIOV_VF(xe))
		return;

	/*
	 * Disable the system suspend direct complete optimization.
	 * We need to ensure that the regular device suspend/resume functions
	 * are called since our runtime_pm cannot guarantee local memory
	 * eviction for d3cold.
	 * TODO: Check HDA audio dependencies claimed by i915, and then extend
	 * this option to integrated graphics as well.
	 */
	if (IS_DGFX(xe))
		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);

	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, 1000);
	pm_runtime_set_active(dev);
	pm_runtime_allow(dev);
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put(dev);
}

int xe_pm_init_early(struct xe_device *xe)
{
	int err;

	INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);

	err = drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
	if (err)
		return err;

	err = drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
	if (err)
		return err;

	xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe);
	return 0;
}
ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */

static u32 vram_threshold_value(struct xe_device *xe)
{
	/* FIXME: D3Cold temporarily disabled by default on BMG */
	if (xe->info.platform == XE_BATTLEMAGE)
		return 0;

	return DEFAULT_VRAM_THRESHOLD;
}

static void xe_pm_wake_rebind_workers(struct xe_device *xe)
{
	struct xe_vm *vm, *next;

	mutex_lock(&xe->rebind_resume_lock);
	list_for_each_entry_safe(vm, next, &xe->rebind_resume_list,
				 preempt.pm_activate_link) {
		list_del_init(&vm->preempt.pm_activate_link);
		xe_vm_resume_rebind_worker(vm);
	}
	mutex_unlock(&xe->rebind_resume_lock);
}

static int xe_pm_notifier_callback(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier);
	int err = 0;

	switch (action) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
	{
		struct xe_validation_ctx ctx;

		reinit_completion(&xe->pm_block);
		xe_pm_block_begin_signalling();
		xe_pm_runtime_get(xe);
		(void)xe_validation_ctx_init(&ctx, &xe->val, NULL,
					     (struct xe_val_flags) {.exclusive = true});
		err = xe_bo_evict_all_user(xe);
		xe_validation_ctx_fini(&ctx);
		if (err)
			drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err);

		err = xe_bo_notifier_prepare_all_pinned(xe);
		if (err)
			drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err);
		/*
		 * Keep the runtime pm reference until post hibernation / post suspend to
		 * avoid a runtime suspend interfering with evicted objects or backup
		 * allocations.
		 */
		xe_pm_block_end_signalling();
		break;
	}
	case PM_POST_HIBERNATION:
	case PM_POST_SUSPEND:
		complete_all(&xe->pm_block);
		xe_pm_wake_rebind_workers(xe);
		xe_bo_notifier_unprepare_all_pinned(xe);
		xe_pm_runtime_put(xe);
		break;
	}

	return NOTIFY_DONE;
}

/**
 * xe_pm_init - Initialize Xe Power Management
 * @xe: xe device instance
 *
 * This component is responsible for System and Device sleep states.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_init(struct xe_device *xe)
{
	u32 vram_threshold;
	int err;

	xe->pm_notifier.notifier_call = xe_pm_notifier_callback;
	err = register_pm_notifier(&xe->pm_notifier);
	if (err)
		return err;

	err = drmm_mutex_init(&xe->drm, &xe->rebind_resume_lock);
	if (err)
		goto err_unregister;

	init_completion(&xe->pm_block);
	complete_all(&xe->pm_block);
	INIT_LIST_HEAD(&xe->rebind_resume_list);

	/* For now suspend/resume is only allowed with GuC */
	if (!xe_device_uc_enabled(xe))
		return 0;

	if (xe->d3cold.capable) {
		vram_threshold = vram_threshold_value(xe);
		err = xe_pm_set_vram_threshold(xe, vram_threshold);
		if (err)
			goto err_unregister;
	}

	xe_pm_runtime_init(xe);
	return 0;

err_unregister:
	unregister_pm_notifier(&xe->pm_notifier);
	return err;
}

static void xe_pm_runtime_fini(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/* Our current VFs do not support RPM, so disable it */
	if (IS_SRIOV_VF(xe))
		return;

	pm_runtime_get_sync(dev);
	pm_runtime_forbid(dev);
}

/**
 * xe_pm_fini - Finalize PM
 * @xe: xe device instance
 */
void xe_pm_fini(struct xe_device *xe)
{
	if (xe_device_uc_enabled(xe))
		xe_pm_runtime_fini(xe);

	unregister_pm_notifier(&xe->pm_notifier);
}

static void xe_pm_write_callback_task(struct xe_device *xe,
				      struct task_struct *task)
{
	WRITE_ONCE(xe->pm_callback_task, task);

	/*
	 * Just in case it's somehow possible for our writes to be reordered to
	 * the extent that something else re-uses the task written in
	 * pm_callback_task. For example after returning from the callback, but
	 * before the reordered write that resets pm_callback_task back to NULL.
	 */
	smp_mb(); /* pairs with xe_pm_read_callback_task */
}

struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
{
	smp_mb(); /* pairs with xe_pm_write_callback_task */

	return READ_ONCE(xe->pm_callback_task);
}

/**
 * xe_pm_runtime_suspended - Check if runtime_pm state is suspended
 * @xe: xe device instance
 *
 * This does not provide any guarantee that the device is going to remain
 * suspended as it might be racing with the runtime state transitions.
 * It can only be used as an unreliable assertion, to ensure that we are not in
 * the sleep state while trying to access some memory for instance.
 *
 * Returns true if PCI device is suspended, false otherwise.
 */
bool xe_pm_runtime_suspended(struct xe_device *xe)
{
	return pm_runtime_suspended(xe->drm.dev);
}

/**
 * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold
 * @xe: xe device instance
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_runtime_suspend(struct xe_device *xe)
{
	struct xe_bo *bo, *on;
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	trace_xe_pm_runtime_suspend(xe, __builtin_return_address(0));
	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	/*
	 * The actual xe_pm_runtime_put() is always async underneath, so
	 * exactly where that is called should make no difference to us.
	 * However, we still need to be very careful with the locks that this
	 * callback acquires and the locks that are acquired and held by any
	 * callers of xe_pm_runtime_get(). We already have the matching annotation
	 * on that side, but we also need it here. For example lockdep should be
	 * able to tell us if the following scenario is in theory possible:
	 *
	 * CPU0                          | CPU1 (kworker)
	 * lock(A)                       |
	 *                               | xe_pm_runtime_suspend()
	 *                               | lock(A)
	 * xe_pm_runtime_get()           |
	 *
	 * This will clearly deadlock since rpm core needs to wait for
	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
	 * on CPU0 which prevents CPU1 from making forward progress. With the
	 * annotation here and in xe_pm_runtime_get() lockdep will see
	 * the potential lock inversion and give us a nice splat.
	 */
	xe_rpm_lockmap_acquire(xe);

	err = xe_pxp_pm_suspend(xe->pxp);
	if (err)
		goto out;

	/*
	 * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify
	 * also check and delete the bo entry from the user fault list.
	 */
	mutex_lock(&xe->mem_access.vram_userfault.lock);
	list_for_each_entry_safe(bo, on,
				 &xe->mem_access.vram_userfault.list, vram_userfault_link)
		xe_bo_runtime_pm_release_mmap_offset(bo);
	mutex_unlock(&xe->mem_access.vram_userfault.lock);

	xe_display_pm_runtime_suspend(xe);

	if (xe->d3cold.allowed) {
		err = xe_bo_evict_all(xe);
		if (err)
			goto out_resume;
	}

	for_each_gt(gt, xe, id) {
		err = xe->d3cold.allowed ? xe_gt_suspend(gt) : xe_gt_runtime_suspend(gt);
		if (err)
			goto out_resume;
	}

	xe_irq_suspend(xe);

	xe_display_pm_runtime_suspend_late(xe);

	xe_i2c_pm_suspend(xe);

	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return 0;

out_resume:
	xe_display_pm_runtime_resume(xe);
	xe_pxp_pm_resume(xe->pxp);
out:
	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/**
 * xe_pm_runtime_resume - Waking up from D3hot/D3Cold
 * @xe: xe device instance
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_runtime_resume(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	trace_xe_pm_runtime_resume(xe, __builtin_return_address(0));
	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	xe_rpm_lockmap_acquire(xe);

	if (xe->d3cold.allowed) {
		for_each_gt(gt, xe, id)
			xe_gt_idle_disable_c6(gt);

		err = xe_pcode_ready(xe, true);
		if (err)
			goto out;

		xe_display_pm_resume_early(xe);

		/*
		 * This only restores pinned memory which is the memory
		 * required for the GT(s) to resume.
		 */
		err = xe_bo_restore_early(xe);
		if (err)
			goto out;
	}

	xe_i2c_pm_resume(xe, xe->d3cold.allowed);

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id) {
		err = xe->d3cold.allowed ? xe_gt_resume(gt) : xe_gt_runtime_resume(gt);
		if (err)
			break;
	}

	/*
	 * Try to bring up display before bailing from GT resume failure,
	 * so we don't leave the user clueless with a blank screen.
	 */
	xe_display_pm_runtime_resume(xe);
	if (err)
		goto out;

	if (xe->d3cold.allowed) {
		err = xe_bo_restore_late(xe);
		if (err)
			goto out;
	}

	xe_pxp_pm_resume(xe->pxp);

	if (IS_VF_CCS_READY(xe))
		xe_sriov_vf_ccs_register_context(xe);

	if (xe->d3cold.allowed)
		xe_late_bind_fw_load(&xe->late_bind);

out:
	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/*
 * For places where resume is synchronous it can be quite easy to deadlock
 * if we are not careful. Also in practice it might be quite timing
 * sensitive to ever see the 0 -> 1 transition with the callers locks
 * held, so deadlocks might exist but are hard for lockdep to ever see.
 * With this in mind, help lockdep learn about the potentially scary
 * stuff that can happen inside the runtime_resume callback by acquiring
 * a dummy lock (it doesn't protect anything and gets compiled out on
 * non-debug builds). Lockdep then only needs to see the
 * xe_pm_runtime_xxx_map -> runtime_resume callback once, and then can
 * hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map.
 * For example if the (callers_locks) are ever grabbed in the
 * runtime_resume callback, lockdep should give us a nice splat.
 */
static void xe_rpm_might_enter_cb(const struct xe_device *xe)
{
	xe_rpm_lockmap_acquire(xe);
	xe_rpm_lockmap_release(xe);
}

/*
 * Prime the lockdep maps for known locking orders that need to
 * be supported but that may not always occur on all systems.
 */
static void xe_pm_runtime_lockdep_prime(void)
{
	struct dma_resv lockdep_resv;

	dma_resv_init(&lockdep_resv);
	lock_map_acquire(&xe_pm_runtime_d3cold_map);
	/* D3Cold takes the dma_resv locks to evict bos */
	dma_resv_lock(&lockdep_resv, NULL);
	dma_resv_unlock(&lockdep_resv);
	lock_map_release(&xe_pm_runtime_d3cold_map);

	/* Shrinkers might like to wake up the device under reclaim. */
	fs_reclaim_acquire(GFP_KERNEL);
	lock_map_acquire(&xe_pm_runtime_nod3cold_map);
	lock_map_release(&xe_pm_runtime_nod3cold_map);
	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously
 * @xe: xe device instance
 *
 * When possible, scope-based runtime PM (through guard(xe_pm_runtime)) is
 * preferred over direct usage of this function. Manual get/put handling
 * should only be used when the function contains goto-based logic which
 * can break scope-based handling, or when the lifetime of the runtime PM
 * reference does not match a specific scope (e.g., runtime PM obtained in one
 * function and released in a different one).
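 *
 * As a minimal sketch (example_work() is a hypothetical helper; the guard
 * form assumes the xe_pm_runtime guard class referenced above is available),
 * the scope-based form drops the reference automatically at scope exit::
 *
 *	guard(xe_pm_runtime)(xe);
 *	example_work(xe);
 *
 * while the manual form pairs the calls explicitly::
 *
 *	xe_pm_runtime_get(xe);
 *	example_work(xe);
 *	xe_pm_runtime_put(xe);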
 */
void xe_pm_runtime_get(struct xe_device *xe)
{
	trace_xe_pm_runtime_get(xe, __builtin_return_address(0));
	pm_runtime_get_noresume(xe->drm.dev);

	if (xe_pm_read_callback_task(xe) == current)
		return;

	xe_rpm_might_enter_cb(xe);
	pm_runtime_resume(xe->drm.dev);
}

/**
 * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle
 * @xe: xe device instance
 */
void xe_pm_runtime_put(struct xe_device *xe)
{
	trace_xe_pm_runtime_put(xe, __builtin_return_address(0));
	if (xe_pm_read_callback_task(xe) == current) {
		pm_runtime_put_noidle(xe->drm.dev);
	} else {
		pm_runtime_mark_last_busy(xe->drm.dev);
		pm_runtime_put(xe->drm.dev);
	}
}

/**
 * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl
 * @xe: xe device instance
 *
 * When possible, scope-based runtime PM (through
 * ACQUIRE(xe_pm_runtime_ioctl, ...)) is preferred over direct usage of this
 * function. Manual get/put handling should only be used when the function
 * contains goto-based logic which can break scope-based handling, or when the
 * lifetime of the runtime PM reference does not match a specific scope (e.g.,
 * runtime PM obtained in one function and released in a different one).
 *
 * Returns: Any number greater than or equal to 0 for success, negative error
 * code otherwise.
 */
int xe_pm_runtime_get_ioctl(struct xe_device *xe)
{
	trace_xe_pm_runtime_get_ioctl(xe, __builtin_return_address(0));
	if (WARN_ON(xe_pm_read_callback_task(xe) == current))
		return -ELOOP;

	xe_rpm_might_enter_cb(xe);
	return pm_runtime_get_sync(xe->drm.dev);
}

/**
 * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active
 * @xe: xe device instance
 *
 * Return: True if device is awake (regardless of the previous number of
 * references) and a new reference was taken, false otherwise.
 */
bool xe_pm_runtime_get_if_active(struct xe_device *xe)
{
	return pm_runtime_get_if_active(xe->drm.dev) > 0;
}

/**
 * xe_pm_runtime_get_if_in_use - Get a new reference if device is active with previous ref taken
 * @xe: xe device instance
 *
 * Return: True if device is awake, a previous reference had already been taken,
 * and a new reference was now taken, false otherwise.
 */
bool xe_pm_runtime_get_if_in_use(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		/* The device is awake, grab the ref and move on */
		pm_runtime_get_noresume(xe->drm.dev);
		return true;
	}

	return pm_runtime_get_if_in_use(xe->drm.dev) > 0;
}

/*
 * Very unreliable! Should only be used to suppress the false positive case
 * in the missing outer rpm protection warning.
 */
static bool xe_pm_suspending_or_resuming(struct xe_device *xe)
{
#ifdef CONFIG_PM
	struct device *dev = xe->drm.dev;

	return dev->power.runtime_status == RPM_SUSPENDING ||
		dev->power.runtime_status == RPM_RESUMING ||
		pm_suspend_in_progress();
#else
	return false;
#endif
}

/**
 * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming
 * @xe: xe device instance
 *
 * This function should be used in inner places where it is surely already
 * protected by outer-bound callers of `xe_pm_runtime_get`.
 * It will warn if not protected.
 * The reference should be put back after this function regardless, since it
 * will always bump the usage counter.
 *
 * When possible, scope-based runtime PM (through guard(xe_pm_runtime_noresume))
 * is preferred over direct usage of this function. Manual get/put handling
 * should only be used when the function contains goto-based logic which can
 * break scope-based handling, or when the lifetime of the runtime PM reference
 * does not match a specific scope (e.g., runtime PM obtained in one function
 * and released in a different one).
 */
void xe_pm_runtime_get_noresume(struct xe_device *xe)
{
	bool ref;

	ref = xe_pm_runtime_get_if_in_use(xe);

	if (!ref) {
		pm_runtime_get_noresume(xe->drm.dev);
		drm_WARN(&xe->drm, !xe_pm_suspending_or_resuming(xe),
			 "Missing outer runtime PM protection\n");
	}
}

/**
 * xe_pm_runtime_resume_and_get - Resume, then get a runtime_pm ref if awake.
 * @xe: xe device instance
 *
 * Returns: True if device is awake and the reference was taken, false otherwise.
 */
bool xe_pm_runtime_resume_and_get(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		/* The device is awake, grab the ref and move on */
		pm_runtime_get_noresume(xe->drm.dev);
		return true;
	}

	xe_rpm_might_enter_cb(xe);
	return pm_runtime_resume_and_get(xe->drm.dev) >= 0;
}

/**
 * xe_pm_assert_unbounded_bridge - Disable PM on unbounded pcie parent bridge
 * @xe: xe device instance
 */
void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *bridge = pci_upstream_bridge(pdev);

	if (!bridge)
		return;

	if (!bridge->driver) {
		drm_warn(&xe->drm, "unbounded parent pci bridge, device won't support any PM\n");
		device_set_pm_not_required(&pdev->dev);
	}
}

/**
 * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold
 * @xe: xe device instance
 * @threshold: VRAM size in MiB for the D3cold threshold
 *
 * Return:
 * * 0 - success
 * * -EINVAL - invalid argument
 */
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
	struct ttm_resource_manager *man;
	u32 vram_total_mb = 0;
	int i;

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man)
			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
	}

	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);

	if (threshold > vram_total_mb)
		return -EINVAL;

	mutex_lock(&xe->d3cold.lock);
	xe->d3cold.vram_threshold = threshold;
	mutex_unlock(&xe->d3cold.lock);

	return 0;
}

/**
 * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed
 * @xe: xe device instance
 *
 * To be called during runtime_pm idle callback.
 * Check for all the D3Cold conditions ahead of runtime suspend.
 */
void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
{
	struct ttm_resource_manager *man;
	u32 total_vram_used_mb = 0;
	u64 vram_used;
	int i;

	if (!xe->d3cold.capable) {
		xe->d3cold.allowed = false;
		return;
	}

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man) {
			vram_used = ttm_resource_manager_usage(man);
			total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024);
		}
	}

	mutex_lock(&xe->d3cold.lock);

	if (total_vram_used_mb < xe->d3cold.vram_threshold)
		xe->d3cold.allowed = true;
	else
		xe->d3cold.allowed = false;

	mutex_unlock(&xe->d3cold.lock);
}

/**
 * xe_pm_module_init() - Perform xe_pm specific module initialization.
 *
 * Return: 0 on success. Currently doesn't fail.
 */
int __init xe_pm_module_init(void)
{
	xe_pm_runtime_lockdep_prime();
	return 0;
}