1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2022 Intel Corporation 4 */ 5 6 #include "xe_pm.h" 7 8 #include <linux/fault-inject.h> 9 #include <linux/pm_runtime.h> 10 #include <linux/suspend.h> 11 #include <linux/dmi.h> 12 13 #include <drm/drm_managed.h> 14 #include <drm/ttm/ttm_placement.h> 15 16 #include "display/xe_display.h" 17 #include "xe_bo.h" 18 #include "xe_bo_evict.h" 19 #include "xe_device.h" 20 #include "xe_ggtt.h" 21 #include "xe_gt.h" 22 #include "xe_gt_idle.h" 23 #include "xe_i2c.h" 24 #include "xe_irq.h" 25 #include "xe_late_bind_fw.h" 26 #include "xe_pcode.h" 27 #include "xe_printk.h" 28 #include "xe_pxp.h" 29 #include "xe_sriov_vf_ccs.h" 30 #include "xe_sysctrl.h" 31 #include "xe_trace.h" 32 #include "xe_vm.h" 33 #include "xe_wa.h" 34 35 /** 36 * DOC: Xe Power Management 37 * 38 * Xe PM implements the main routines for both system level suspend states and 39 * for the opportunistic runtime suspend states. 40 * 41 * System Level Suspend (S-States) - In general this is OS initiated suspend 42 * driven by ACPI for achieving S0ix (a.k.a. S2idle, freeze), S3 (suspend to ram), 43 * S4 (disk). The main functions here are `xe_pm_suspend` and `xe_pm_resume`. They 44 * are the main point for the suspend to and resume from these states. 45 * 46 * PCI Device Suspend (D-States) - This is the opportunistic PCIe device low power 47 * state D3, controlled by the PCI subsystem and ACPI with the help from the 48 * runtime_pm infrastructure. 49 * PCI D3 is special and can mean D3hot, where Vcc power is on for keeping memory 50 * alive and quicker low latency resume or D3Cold where Vcc power is off for 51 * better power savings. 52 * The Vcc control of PCI hierarchy can only be controlled at the PCI root port 53 * level, while the device driver can be behind multiple bridges/switches and 54 * paired with other devices. For this reason, the PCI subsystem cannot perform 55 * the transition towards D3Cold. The lowest runtime PM possible from the PCI 56 * subsystem is D3hot. Then, if all these paired devices in the same root port 57 * are in D3hot, ACPI will assist here and run its own methods (_PR3 and _OFF) 58 * to perform the transition from D3hot to D3cold. Xe may disallow this 59 * transition by calling pci_d3cold_disable(root_pdev) before going to runtime 60 * suspend. It will be based on runtime conditions such as VRAM usage for a 61 * quick and low latency resume for instance. 62 * 63 * Runtime PM - This infrastructure provided by the Linux kernel allows the 64 * device drivers to indicate when the can be runtime suspended, so the device 65 * could be put at D3 (if supported), or allow deeper package sleep states 66 * (PC-states), and/or other low level power states. Xe PM component provides 67 * `xe_pm_runtime_suspend` and `xe_pm_runtime_resume` functions that PCI 68 * subsystem will call before transition to/from runtime suspend. 69 * 70 * Also, Xe PM provides get and put functions that Xe driver will use to 71 * indicate activity. In order to avoid locking complications with the memory 72 * management, whenever possible, these get and put functions needs to be called 73 * from the higher/outer levels. 74 * The main cases that need to be protected from the outer levels are: IOCTL, 75 * sysfs, debugfs, dma-buf sharing, GPU execution. 76 * 77 * This component is not responsible for GT idleness (RC6) nor GT frequency 78 * management (RPS). 79 */ 80 81 #ifdef CONFIG_LOCKDEP 82 static struct lockdep_map xe_pm_runtime_d3cold_map = { 83 .name = "xe_rpm_d3cold_map" 84 }; 85 86 static struct lockdep_map xe_pm_runtime_nod3cold_map = { 87 .name = "xe_rpm_nod3cold_map" 88 }; 89 90 static struct lockdep_map xe_pm_block_lockdep_map = { 91 .name = "xe_pm_block_map", 92 }; 93 #endif 94 95 static void xe_pm_block_begin_signalling(void) 96 { 97 lock_acquire_shared_recursive(&xe_pm_block_lockdep_map, 0, 1, NULL, _RET_IP_); 98 } 99 100 static void xe_pm_block_end_signalling(void) 101 { 102 lock_release(&xe_pm_block_lockdep_map, _RET_IP_); 103 } 104 105 /** 106 * xe_pm_might_block_on_suspend() - Annotate that the code might block on suspend 107 * 108 * Annotation to use where the code might block or seize to make 109 * progress pending resume completion. 110 */ 111 void xe_pm_might_block_on_suspend(void) 112 { 113 lock_map_acquire(&xe_pm_block_lockdep_map); 114 lock_map_release(&xe_pm_block_lockdep_map); 115 } 116 117 /** 118 * xe_pm_block_on_suspend() - Block pending suspend. 119 * @xe: The xe device about to be suspended. 120 * 121 * Block if the pm notifier has start evicting bos, to avoid 122 * racing and validating those bos back. The function is 123 * annotated to ensure no locks are held that are also grabbed 124 * in the pm notifier or the device suspend / resume. 125 * This is intended to be used by freezable tasks only. 126 * (Not freezable workqueues), with the intention that the function 127 * returns %-ERESTARTSYS when tasks are frozen during suspend, 128 * and allows the task to freeze. The caller must be able to 129 * handle the %-ERESTARTSYS. 130 * 131 * Return: %0 on success, %-ERESTARTSYS on signal pending or 132 * if freezing requested. 133 */ 134 int xe_pm_block_on_suspend(struct xe_device *xe) 135 { 136 xe_pm_might_block_on_suspend(); 137 138 return wait_for_completion_interruptible(&xe->pm_block); 139 } 140 141 /** 142 * xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context 143 * @xe: The xe device. 144 * 145 * Return: true if it is safe to runtime resume from reclaim context. 146 * false otherwise. 147 */ 148 bool xe_rpm_reclaim_safe(const struct xe_device *xe) 149 { 150 return !xe->d3cold.capable; 151 } 152 153 static void xe_rpm_lockmap_acquire(const struct xe_device *xe) 154 { 155 lock_map_acquire(xe_rpm_reclaim_safe(xe) ? 156 &xe_pm_runtime_nod3cold_map : 157 &xe_pm_runtime_d3cold_map); 158 } 159 160 static void xe_rpm_lockmap_release(const struct xe_device *xe) 161 { 162 lock_map_release(xe_rpm_reclaim_safe(xe) ? 163 &xe_pm_runtime_nod3cold_map : 164 &xe_pm_runtime_d3cold_map); 165 } 166 167 /** 168 * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle 169 * @xe: xe device instance 170 * 171 * Return: 0 on success 172 */ 173 int xe_pm_suspend(struct xe_device *xe) 174 { 175 struct xe_gt *gt; 176 u8 id; 177 int err; 178 179 drm_dbg(&xe->drm, "Suspending device\n"); 180 xe_pm_block_begin_signalling(); 181 trace_xe_pm_suspend(xe, __builtin_return_address(0)); 182 183 err = xe_pxp_pm_suspend(xe->pxp); 184 if (err) 185 goto err; 186 187 xe_late_bind_wait_for_worker_completion(&xe->late_bind); 188 189 for_each_gt(gt, xe, id) 190 xe_gt_suspend_prepare(gt); 191 192 xe_display_pm_suspend(xe); 193 194 /* FIXME: Super racey... */ 195 err = xe_bo_evict_all(xe); 196 if (err) 197 goto err_display; 198 199 for_each_gt(gt, xe, id) { 200 err = xe_gt_suspend(gt); 201 if (err) 202 goto err_display; 203 } 204 205 xe_irq_suspend(xe); 206 207 xe_display_pm_suspend_late(xe); 208 209 xe_i2c_pm_suspend(xe); 210 211 drm_dbg(&xe->drm, "Device suspended\n"); 212 xe_pm_block_end_signalling(); 213 214 return 0; 215 216 err_display: 217 xe_display_pm_resume(xe); 218 xe_pxp_pm_resume(xe->pxp); 219 err: 220 drm_dbg(&xe->drm, "Device suspend failed %d\n", err); 221 xe_pm_block_end_signalling(); 222 return err; 223 } 224 225 /** 226 * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0 227 * @xe: xe device instance 228 * 229 * Return: 0 on success 230 */ 231 int xe_pm_resume(struct xe_device *xe) 232 { 233 struct xe_tile *tile; 234 struct xe_gt *gt; 235 u8 id; 236 int err; 237 238 xe_pm_block_begin_signalling(); 239 drm_dbg(&xe->drm, "Resuming device\n"); 240 trace_xe_pm_resume(xe, __builtin_return_address(0)); 241 242 for_each_gt(gt, xe, id) 243 xe_gt_idle_disable_c6(gt); 244 245 for_each_tile(tile, xe, id) 246 xe_wa_apply_tile_workarounds(tile); 247 248 err = xe_pcode_ready(xe, true); 249 if (err) 250 return err; 251 252 xe_display_pm_resume_early(xe); 253 254 /* 255 * This only restores pinned memory which is the memory required for the 256 * GT(s) to resume. 257 */ 258 err = xe_bo_restore_early(xe); 259 if (err) 260 goto err; 261 262 xe_i2c_pm_resume(xe, true); 263 264 xe_sysctrl_pm_resume(xe); 265 266 xe_irq_resume(xe); 267 268 for_each_gt(gt, xe, id) { 269 err = xe_gt_resume(gt); 270 if (err) 271 break; 272 } 273 274 /* 275 * Try to bring up display before bailing from GT resume failure, 276 * so we don't leave the user clueless with a blank screen. 277 */ 278 xe_display_pm_resume(xe); 279 if (err) 280 goto err; 281 282 err = xe_bo_restore_late(xe); 283 if (err) 284 goto err; 285 286 xe_pxp_pm_resume(xe->pxp); 287 288 if (IS_VF_CCS_READY(xe)) 289 xe_sriov_vf_ccs_register_context(xe); 290 291 xe_late_bind_fw_load(&xe->late_bind); 292 293 drm_dbg(&xe->drm, "Device resumed\n"); 294 xe_pm_block_end_signalling(); 295 return 0; 296 err: 297 drm_dbg(&xe->drm, "Device resume failed %d\n", err); 298 xe_pm_block_end_signalling(); 299 return err; 300 } 301 302 static bool xe_pm_pci_d3cold_capable(struct xe_device *xe) 303 { 304 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 305 struct pci_dev *root_pdev; 306 307 root_pdev = pcie_find_root_port(pdev); 308 if (!root_pdev) 309 return false; 310 311 /* D3Cold requires PME capability */ 312 if (!pci_pme_capable(root_pdev, PCI_D3cold)) { 313 drm_dbg(&xe->drm, "d3cold: PME# not supported\n"); 314 return false; 315 } 316 317 /* D3Cold requires _PR3 power resource */ 318 if (!pci_pr3_present(root_pdev)) { 319 drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n"); 320 return false; 321 } 322 323 return true; 324 } 325 326 static void xe_pm_runtime_init(struct xe_device *xe) 327 { 328 struct device *dev = xe->drm.dev; 329 330 /* Our current VFs do not support RPM. so, disable it */ 331 if (IS_SRIOV_VF(xe)) 332 return; 333 334 /* 335 * Disable the system suspend direct complete optimization. 336 * We need to ensure that the regular device suspend/resume functions 337 * are called since our runtime_pm cannot guarantee local memory 338 * eviction for d3cold. 339 * TODO: Check HDA audio dependencies claimed by i915, and then enforce 340 * this option to integrated graphics as well. 341 */ 342 if (IS_DGFX(xe)) 343 dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE); 344 345 pm_runtime_use_autosuspend(dev); 346 pm_runtime_set_autosuspend_delay(dev, 1000); 347 pm_runtime_set_active(dev); 348 pm_runtime_allow(dev); 349 pm_runtime_mark_last_busy(dev); 350 pm_runtime_put(dev); 351 } 352 353 /** 354 * xe_pm_init_early() - Initialize Xe Power Management 355 * @xe: the &xe_device instance 356 * 357 * Initialize everything that is a "software-only" state that does not 358 * require access to any of the device's hardware data. 359 * 360 * Return: 0 on success or a negative error code on failure. 361 */ 362 int xe_pm_init_early(struct xe_device *xe) 363 { 364 int err; 365 366 init_completion(&xe->pm_block); 367 complete_all(&xe->pm_block); 368 INIT_LIST_HEAD(&xe->rebind_resume_list); 369 INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list); 370 371 err = drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock); 372 if (err) 373 return err; 374 375 err = drmm_mutex_init(&xe->drm, &xe->d3cold.lock); 376 if (err) 377 return err; 378 379 err = drmm_mutex_init(&xe->drm, &xe->rebind_resume_lock); 380 if (err) 381 return err; 382 383 return 0; 384 } 385 ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */ 386 387 /** 388 * xe_pm_probe() - Initialize Xe Power Management 389 * @xe: the &xe_device instance 390 * 391 * Check d3cold capability. 392 * 393 * Return: 0 on success or a negative error code on failure. 394 */ 395 int xe_pm_probe(struct xe_device *xe) 396 { 397 xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe); 398 xe_dbg(xe, "d3cold: capable=%s\n", str_yes_no(xe->d3cold.capable)); 399 400 return 0; 401 } 402 403 static u32 vram_threshold_value(struct xe_device *xe) 404 { 405 if (xe->info.platform == XE_BATTLEMAGE) { 406 const char *product_name; 407 408 product_name = dmi_get_system_info(DMI_PRODUCT_NAME); 409 if (product_name && strstr(product_name, "NUC13RNG")) { 410 drm_warn(&xe->drm, "BMG + D3Cold not supported on this platform\n"); 411 return 0; 412 } 413 } 414 415 return DEFAULT_VRAM_THRESHOLD; 416 } 417 418 static void xe_pm_wake_rebind_workers(struct xe_device *xe) 419 { 420 struct xe_vm *vm, *next; 421 422 mutex_lock(&xe->rebind_resume_lock); 423 list_for_each_entry_safe(vm, next, &xe->rebind_resume_list, 424 preempt.pm_activate_link) { 425 list_del_init(&vm->preempt.pm_activate_link); 426 xe_vm_resume_rebind_worker(vm); 427 } 428 mutex_unlock(&xe->rebind_resume_lock); 429 } 430 431 static int xe_pm_notifier_callback(struct notifier_block *nb, 432 unsigned long action, void *data) 433 { 434 struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier); 435 int err = 0; 436 437 switch (action) { 438 case PM_HIBERNATION_PREPARE: 439 case PM_SUSPEND_PREPARE: 440 { 441 struct xe_validation_ctx ctx; 442 443 reinit_completion(&xe->pm_block); 444 xe_pm_block_begin_signalling(); 445 xe_pm_runtime_get(xe); 446 (void)xe_validation_ctx_init(&ctx, &xe->val, NULL, 447 (struct xe_val_flags) {.exclusive = true}); 448 err = xe_bo_evict_all_user(xe); 449 xe_validation_ctx_fini(&ctx); 450 if (err) 451 drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err); 452 453 err = xe_bo_notifier_prepare_all_pinned(xe); 454 if (err) 455 drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err); 456 /* 457 * Keep the runtime pm reference until post hibernation / post suspend to 458 * avoid a runtime suspend interfering with evicted objects or backup 459 * allocations. 460 */ 461 xe_pm_block_end_signalling(); 462 break; 463 } 464 case PM_POST_HIBERNATION: 465 case PM_POST_SUSPEND: 466 complete_all(&xe->pm_block); 467 xe_pm_wake_rebind_workers(xe); 468 xe_bo_notifier_unprepare_all_pinned(xe); 469 xe_pm_runtime_put(xe); 470 break; 471 } 472 473 return NOTIFY_DONE; 474 } 475 476 /** 477 * xe_pm_init - Initialize Xe Power Management 478 * @xe: xe device instance 479 * 480 * This component is responsible for System and Device sleep states. 481 * 482 * Returns 0 for success, negative error code otherwise. 483 */ 484 int xe_pm_init(struct xe_device *xe) 485 { 486 u32 vram_threshold; 487 int err; 488 489 xe->pm_notifier.notifier_call = xe_pm_notifier_callback; 490 err = register_pm_notifier(&xe->pm_notifier); 491 if (err) 492 return err; 493 494 /* For now suspend/resume is only allowed with GuC */ 495 if (!xe_device_uc_enabled(xe)) 496 return 0; 497 498 if (xe->d3cold.capable) { 499 vram_threshold = vram_threshold_value(xe); 500 err = xe_pm_set_vram_threshold(xe, vram_threshold); 501 if (err) 502 goto err_unregister; 503 } 504 505 xe_pm_runtime_init(xe); 506 return 0; 507 508 err_unregister: 509 unregister_pm_notifier(&xe->pm_notifier); 510 return err; 511 } 512 513 static void xe_pm_runtime_fini(struct xe_device *xe) 514 { 515 struct device *dev = xe->drm.dev; 516 517 /* Our current VFs do not support RPM. so, disable it */ 518 if (IS_SRIOV_VF(xe)) 519 return; 520 521 pm_runtime_get_sync(dev); 522 pm_runtime_forbid(dev); 523 } 524 525 /** 526 * xe_pm_fini - Finalize PM 527 * @xe: xe device instance 528 */ 529 void xe_pm_fini(struct xe_device *xe) 530 { 531 if (xe_device_uc_enabled(xe)) 532 xe_pm_runtime_fini(xe); 533 534 unregister_pm_notifier(&xe->pm_notifier); 535 } 536 537 static void xe_pm_write_callback_task(struct xe_device *xe, 538 struct task_struct *task) 539 { 540 WRITE_ONCE(xe->pm_callback_task, task); 541 542 /* 543 * Just in case it's somehow possible for our writes to be reordered to 544 * the extent that something else re-uses the task written in 545 * pm_callback_task. For example after returning from the callback, but 546 * before the reordered write that resets pm_callback_task back to NULL. 547 */ 548 smp_mb(); /* pairs with xe_pm_read_callback_task */ 549 } 550 551 struct task_struct *xe_pm_read_callback_task(struct xe_device *xe) 552 { 553 smp_mb(); /* pairs with xe_pm_write_callback_task */ 554 555 return READ_ONCE(xe->pm_callback_task); 556 } 557 558 /** 559 * xe_pm_runtime_suspended - Check if runtime_pm state is suspended 560 * @xe: xe device instance 561 * 562 * This does not provide any guarantee that the device is going to remain 563 * suspended as it might be racing with the runtime state transitions. 564 * It can be used only as a non-reliable assertion, to ensure that we are not in 565 * the sleep state while trying to access some memory for instance. 566 * 567 * Returns true if PCI device is suspended, false otherwise. 568 */ 569 bool xe_pm_runtime_suspended(struct xe_device *xe) 570 { 571 return pm_runtime_suspended(xe->drm.dev); 572 } 573 574 /** 575 * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold 576 * @xe: xe device instance 577 * 578 * Returns 0 for success, negative error code otherwise. 579 */ 580 int xe_pm_runtime_suspend(struct xe_device *xe) 581 { 582 struct xe_bo *bo, *on; 583 struct xe_gt *gt; 584 u8 id; 585 int err = 0; 586 587 trace_xe_pm_runtime_suspend(xe, __builtin_return_address(0)); 588 /* Disable access_ongoing asserts and prevent recursive pm calls */ 589 xe_pm_write_callback_task(xe, current); 590 591 /* 592 * The actual xe_pm_runtime_put() is always async underneath, so 593 * exactly where that is called should makes no difference to us. However 594 * we still need to be very careful with the locks that this callback 595 * acquires and the locks that are acquired and held by any callers of 596 * xe_runtime_pm_get(). We already have the matching annotation 597 * on that side, but we also need it here. For example lockdep should be 598 * able to tell us if the following scenario is in theory possible: 599 * 600 * CPU0 | CPU1 (kworker) 601 * lock(A) | 602 * | xe_pm_runtime_suspend() 603 * | lock(A) 604 * xe_pm_runtime_get() | 605 * 606 * This will clearly deadlock since rpm core needs to wait for 607 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A) 608 * on CPU0 which prevents CPU1 making forward progress. With the 609 * annotation here and in xe_pm_runtime_get() lockdep will see 610 * the potential lock inversion and give us a nice splat. 611 */ 612 xe_rpm_lockmap_acquire(xe); 613 614 err = xe_pxp_pm_suspend(xe->pxp); 615 if (err) 616 goto out; 617 618 /* 619 * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify 620 * also checks and deletes bo entry from user fault list. 621 */ 622 mutex_lock(&xe->mem_access.vram_userfault.lock); 623 list_for_each_entry_safe(bo, on, 624 &xe->mem_access.vram_userfault.list, vram_userfault_link) 625 xe_bo_runtime_pm_release_mmap_offset(bo); 626 mutex_unlock(&xe->mem_access.vram_userfault.lock); 627 628 xe_display_pm_runtime_suspend(xe); 629 630 if (xe->d3cold.allowed) { 631 err = xe_bo_evict_all(xe); 632 if (err) 633 goto out_resume; 634 } 635 636 for_each_gt(gt, xe, id) { 637 err = xe->d3cold.allowed ? xe_gt_suspend(gt) : xe_gt_runtime_suspend(gt); 638 if (err) 639 goto out_resume; 640 } 641 642 xe_irq_suspend(xe); 643 644 xe_display_pm_runtime_suspend_late(xe); 645 646 xe_i2c_pm_suspend(xe); 647 648 xe_rpm_lockmap_release(xe); 649 xe_pm_write_callback_task(xe, NULL); 650 return 0; 651 652 out_resume: 653 xe_display_pm_runtime_resume(xe); 654 xe_pxp_pm_resume(xe->pxp); 655 out: 656 xe_rpm_lockmap_release(xe); 657 xe_pm_write_callback_task(xe, NULL); 658 return err; 659 } 660 661 /** 662 * xe_pm_runtime_resume - Waking up from D3hot/D3Cold 663 * @xe: xe device instance 664 * 665 * Returns 0 for success, negative error code otherwise. 666 */ 667 int xe_pm_runtime_resume(struct xe_device *xe) 668 { 669 struct xe_gt *gt; 670 u8 id; 671 int err = 0; 672 673 trace_xe_pm_runtime_resume(xe, __builtin_return_address(0)); 674 /* Disable access_ongoing asserts and prevent recursive pm calls */ 675 xe_pm_write_callback_task(xe, current); 676 677 xe_rpm_lockmap_acquire(xe); 678 679 if (xe->d3cold.allowed) { 680 for_each_gt(gt, xe, id) 681 xe_gt_idle_disable_c6(gt); 682 683 err = xe_pcode_ready(xe, true); 684 if (err) 685 goto out; 686 687 xe_display_pm_resume_early(xe); 688 689 /* 690 * This only restores pinned memory which is the memory 691 * required for the GT(s) to resume. 692 */ 693 err = xe_bo_restore_early(xe); 694 if (err) 695 goto out; 696 } 697 698 xe_i2c_pm_resume(xe, xe->d3cold.allowed); 699 700 if (xe->d3cold.allowed) 701 xe_sysctrl_pm_resume(xe); 702 703 xe_irq_resume(xe); 704 705 for_each_gt(gt, xe, id) { 706 err = xe->d3cold.allowed ? xe_gt_resume(gt) : xe_gt_runtime_resume(gt); 707 if (err) 708 break; 709 } 710 711 /* 712 * Try to bring up display before bailing from GT resume failure, 713 * so we don't leave the user clueless with a blank screen. 714 */ 715 xe_display_pm_runtime_resume(xe); 716 if (err) 717 goto out; 718 719 if (xe->d3cold.allowed) { 720 err = xe_bo_restore_late(xe); 721 if (err) 722 goto out; 723 } 724 725 xe_pxp_pm_resume(xe->pxp); 726 727 if (IS_VF_CCS_READY(xe)) 728 xe_sriov_vf_ccs_register_context(xe); 729 730 if (xe->d3cold.allowed) 731 xe_late_bind_fw_load(&xe->late_bind); 732 733 out: 734 xe_rpm_lockmap_release(xe); 735 xe_pm_write_callback_task(xe, NULL); 736 return err; 737 } 738 739 /* 740 * For places where resume is synchronous it can be quite easy to deadlock 741 * if we are not careful. Also in practice it might be quite timing 742 * sensitive to ever see the 0 -> 1 transition with the callers locks 743 * held, so deadlocks might exist but are hard for lockdep to ever see. 744 * With this in mind, help lockdep learn about the potentially scary 745 * stuff that can happen inside the runtime_resume callback by acquiring 746 * a dummy lock (it doesn't protect anything and gets compiled out on 747 * non-debug builds). Lockdep then only needs to see the 748 * xe_pm_runtime_xxx_map -> runtime_resume callback once, and then can 749 * hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map. 750 * For example if the (callers_locks) are ever grabbed in the 751 * runtime_resume callback, lockdep should give us a nice splat. 752 */ 753 static void xe_rpm_might_enter_cb(const struct xe_device *xe) 754 { 755 xe_rpm_lockmap_acquire(xe); 756 xe_rpm_lockmap_release(xe); 757 } 758 759 /* 760 * Prime the lockdep maps for known locking orders that need to 761 * be supported but that may not always occur on all systems. 762 */ 763 static void xe_pm_runtime_lockdep_prime(void) 764 { 765 struct dma_resv lockdep_resv; 766 767 dma_resv_init(&lockdep_resv); 768 lock_map_acquire(&xe_pm_runtime_d3cold_map); 769 /* D3Cold takes the dma_resv locks to evict bos */ 770 dma_resv_lock(&lockdep_resv, NULL); 771 dma_resv_unlock(&lockdep_resv); 772 lock_map_release(&xe_pm_runtime_d3cold_map); 773 774 /* Shrinkers might like to wake up the device under reclaim. */ 775 fs_reclaim_acquire(GFP_KERNEL); 776 lock_map_acquire(&xe_pm_runtime_nod3cold_map); 777 lock_map_release(&xe_pm_runtime_nod3cold_map); 778 fs_reclaim_release(GFP_KERNEL); 779 } 780 781 /** 782 * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously 783 * @xe: xe device instance 784 * 785 * When possible, scope-based runtime PM (through guard(xe_pm_runtime)) is 786 * be preferred over direct usage of this function. Manual get/put handling 787 * should only be used when the function contains goto-based logic which 788 * can break scope-based handling, or when the lifetime of the runtime PM 789 * reference does not match a specific scope (e.g., runtime PM obtained in one 790 * function and released in a different one). 791 */ 792 void xe_pm_runtime_get(struct xe_device *xe) 793 { 794 trace_xe_pm_runtime_get(xe, __builtin_return_address(0)); 795 pm_runtime_get_noresume(xe->drm.dev); 796 797 if (xe_pm_read_callback_task(xe) == current) 798 return; 799 800 xe_rpm_might_enter_cb(xe); 801 pm_runtime_resume(xe->drm.dev); 802 } 803 804 /** 805 * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle 806 * @xe: xe device instance 807 */ 808 void xe_pm_runtime_put(struct xe_device *xe) 809 { 810 trace_xe_pm_runtime_put(xe, __builtin_return_address(0)); 811 if (xe_pm_read_callback_task(xe) == current) { 812 pm_runtime_put_noidle(xe->drm.dev); 813 } else { 814 pm_runtime_mark_last_busy(xe->drm.dev); 815 pm_runtime_put(xe->drm.dev); 816 } 817 } 818 819 /** 820 * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl 821 * @xe: xe device instance 822 * 823 * When possible, scope-based runtime PM (through 824 * ACQUIRE(xe_pm_runtime_ioctl, ...)) is be preferred over direct usage of this 825 * function. Manual get/put handling should only be used when the function 826 * contains goto-based logic which can break scope-based handling, or when the 827 * lifetime of the runtime PM reference does not match a specific scope (e.g., 828 * runtime PM obtained in one function and released in a different one). 829 * 830 * Returns: Any number greater than or equal to 0 for success, negative error 831 * code otherwise. 832 */ 833 int xe_pm_runtime_get_ioctl(struct xe_device *xe) 834 { 835 trace_xe_pm_runtime_get_ioctl(xe, __builtin_return_address(0)); 836 if (WARN_ON(xe_pm_read_callback_task(xe) == current)) 837 return -ELOOP; 838 839 xe_rpm_might_enter_cb(xe); 840 return pm_runtime_get_sync(xe->drm.dev); 841 } 842 843 /** 844 * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active 845 * @xe: xe device instance 846 * 847 * Return: True if device is awake (regardless the previous number of references) 848 * and a new reference was taken, false otherwise. 849 */ 850 bool xe_pm_runtime_get_if_active(struct xe_device *xe) 851 { 852 return pm_runtime_get_if_active(xe->drm.dev) > 0; 853 } 854 855 /** 856 * xe_pm_runtime_get_if_in_use - Get a new reference if device is active with previous ref taken 857 * @xe: xe device instance 858 * 859 * Return: True if device is awake, a previous reference had been already taken, 860 * and a new reference was now taken, false otherwise. 861 */ 862 bool xe_pm_runtime_get_if_in_use(struct xe_device *xe) 863 { 864 if (xe_pm_read_callback_task(xe) == current) { 865 /* The device is awake, grab the ref and move on */ 866 pm_runtime_get_noresume(xe->drm.dev); 867 return true; 868 } 869 870 return pm_runtime_get_if_in_use(xe->drm.dev) > 0; 871 } 872 873 /* 874 * Very unreliable! Should only be used to suppress the false positive case 875 * in the missing outer rpm protection warning. 876 */ 877 static bool xe_pm_suspending_or_resuming(struct xe_device *xe) 878 { 879 #ifdef CONFIG_PM 880 struct device *dev = xe->drm.dev; 881 882 return dev->power.runtime_status == RPM_SUSPENDING || 883 dev->power.runtime_status == RPM_RESUMING || 884 pm_suspend_in_progress(); 885 #else 886 return false; 887 #endif 888 } 889 890 /** 891 * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming 892 * @xe: xe device instance 893 * 894 * This function should be used in inner places where it is surely already 895 * protected by outer-bound callers of `xe_pm_runtime_get`. 896 * It will warn if not protected. 897 * The reference should be put back after this function regardless, since it 898 * will always bump the usage counter, regardless. 899 * 900 * When possible, scope-based runtime PM (through guard(xe_pm_runtime_noresume)) 901 * is be preferred over direct usage of this function. Manual get/put handling 902 * should only be used when the function contains goto-based logic which can 903 * break scope-based handling, or when the lifetime of the runtime PM reference 904 * does not match a specific scope (e.g., runtime PM obtained in one function 905 * and released in a different one). 906 */ 907 void xe_pm_runtime_get_noresume(struct xe_device *xe) 908 { 909 bool ref; 910 911 ref = xe_pm_runtime_get_if_in_use(xe); 912 913 if (!ref) { 914 pm_runtime_get_noresume(xe->drm.dev); 915 drm_WARN(&xe->drm, !xe_pm_suspending_or_resuming(xe), 916 "Missing outer runtime PM protection\n"); 917 } 918 } 919 920 /** 921 * xe_pm_runtime_resume_and_get - Resume, then get a runtime_pm ref if awake. 922 * @xe: xe device instance 923 * 924 * Returns: True if device is awake and the reference was taken, false otherwise. 925 */ 926 bool xe_pm_runtime_resume_and_get(struct xe_device *xe) 927 { 928 if (xe_pm_read_callback_task(xe) == current) { 929 /* The device is awake, grab the ref and move on */ 930 pm_runtime_get_noresume(xe->drm.dev); 931 return true; 932 } 933 934 xe_rpm_might_enter_cb(xe); 935 return pm_runtime_resume_and_get(xe->drm.dev) >= 0; 936 } 937 938 /** 939 * xe_pm_assert_unbounded_bridge - Disable PM on unbounded pcie parent bridge 940 * @xe: xe device instance 941 */ 942 void xe_pm_assert_unbounded_bridge(struct xe_device *xe) 943 { 944 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 945 struct pci_dev *bridge = pci_upstream_bridge(pdev); 946 947 if (!bridge) 948 return; 949 950 if (!bridge->driver) { 951 drm_warn(&xe->drm, "unbounded parent pci bridge, device won't support any PM support.\n"); 952 device_set_pm_not_required(&pdev->dev); 953 } 954 } 955 956 /** 957 * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold 958 * @xe: xe device instance 959 * @threshold: VRAM size in MiB for the D3cold threshold 960 * 961 * Return: 962 * * 0 - success 963 * * -EINVAL - invalid argument 964 */ 965 int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) 966 { 967 struct ttm_resource_manager *man; 968 u32 vram_total_mb = 0; 969 int i; 970 971 for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { 972 man = ttm_manager_type(&xe->ttm, i); 973 if (man) 974 vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024); 975 } 976 977 drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb); 978 979 if (threshold > vram_total_mb) 980 return -EINVAL; 981 982 mutex_lock(&xe->d3cold.lock); 983 xe->d3cold.vram_threshold = threshold; 984 mutex_unlock(&xe->d3cold.lock); 985 986 return 0; 987 } 988 989 /** 990 * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed 991 * @xe: xe device instance 992 * 993 * To be called during runtime_pm idle callback. 994 * Check for all the D3Cold conditions ahead of runtime suspend. 995 */ 996 void xe_pm_d3cold_allowed_toggle(struct xe_device *xe) 997 { 998 struct ttm_resource_manager *man; 999 u32 total_vram_used_mb = 0; 1000 u64 vram_used; 1001 int i; 1002 1003 if (!xe->d3cold.capable) { 1004 xe->d3cold.allowed = false; 1005 return; 1006 } 1007 1008 for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { 1009 man = ttm_manager_type(&xe->ttm, i); 1010 if (man) { 1011 vram_used = ttm_resource_manager_usage(man); 1012 total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024); 1013 } 1014 } 1015 1016 mutex_lock(&xe->d3cold.lock); 1017 1018 if (total_vram_used_mb < xe->d3cold.vram_threshold) 1019 xe->d3cold.allowed = true; 1020 else 1021 xe->d3cold.allowed = false; 1022 1023 mutex_unlock(&xe->d3cold.lock); 1024 } 1025 1026 /** 1027 * xe_pm_module_init() - Perform xe_pm specific module initialization. 1028 * 1029 * Return: 0 on success. Currently doesn't fail. 1030 */ 1031 int __init xe_pm_module_init(void) 1032 { 1033 xe_pm_runtime_lockdep_prime(); 1034 return 0; 1035 } 1036