// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_pm.h"

#include <linux/fault-inject.h>
#include <linux/pm_runtime.h>
#include <linux/suspend.h>
#include <linux/dmi.h>

#include <drm/drm_managed.h>
#include <drm/ttm/ttm_placement.h>

#include "display/xe_display.h"
#include "xe_bo.h"
#include "xe_bo_evict.h"
#include "xe_device.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_gt_idle.h"
#include "xe_i2c.h"
#include "xe_irq.h"
#include "xe_late_bind_fw.h"
#include "xe_pcode.h"
#include "xe_pxp.h"
#include "xe_sriov_vf_ccs.h"
#include "xe_trace.h"
#include "xe_vm.h"
#include "xe_wa.h"

/**
 * DOC: Xe Power Management
 *
 * Xe PM implements the main routines for both system level suspend states and
 * for the opportunistic runtime suspend states.
 *
 * System Level Suspend (S-States) - In general this is OS initiated suspend
 * driven by ACPI for achieving S0ix (a.k.a. S2idle, freeze), S3 (suspend to ram),
 * S4 (suspend to disk). The main functions here are `xe_pm_suspend` and
 * `xe_pm_resume`. They are the entry points for suspending to and resuming
 * from these states.
 *
 * PCI Device Suspend (D-States) - This is the opportunistic PCIe device low power
 * state D3, controlled by the PCI subsystem and ACPI with the help from the
 * runtime_pm infrastructure.
 * PCI D3 is special and can mean D3hot, where Vcc power is on for keeping memory
 * alive and providing a quicker, low latency resume, or D3Cold, where Vcc power
 * is off for better power savings.
 * Vcc for the PCI hierarchy can only be controlled at the PCI root port level,
 * while the device driver can be behind multiple bridges/switches and paired
 * with other devices. For this reason, the PCI subsystem cannot perform the
 * transition towards D3Cold. The lowest runtime PM possible from the PCI
 * subsystem is D3hot. Then, if all the paired devices in the same root port
 * are in D3hot, ACPI will assist here and run its own methods (_PR3 and _OFF)
 * to perform the transition from D3hot to D3cold. Xe may disallow this
 * transition by calling pci_d3cold_disable(root_pdev) before going to runtime
 * suspend, based on runtime conditions such as VRAM usage, so that resume
 * stays quick and low latency.
 *
 * Runtime PM - This infrastructure provided by the Linux kernel allows device
 * drivers to indicate when they can be runtime suspended, so the device can be
 * put in D3 (if supported), or deeper package sleep states (PC-states) and/or
 * other low level power states can be allowed. The Xe PM component provides
 * the `xe_pm_runtime_suspend` and `xe_pm_runtime_resume` functions that the
 * PCI subsystem will call before the transition to/from runtime suspend.
 *
 * Also, Xe PM provides get and put functions that the Xe driver will use to
 * indicate activity. In order to avoid locking complications with the memory
 * management, whenever possible, these get and put functions need to be called
 * from the higher/outer levels.
 * The main cases that need to be protected from the outer levels are: IOCTL,
 * sysfs, debugfs, dma-buf sharing, GPU execution.
 *
 * This component is not responsible for GT idleness (RC6) nor GT frequency
 * management (RPS).
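 *
 * As an illustration only (the handler and helper below are hypothetical and
 * not part of this driver), an outer entry point such as an ioctl handler is
 * expected to bracket the work that may touch the hardware with the get/put
 * pair:
 *
 *	int xe_foo_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 *	{
 *		struct xe_device *xe = to_xe_device(dev);
 *		int ret;
 *
 *		xe_pm_runtime_get(xe);
 *		ret = xe_foo_do_work(xe, data);	// hypothetical inner work
 *		xe_pm_runtime_put(xe);
 *
 *		return ret;
 *	}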
 */

#ifdef CONFIG_LOCKDEP
static struct lockdep_map xe_pm_runtime_d3cold_map = {
	.name = "xe_rpm_d3cold_map"
};

static struct lockdep_map xe_pm_runtime_nod3cold_map = {
	.name = "xe_rpm_nod3cold_map"
};

static struct lockdep_map xe_pm_block_lockdep_map = {
	.name = "xe_pm_block_map",
};
#endif

static void xe_pm_block_begin_signalling(void)
{
	lock_acquire_shared_recursive(&xe_pm_block_lockdep_map, 0, 1, NULL, _RET_IP_);
}

static void xe_pm_block_end_signalling(void)
{
	lock_release(&xe_pm_block_lockdep_map, _RET_IP_);
}

/**
 * xe_pm_might_block_on_suspend() - Annotate that the code might block on suspend
 *
 * Annotation to use where the code might block or cease to make
 * progress pending resume completion.
 */
void xe_pm_might_block_on_suspend(void)
{
	lock_map_acquire(&xe_pm_block_lockdep_map);
	lock_map_release(&xe_pm_block_lockdep_map);
}

/**
 * xe_pm_block_on_suspend() - Block pending suspend.
 * @xe: The xe device about to be suspended.
 *
 * Block if the pm notifier has started evicting bos, to avoid
 * racing with it and validating those bos back in. The function is
 * annotated to ensure no locks are held that are also grabbed
 * in the pm notifier or the device suspend / resume.
 * This is intended to be used by freezable tasks only
 * (not freezable workqueues), with the intention that the function
 * returns %-ERESTARTSYS when tasks are frozen during suspend,
 * allowing the task to freeze. The caller must be able to
 * handle the %-ERESTARTSYS.
 *
 * Return: %0 on success, %-ERESTARTSYS on signal pending or
 * if freezing is requested.
 */
int xe_pm_block_on_suspend(struct xe_device *xe)
{
	xe_pm_might_block_on_suspend();

	return wait_for_completion_interruptible(&xe->pm_block);
}

/**
 * xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context
 * @xe: The xe device.
 *
 * Return: true if it is safe to runtime resume from reclaim context.
 * false otherwise.
 */
bool xe_rpm_reclaim_safe(const struct xe_device *xe)
{
	return !xe->d3cold.capable;
}

static void xe_rpm_lockmap_acquire(const struct xe_device *xe)
{
	lock_map_acquire(xe_rpm_reclaim_safe(xe) ?
			 &xe_pm_runtime_nod3cold_map :
			 &xe_pm_runtime_d3cold_map);
}

static void xe_rpm_lockmap_release(const struct xe_device *xe)
{
	lock_map_release(xe_rpm_reclaim_safe(xe) ?
			 &xe_pm_runtime_nod3cold_map :
			 &xe_pm_runtime_d3cold_map);
}

/**
 * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_suspend(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err;

	drm_dbg(&xe->drm, "Suspending device\n");
	xe_pm_block_begin_signalling();
	trace_xe_pm_suspend(xe, __builtin_return_address(0));

	err = xe_pxp_pm_suspend(xe->pxp);
	if (err)
		goto err;

	xe_late_bind_wait_for_worker_completion(&xe->late_bind);

	for_each_gt(gt, xe, id)
		xe_gt_suspend_prepare(gt);

	xe_display_pm_suspend(xe);

	/* FIXME: Super racey... */
	err = xe_bo_evict_all(xe);
	if (err)
		goto err_display;

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err)
			goto err_display;
	}

	xe_irq_suspend(xe);

	xe_display_pm_suspend_late(xe);

	xe_i2c_pm_suspend(xe);

	drm_dbg(&xe->drm, "Device suspended\n");
	xe_pm_block_end_signalling();

	return 0;

err_display:
	xe_display_pm_resume(xe);
	xe_pxp_pm_resume(xe->pxp);
err:
	drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
	xe_pm_block_end_signalling();
	return err;
}

/**
 * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_resume(struct xe_device *xe)
{
	struct xe_tile *tile;
	struct xe_gt *gt;
	u8 id;
	int err;

	xe_pm_block_begin_signalling();
	drm_dbg(&xe->drm, "Resuming device\n");
	trace_xe_pm_resume(xe, __builtin_return_address(0));

	for_each_gt(gt, xe, id)
		xe_gt_idle_disable_c6(gt);

	for_each_tile(tile, xe, id)
		xe_wa_apply_tile_workarounds(tile);

	err = xe_pcode_ready(xe, true);
	if (err)
		return err;

	xe_display_pm_resume_early(xe);

	/*
	 * This only restores pinned memory which is the memory required for the
	 * GT(s) to resume.
	 */
	err = xe_bo_restore_early(xe);
	if (err)
		goto err;

	xe_i2c_pm_resume(xe, true);

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id) {
		err = xe_gt_resume(gt);
		if (err)
			break;
	}

	/*
	 * Try to bring up display before bailing from GT resume failure,
	 * so we don't leave the user clueless with a blank screen.
	 */
	xe_display_pm_resume(xe);
	if (err)
		goto err;

	err = xe_bo_restore_late(xe);
	if (err)
		goto err;

	xe_pxp_pm_resume(xe->pxp);

	if (IS_VF_CCS_READY(xe))
		xe_sriov_vf_ccs_register_context(xe);

	xe_late_bind_fw_load(&xe->late_bind);

	drm_dbg(&xe->drm, "Device resumed\n");
	xe_pm_block_end_signalling();
	return 0;
err:
	drm_dbg(&xe->drm, "Device resume failed %d\n", err);
	xe_pm_block_end_signalling();
	return err;
}

static bool xe_pm_pci_d3cold_capable(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *root_pdev;

	root_pdev = pcie_find_root_port(pdev);
	if (!root_pdev)
		return false;

	/* D3Cold requires PME capability */
	if (!pci_pme_capable(root_pdev, PCI_D3cold)) {
		drm_dbg(&xe->drm, "d3cold: PME# not supported\n");
		return false;
	}

	/* D3Cold requires _PR3 power resource */
	if (!pci_pr3_present(root_pdev)) {
		drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n");
		return false;
	}

	return true;
}

static void xe_pm_runtime_init(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/* Our current VFs do not support RPM, so disable it */
	if (IS_SRIOV_VF(xe))
		return;

	/*
	 * Disable the system suspend direct complete optimization.
	 * We need to ensure that the regular device suspend/resume functions
	 * are called since our runtime_pm cannot guarantee local memory
	 * eviction for d3cold.
	 * TODO: Check HDA audio dependencies claimed by i915, and then enforce
	 * this option for integrated graphics as well.
	 */
	if (IS_DGFX(xe))
		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);

	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, 1000);
	pm_runtime_set_active(dev);
	pm_runtime_allow(dev);
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put(dev);
}

int xe_pm_init_early(struct xe_device *xe)
{
	int err;

	INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);

	err = drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
	if (err)
		return err;

	err = drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
	if (err)
		return err;

	xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe);
	return 0;
}
ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */

static u32 vram_threshold_value(struct xe_device *xe)
{
	if (xe->info.platform == XE_BATTLEMAGE) {
		const char *product_name;

		product_name = dmi_get_system_info(DMI_PRODUCT_NAME);
		if (product_name && strstr(product_name, "NUC13RNG")) {
			drm_warn(&xe->drm, "BMG + D3Cold not supported on this platform\n");
			return 0;
		}
	}

	return DEFAULT_VRAM_THRESHOLD;
}

static void xe_pm_wake_rebind_workers(struct xe_device *xe)
{
	struct xe_vm *vm, *next;

	mutex_lock(&xe->rebind_resume_lock);
	list_for_each_entry_safe(vm, next, &xe->rebind_resume_list,
				 preempt.pm_activate_link) {
		list_del_init(&vm->preempt.pm_activate_link);
		xe_vm_resume_rebind_worker(vm);
	}
	mutex_unlock(&xe->rebind_resume_lock);
}

static int xe_pm_notifier_callback(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier);
	int err = 0;

	switch (action) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
	{
		struct xe_validation_ctx ctx;

		reinit_completion(&xe->pm_block);
		xe_pm_block_begin_signalling();
		xe_pm_runtime_get(xe);
		(void)xe_validation_ctx_init(&ctx, &xe->val, NULL,
					     (struct xe_val_flags) {.exclusive = true});
		err = xe_bo_evict_all_user(xe);
		xe_validation_ctx_fini(&ctx);
		if (err)
			drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err);

		err = xe_bo_notifier_prepare_all_pinned(xe);
		if (err)
			drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err);
		/*
		 * Keep the runtime pm reference until post hibernation / post suspend to
		 * avoid a runtime suspend interfering with evicted objects or backup
		 * allocations.
		 */
		xe_pm_block_end_signalling();
		break;
	}
	case PM_POST_HIBERNATION:
	case PM_POST_SUSPEND:
		complete_all(&xe->pm_block);
		xe_pm_wake_rebind_workers(xe);
		xe_bo_notifier_unprepare_all_pinned(xe);
		xe_pm_runtime_put(xe);
		break;
	}

	return NOTIFY_DONE;
}

/**
 * xe_pm_init - Initialize Xe Power Management
 * @xe: xe device instance
 *
 * This component is responsible for System and Device sleep states.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_init(struct xe_device *xe)
{
	u32 vram_threshold;
	int err;

	xe->pm_notifier.notifier_call = xe_pm_notifier_callback;
	err = register_pm_notifier(&xe->pm_notifier);
	if (err)
		return err;

	err = drmm_mutex_init(&xe->drm, &xe->rebind_resume_lock);
	if (err)
		goto err_unregister;

	init_completion(&xe->pm_block);
	complete_all(&xe->pm_block);
	INIT_LIST_HEAD(&xe->rebind_resume_list);

	/* For now suspend/resume is only allowed with GuC */
	if (!xe_device_uc_enabled(xe))
		return 0;

	if (xe->d3cold.capable) {
		vram_threshold = vram_threshold_value(xe);
		err = xe_pm_set_vram_threshold(xe, vram_threshold);
		if (err)
			goto err_unregister;
	}

	xe_pm_runtime_init(xe);
	return 0;

err_unregister:
	unregister_pm_notifier(&xe->pm_notifier);
	return err;
}

static void xe_pm_runtime_fini(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/* Our current VFs do not support RPM, so disable it */
	if (IS_SRIOV_VF(xe))
		return;

	pm_runtime_get_sync(dev);
	pm_runtime_forbid(dev);
}

/**
 * xe_pm_fini - Finalize PM
 * @xe: xe device instance
 */
void xe_pm_fini(struct xe_device *xe)
{
	if (xe_device_uc_enabled(xe))
		xe_pm_runtime_fini(xe);

	unregister_pm_notifier(&xe->pm_notifier);
}

static void xe_pm_write_callback_task(struct xe_device *xe,
				      struct task_struct *task)
{
	WRITE_ONCE(xe->pm_callback_task, task);

	/*
	 * Just in case it's somehow possible for our writes to be reordered to
	 * the extent that something else re-uses the task written in
	 * pm_callback_task. For example after returning from the callback, but
	 * before the reordered write that resets pm_callback_task back to NULL.
	 */
	smp_mb(); /* pairs with xe_pm_read_callback_task */
}

struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
{
	smp_mb(); /* pairs with xe_pm_write_callback_task */

	return READ_ONCE(xe->pm_callback_task);
}

/**
 * xe_pm_runtime_suspended - Check if runtime_pm state is suspended
 * @xe: xe device instance
 *
 * This does not provide any guarantee that the device is going to remain
 * suspended as it might be racing with the runtime state transitions.
 * It can only be used as a non-reliable assertion, for instance to ensure
 * that we are not in the sleep state while trying to access some memory.
 *
 * Returns true if PCI device is suspended, false otherwise.
 */
bool xe_pm_runtime_suspended(struct xe_device *xe)
{
	return pm_runtime_suspended(xe->drm.dev);
}

/**
 * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold
 * @xe: xe device instance
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_runtime_suspend(struct xe_device *xe)
{
	struct xe_bo *bo, *on;
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	trace_xe_pm_runtime_suspend(xe, __builtin_return_address(0));
	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	/*
	 * The actual xe_pm_runtime_put() is always async underneath, so
	 * exactly where that is called should make no difference to us. However
	 * we still need to be very careful with the locks that this callback
	 * acquires and the locks that are acquired and held by any callers of
	 * xe_pm_runtime_get(). We already have the matching annotation
	 * on that side, but we also need it here. For example lockdep should be
	 * able to tell us if the following scenario is in theory possible:
	 *
	 * CPU0                  | CPU1 (kworker)
	 * lock(A)               |
	 *                       | xe_pm_runtime_suspend()
	 *                       | lock(A)
	 * xe_pm_runtime_get()   |
	 *
	 * This will clearly deadlock since rpm core needs to wait for
	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
	 * on CPU0 which prevents CPU1 making forward progress. With the
	 * annotation here and in xe_pm_runtime_get() lockdep will see
	 * the potential lock inversion and give us a nice splat.
	 */
	xe_rpm_lockmap_acquire(xe);

	err = xe_pxp_pm_suspend(xe->pxp);
	if (err)
		goto out;

	/*
	 * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify
	 * also check and delete the bo entry from the user fault list.
	 */
	mutex_lock(&xe->mem_access.vram_userfault.lock);
	list_for_each_entry_safe(bo, on,
				 &xe->mem_access.vram_userfault.list, vram_userfault_link)
		xe_bo_runtime_pm_release_mmap_offset(bo);
	mutex_unlock(&xe->mem_access.vram_userfault.lock);

	xe_display_pm_runtime_suspend(xe);

	if (xe->d3cold.allowed) {
		err = xe_bo_evict_all(xe);
		if (err)
			goto out_resume;
	}

	for_each_gt(gt, xe, id) {
		err = xe->d3cold.allowed ? xe_gt_suspend(gt) : xe_gt_runtime_suspend(gt);
		if (err)
			goto out_resume;
	}

	xe_irq_suspend(xe);

	xe_display_pm_runtime_suspend_late(xe);

	xe_i2c_pm_suspend(xe);

	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return 0;

out_resume:
	xe_display_pm_runtime_resume(xe);
	xe_pxp_pm_resume(xe->pxp);
out:
	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/**
 * xe_pm_runtime_resume - Waking up from D3hot/D3Cold
 * @xe: xe device instance
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_runtime_resume(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	trace_xe_pm_runtime_resume(xe, __builtin_return_address(0));
	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	xe_rpm_lockmap_acquire(xe);

	if (xe->d3cold.allowed) {
		for_each_gt(gt, xe, id)
			xe_gt_idle_disable_c6(gt);

		err = xe_pcode_ready(xe, true);
		if (err)
			goto out;

		xe_display_pm_resume_early(xe);

		/*
		 * This only restores pinned memory which is the memory
		 * required for the GT(s) to resume.
		 */
		err = xe_bo_restore_early(xe);
		if (err)
			goto out;
	}

	xe_i2c_pm_resume(xe, xe->d3cold.allowed);

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id) {
		err = xe->d3cold.allowed ? xe_gt_resume(gt) : xe_gt_runtime_resume(gt);
		if (err)
			break;
	}

	/*
	 * Try to bring up display before bailing from GT resume failure,
	 * so we don't leave the user clueless with a blank screen.
	 */
	xe_display_pm_runtime_resume(xe);
	if (err)
		goto out;

	if (xe->d3cold.allowed) {
		err = xe_bo_restore_late(xe);
		if (err)
			goto out;
	}

	xe_pxp_pm_resume(xe->pxp);

	if (IS_VF_CCS_READY(xe))
		xe_sriov_vf_ccs_register_context(xe);

	if (xe->d3cold.allowed)
		xe_late_bind_fw_load(&xe->late_bind);

out:
	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/*
 * For places where resume is synchronous it can be quite easy to deadlock
 * if we are not careful. Also in practice it might be quite timing
 * sensitive to ever see the 0 -> 1 transition with the caller's locks
 * held, so deadlocks might exist but are hard for lockdep to ever see.
 * With this in mind, help lockdep learn about the potentially scary
 * stuff that can happen inside the runtime_resume callback by acquiring
 * a dummy lock (it doesn't protect anything and gets compiled out on
 * non-debug builds). Lockdep then only needs to see the
 * xe_pm_runtime_xxx_map -> runtime_resume callback once, and then can
 * hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map.
 * For example if the (callers_locks) are ever grabbed in the
 * runtime_resume callback, lockdep should give us a nice splat.
 */
static void xe_rpm_might_enter_cb(const struct xe_device *xe)
{
	xe_rpm_lockmap_acquire(xe);
	xe_rpm_lockmap_release(xe);
}

/*
 * Prime the lockdep maps for known locking orders that need to
 * be supported but that may not always occur on all systems.
 */
static void xe_pm_runtime_lockdep_prime(void)
{
	struct dma_resv lockdep_resv;

	dma_resv_init(&lockdep_resv);
	lock_map_acquire(&xe_pm_runtime_d3cold_map);
	/* D3Cold takes the dma_resv locks to evict bos */
	dma_resv_lock(&lockdep_resv, NULL);
	dma_resv_unlock(&lockdep_resv);
	lock_map_release(&xe_pm_runtime_d3cold_map);

	/* Shrinkers might like to wake up the device under reclaim. */
	fs_reclaim_acquire(GFP_KERNEL);
	lock_map_acquire(&xe_pm_runtime_nod3cold_map);
	lock_map_release(&xe_pm_runtime_nod3cold_map);
	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously
 * @xe: xe device instance
 *
 * When possible, scope-based runtime PM (through guard(xe_pm_runtime)) is
 * preferred over direct usage of this function. Manual get/put handling
 * should only be used when the function contains goto-based logic which
 * can break scope-based handling, or when the lifetime of the runtime PM
 * reference does not match a specific scope (e.g., runtime PM obtained in one
 * function and released in a different one).
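 *
 * A minimal sketch contrasting the two styles (the functions shown are
 * hypothetical, and the guard form assumes the xe_pm_runtime guard class
 * mentioned above takes the xe device pointer):
 *
 *	void xe_bar_scoped(struct xe_device *xe)
 *	{
 *		guard(xe_pm_runtime)(xe);	// reference dropped at scope exit
 *
 *		xe_bar_touch_hw(xe);		// hypothetical device access
 *	}
 *
 *	void xe_bar_manual(struct xe_device *xe)
 *	{
 *		xe_pm_runtime_get(xe);
 *		xe_bar_touch_hw(xe);
 *		xe_pm_runtime_put(xe);
 *	}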
 */
void xe_pm_runtime_get(struct xe_device *xe)
{
	trace_xe_pm_runtime_get(xe, __builtin_return_address(0));
	pm_runtime_get_noresume(xe->drm.dev);

	if (xe_pm_read_callback_task(xe) == current)
		return;

	xe_rpm_might_enter_cb(xe);
	pm_runtime_resume(xe->drm.dev);
}

/**
 * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle
 * @xe: xe device instance
 */
void xe_pm_runtime_put(struct xe_device *xe)
{
	trace_xe_pm_runtime_put(xe, __builtin_return_address(0));
	if (xe_pm_read_callback_task(xe) == current) {
		pm_runtime_put_noidle(xe->drm.dev);
	} else {
		pm_runtime_mark_last_busy(xe->drm.dev);
		pm_runtime_put(xe->drm.dev);
	}
}

/**
 * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl
 * @xe: xe device instance
 *
 * When possible, scope-based runtime PM (through
 * ACQUIRE(xe_pm_runtime_ioctl, ...)) is preferred over direct usage of this
 * function. Manual get/put handling should only be used when the function
 * contains goto-based logic which can break scope-based handling, or when the
 * lifetime of the runtime PM reference does not match a specific scope (e.g.,
 * runtime PM obtained in one function and released in a different one).
 *
 * Returns: Any number greater than or equal to 0 for success, negative error
 * code otherwise.
 */
int xe_pm_runtime_get_ioctl(struct xe_device *xe)
{
	trace_xe_pm_runtime_get_ioctl(xe, __builtin_return_address(0));
	if (WARN_ON(xe_pm_read_callback_task(xe) == current))
		return -ELOOP;

	xe_rpm_might_enter_cb(xe);
	return pm_runtime_get_sync(xe->drm.dev);
}

/**
 * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active
 * @xe: xe device instance
 *
 * Return: True if device is awake (regardless of the previous number of
 * references) and a new reference was taken, false otherwise.
 */
bool xe_pm_runtime_get_if_active(struct xe_device *xe)
{
	return pm_runtime_get_if_active(xe->drm.dev) > 0;
}

/**
 * xe_pm_runtime_get_if_in_use - Get a new reference if device is active with previous ref taken
 * @xe: xe device instance
 *
 * Return: True if device is awake, a previous reference had already been
 * taken, and a new reference was now taken, false otherwise.
 */
bool xe_pm_runtime_get_if_in_use(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		/* The device is awake, grab the ref and move on */
		pm_runtime_get_noresume(xe->drm.dev);
		return true;
	}

	return pm_runtime_get_if_in_use(xe->drm.dev) > 0;
}

/*
 * Very unreliable! Should only be used to suppress the false positive case
 * in the missing outer rpm protection warning.
 */
static bool xe_pm_suspending_or_resuming(struct xe_device *xe)
{
#ifdef CONFIG_PM
	struct device *dev = xe->drm.dev;

	return dev->power.runtime_status == RPM_SUSPENDING ||
	       dev->power.runtime_status == RPM_RESUMING ||
	       pm_suspend_in_progress();
#else
	return false;
#endif
}

/**
 * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming
 * @xe: xe device instance
 *
 * This function should be used in inner places where it is surely already
 * protected by outer-bound callers of `xe_pm_runtime_get`.
 * It will warn if not protected.
 * The reference should be put back after this call regardless, since it
 * will always bump the usage counter.
 *
 * When possible, scope-based runtime PM (through guard(xe_pm_runtime_noresume))
 * is preferred over direct usage of this function. Manual get/put handling
 * should only be used when the function contains goto-based logic which can
 * break scope-based handling, or when the lifetime of the runtime PM reference
 * does not match a specific scope (e.g., runtime PM obtained in one function
 * and released in a different one).
 */
void xe_pm_runtime_get_noresume(struct xe_device *xe)
{
	bool ref;

	ref = xe_pm_runtime_get_if_in_use(xe);

	if (!ref) {
		pm_runtime_get_noresume(xe->drm.dev);
		drm_WARN(&xe->drm, !xe_pm_suspending_or_resuming(xe),
			 "Missing outer runtime PM protection\n");
	}
}

/**
 * xe_pm_runtime_resume_and_get - Resume, then get a runtime_pm ref if awake.
 * @xe: xe device instance
 *
 * Returns: True if device is awake and the reference was taken, false otherwise.
 */
bool xe_pm_runtime_resume_and_get(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		/* The device is awake, grab the ref and move on */
		pm_runtime_get_noresume(xe->drm.dev);
		return true;
	}

	xe_rpm_might_enter_cb(xe);
	return pm_runtime_resume_and_get(xe->drm.dev) >= 0;
}

/**
 * xe_pm_assert_unbounded_bridge - Disable PM on unbound pcie parent bridge
 * @xe: xe device instance
 */
void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *bridge = pci_upstream_bridge(pdev);

	if (!bridge)
		return;

	if (!bridge->driver) {
		drm_warn(&xe->drm, "unbound parent pci bridge, device won't support any PM\n");
		device_set_pm_not_required(&pdev->dev);
	}
}

/**
 * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold
 * @xe: xe device instance
 * @threshold: VRAM size in MiB for the D3cold threshold
 *
 * Return:
 * * 0		- success
 * * -EINVAL	- invalid argument
 */
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
	struct ttm_resource_manager *man;
	u32 vram_total_mb = 0;
	int i;

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man)
			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
	}

	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);

	if (threshold > vram_total_mb)
		return -EINVAL;

	mutex_lock(&xe->d3cold.lock);
	xe->d3cold.vram_threshold = threshold;
	mutex_unlock(&xe->d3cold.lock);

	return 0;
}

/**
 * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed
 * @xe: xe device instance
 *
 * To be called during the runtime_pm idle callback to check all the D3Cold
 * conditions ahead of runtime suspend.
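 *
 * In short (summarizing the logic below): d3cold.allowed is set only when the
 * device is D3Cold capable and the total VRAM currently in use is below the
 * threshold configured via xe_pm_set_vram_threshold(); otherwise it is
 * cleared and runtime suspend stays at D3hot.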
 */
void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
{
	struct ttm_resource_manager *man;
	u32 total_vram_used_mb = 0;
	u64 vram_used;
	int i;

	if (!xe->d3cold.capable) {
		xe->d3cold.allowed = false;
		return;
	}

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man) {
			vram_used = ttm_resource_manager_usage(man);
			total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024);
		}
	}

	mutex_lock(&xe->d3cold.lock);

	if (total_vram_used_mb < xe->d3cold.vram_threshold)
		xe->d3cold.allowed = true;
	else
		xe->d3cold.allowed = false;

	mutex_unlock(&xe->d3cold.lock);
}

/**
 * xe_pm_module_init() - Perform xe_pm specific module initialization.
 *
 * Return: 0 on success. Currently doesn't fail.
 */
int __init xe_pm_module_init(void)
{
	xe_pm_runtime_lockdep_prime();
	return 0;
}