1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2022 Intel Corporation 4 */ 5 6 #include "xe_pm.h" 7 8 #include <linux/fault-inject.h> 9 #include <linux/pm_runtime.h> 10 #include <linux/suspend.h> 11 #include <linux/dmi.h> 12 13 #include <drm/drm_managed.h> 14 #include <drm/ttm/ttm_placement.h> 15 16 #include "display/xe_display.h" 17 #include "xe_bo.h" 18 #include "xe_bo_evict.h" 19 #include "xe_device.h" 20 #include "xe_ggtt.h" 21 #include "xe_gt.h" 22 #include "xe_gt_idle.h" 23 #include "xe_i2c.h" 24 #include "xe_irq.h" 25 #include "xe_late_bind_fw.h" 26 #include "xe_pcode.h" 27 #include "xe_pxp.h" 28 #include "xe_sriov_vf_ccs.h" 29 #include "xe_sysctrl.h" 30 #include "xe_trace.h" 31 #include "xe_vm.h" 32 #include "xe_wa.h" 33 34 /** 35 * DOC: Xe Power Management 36 * 37 * Xe PM implements the main routines for both system level suspend states and 38 * for the opportunistic runtime suspend states. 39 * 40 * System Level Suspend (S-States) - In general this is OS initiated suspend 41 * driven by ACPI for achieving S0ix (a.k.a. S2idle, freeze), S3 (suspend to ram), 42 * S4 (disk). The main functions here are `xe_pm_suspend` and `xe_pm_resume`. They 43 * are the main point for the suspend to and resume from these states. 44 * 45 * PCI Device Suspend (D-States) - This is the opportunistic PCIe device low power 46 * state D3, controlled by the PCI subsystem and ACPI with the help from the 47 * runtime_pm infrastructure. 48 * PCI D3 is special and can mean D3hot, where Vcc power is on for keeping memory 49 * alive and quicker low latency resume or D3Cold where Vcc power is off for 50 * better power savings. 51 * The Vcc control of PCI hierarchy can only be controlled at the PCI root port 52 * level, while the device driver can be behind multiple bridges/switches and 53 * paired with other devices. For this reason, the PCI subsystem cannot perform 54 * the transition towards D3Cold. The lowest runtime PM possible from the PCI 55 * subsystem is D3hot. Then, if all these paired devices in the same root port 56 * are in D3hot, ACPI will assist here and run its own methods (_PR3 and _OFF) 57 * to perform the transition from D3hot to D3cold. Xe may disallow this 58 * transition by calling pci_d3cold_disable(root_pdev) before going to runtime 59 * suspend. It will be based on runtime conditions such as VRAM usage for a 60 * quick and low latency resume for instance. 61 * 62 * Runtime PM - This infrastructure provided by the Linux kernel allows the 63 * device drivers to indicate when the can be runtime suspended, so the device 64 * could be put at D3 (if supported), or allow deeper package sleep states 65 * (PC-states), and/or other low level power states. Xe PM component provides 66 * `xe_pm_runtime_suspend` and `xe_pm_runtime_resume` functions that PCI 67 * subsystem will call before transition to/from runtime suspend. 68 * 69 * Also, Xe PM provides get and put functions that Xe driver will use to 70 * indicate activity. In order to avoid locking complications with the memory 71 * management, whenever possible, these get and put functions needs to be called 72 * from the higher/outer levels. 73 * The main cases that need to be protected from the outer levels are: IOCTL, 74 * sysfs, debugfs, dma-buf sharing, GPU execution. 75 * 76 * This component is not responsible for GT idleness (RC6) nor GT frequency 77 * management (RPS). 78 */ 79 80 #ifdef CONFIG_LOCKDEP 81 static struct lockdep_map xe_pm_runtime_d3cold_map = { 82 .name = "xe_rpm_d3cold_map" 83 }; 84 85 static struct lockdep_map xe_pm_runtime_nod3cold_map = { 86 .name = "xe_rpm_nod3cold_map" 87 }; 88 89 static struct lockdep_map xe_pm_block_lockdep_map = { 90 .name = "xe_pm_block_map", 91 }; 92 #endif 93 94 static void xe_pm_block_begin_signalling(void) 95 { 96 lock_acquire_shared_recursive(&xe_pm_block_lockdep_map, 0, 1, NULL, _RET_IP_); 97 } 98 99 static void xe_pm_block_end_signalling(void) 100 { 101 lock_release(&xe_pm_block_lockdep_map, _RET_IP_); 102 } 103 104 /** 105 * xe_pm_might_block_on_suspend() - Annotate that the code might block on suspend 106 * 107 * Annotation to use where the code might block or seize to make 108 * progress pending resume completion. 109 */ 110 void xe_pm_might_block_on_suspend(void) 111 { 112 lock_map_acquire(&xe_pm_block_lockdep_map); 113 lock_map_release(&xe_pm_block_lockdep_map); 114 } 115 116 /** 117 * xe_pm_block_on_suspend() - Block pending suspend. 118 * @xe: The xe device about to be suspended. 119 * 120 * Block if the pm notifier has start evicting bos, to avoid 121 * racing and validating those bos back. The function is 122 * annotated to ensure no locks are held that are also grabbed 123 * in the pm notifier or the device suspend / resume. 124 * This is intended to be used by freezable tasks only. 125 * (Not freezable workqueues), with the intention that the function 126 * returns %-ERESTARTSYS when tasks are frozen during suspend, 127 * and allows the task to freeze. The caller must be able to 128 * handle the %-ERESTARTSYS. 129 * 130 * Return: %0 on success, %-ERESTARTSYS on signal pending or 131 * if freezing requested. 132 */ 133 int xe_pm_block_on_suspend(struct xe_device *xe) 134 { 135 xe_pm_might_block_on_suspend(); 136 137 return wait_for_completion_interruptible(&xe->pm_block); 138 } 139 140 /** 141 * xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context 142 * @xe: The xe device. 143 * 144 * Return: true if it is safe to runtime resume from reclaim context. 145 * false otherwise. 146 */ 147 bool xe_rpm_reclaim_safe(const struct xe_device *xe) 148 { 149 return !xe->d3cold.capable; 150 } 151 152 static void xe_rpm_lockmap_acquire(const struct xe_device *xe) 153 { 154 lock_map_acquire(xe_rpm_reclaim_safe(xe) ? 155 &xe_pm_runtime_nod3cold_map : 156 &xe_pm_runtime_d3cold_map); 157 } 158 159 static void xe_rpm_lockmap_release(const struct xe_device *xe) 160 { 161 lock_map_release(xe_rpm_reclaim_safe(xe) ? 162 &xe_pm_runtime_nod3cold_map : 163 &xe_pm_runtime_d3cold_map); 164 } 165 166 /** 167 * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle 168 * @xe: xe device instance 169 * 170 * Return: 0 on success 171 */ 172 int xe_pm_suspend(struct xe_device *xe) 173 { 174 struct xe_gt *gt; 175 u8 id; 176 int err; 177 178 drm_dbg(&xe->drm, "Suspending device\n"); 179 xe_pm_block_begin_signalling(); 180 trace_xe_pm_suspend(xe, __builtin_return_address(0)); 181 182 err = xe_pxp_pm_suspend(xe->pxp); 183 if (err) 184 goto err; 185 186 xe_late_bind_wait_for_worker_completion(&xe->late_bind); 187 188 for_each_gt(gt, xe, id) 189 xe_gt_suspend_prepare(gt); 190 191 xe_display_pm_suspend(xe); 192 193 /* FIXME: Super racey... */ 194 err = xe_bo_evict_all(xe); 195 if (err) 196 goto err_display; 197 198 for_each_gt(gt, xe, id) { 199 err = xe_gt_suspend(gt); 200 if (err) 201 goto err_display; 202 } 203 204 xe_irq_suspend(xe); 205 206 xe_display_pm_suspend_late(xe); 207 208 xe_i2c_pm_suspend(xe); 209 210 drm_dbg(&xe->drm, "Device suspended\n"); 211 xe_pm_block_end_signalling(); 212 213 return 0; 214 215 err_display: 216 xe_display_pm_resume(xe); 217 xe_pxp_pm_resume(xe->pxp); 218 err: 219 drm_dbg(&xe->drm, "Device suspend failed %d\n", err); 220 xe_pm_block_end_signalling(); 221 return err; 222 } 223 224 /** 225 * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0 226 * @xe: xe device instance 227 * 228 * Return: 0 on success 229 */ 230 int xe_pm_resume(struct xe_device *xe) 231 { 232 struct xe_tile *tile; 233 struct xe_gt *gt; 234 u8 id; 235 int err; 236 237 xe_pm_block_begin_signalling(); 238 drm_dbg(&xe->drm, "Resuming device\n"); 239 trace_xe_pm_resume(xe, __builtin_return_address(0)); 240 241 for_each_gt(gt, xe, id) 242 xe_gt_idle_disable_c6(gt); 243 244 for_each_tile(tile, xe, id) 245 xe_wa_apply_tile_workarounds(tile); 246 247 err = xe_pcode_ready(xe, true); 248 if (err) 249 return err; 250 251 xe_display_pm_resume_early(xe); 252 253 /* 254 * This only restores pinned memory which is the memory required for the 255 * GT(s) to resume. 256 */ 257 err = xe_bo_restore_early(xe); 258 if (err) 259 goto err; 260 261 xe_i2c_pm_resume(xe, true); 262 263 xe_sysctrl_pm_resume(xe); 264 265 xe_irq_resume(xe); 266 267 for_each_gt(gt, xe, id) { 268 err = xe_gt_resume(gt); 269 if (err) 270 break; 271 } 272 273 /* 274 * Try to bring up display before bailing from GT resume failure, 275 * so we don't leave the user clueless with a blank screen. 276 */ 277 xe_display_pm_resume(xe); 278 if (err) 279 goto err; 280 281 err = xe_bo_restore_late(xe); 282 if (err) 283 goto err; 284 285 xe_pxp_pm_resume(xe->pxp); 286 287 if (IS_VF_CCS_READY(xe)) 288 xe_sriov_vf_ccs_register_context(xe); 289 290 xe_late_bind_fw_load(&xe->late_bind); 291 292 drm_dbg(&xe->drm, "Device resumed\n"); 293 xe_pm_block_end_signalling(); 294 return 0; 295 err: 296 drm_dbg(&xe->drm, "Device resume failed %d\n", err); 297 xe_pm_block_end_signalling(); 298 return err; 299 } 300 301 static bool xe_pm_pci_d3cold_capable(struct xe_device *xe) 302 { 303 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 304 struct pci_dev *root_pdev; 305 306 root_pdev = pcie_find_root_port(pdev); 307 if (!root_pdev) 308 return false; 309 310 /* D3Cold requires PME capability */ 311 if (!pci_pme_capable(root_pdev, PCI_D3cold)) { 312 drm_dbg(&xe->drm, "d3cold: PME# not supported\n"); 313 return false; 314 } 315 316 /* D3Cold requires _PR3 power resource */ 317 if (!pci_pr3_present(root_pdev)) { 318 drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n"); 319 return false; 320 } 321 322 return true; 323 } 324 325 static void xe_pm_runtime_init(struct xe_device *xe) 326 { 327 struct device *dev = xe->drm.dev; 328 329 /* Our current VFs do not support RPM. so, disable it */ 330 if (IS_SRIOV_VF(xe)) 331 return; 332 333 /* 334 * Disable the system suspend direct complete optimization. 335 * We need to ensure that the regular device suspend/resume functions 336 * are called since our runtime_pm cannot guarantee local memory 337 * eviction for d3cold. 338 * TODO: Check HDA audio dependencies claimed by i915, and then enforce 339 * this option to integrated graphics as well. 340 */ 341 if (IS_DGFX(xe)) 342 dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE); 343 344 pm_runtime_use_autosuspend(dev); 345 pm_runtime_set_autosuspend_delay(dev, 1000); 346 pm_runtime_set_active(dev); 347 pm_runtime_allow(dev); 348 pm_runtime_mark_last_busy(dev); 349 pm_runtime_put(dev); 350 } 351 352 int xe_pm_init_early(struct xe_device *xe) 353 { 354 int err; 355 356 INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list); 357 358 err = drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock); 359 if (err) 360 return err; 361 362 err = drmm_mutex_init(&xe->drm, &xe->d3cold.lock); 363 if (err) 364 return err; 365 366 xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe); 367 return 0; 368 } 369 ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */ 370 371 static u32 vram_threshold_value(struct xe_device *xe) 372 { 373 if (xe->info.platform == XE_BATTLEMAGE) { 374 const char *product_name; 375 376 product_name = dmi_get_system_info(DMI_PRODUCT_NAME); 377 if (product_name && strstr(product_name, "NUC13RNG")) { 378 drm_warn(&xe->drm, "BMG + D3Cold not supported on this platform\n"); 379 return 0; 380 } 381 } 382 383 return DEFAULT_VRAM_THRESHOLD; 384 } 385 386 static void xe_pm_wake_rebind_workers(struct xe_device *xe) 387 { 388 struct xe_vm *vm, *next; 389 390 mutex_lock(&xe->rebind_resume_lock); 391 list_for_each_entry_safe(vm, next, &xe->rebind_resume_list, 392 preempt.pm_activate_link) { 393 list_del_init(&vm->preempt.pm_activate_link); 394 xe_vm_resume_rebind_worker(vm); 395 } 396 mutex_unlock(&xe->rebind_resume_lock); 397 } 398 399 static int xe_pm_notifier_callback(struct notifier_block *nb, 400 unsigned long action, void *data) 401 { 402 struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier); 403 int err = 0; 404 405 switch (action) { 406 case PM_HIBERNATION_PREPARE: 407 case PM_SUSPEND_PREPARE: 408 { 409 struct xe_validation_ctx ctx; 410 411 reinit_completion(&xe->pm_block); 412 xe_pm_block_begin_signalling(); 413 xe_pm_runtime_get(xe); 414 (void)xe_validation_ctx_init(&ctx, &xe->val, NULL, 415 (struct xe_val_flags) {.exclusive = true}); 416 err = xe_bo_evict_all_user(xe); 417 xe_validation_ctx_fini(&ctx); 418 if (err) 419 drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err); 420 421 err = xe_bo_notifier_prepare_all_pinned(xe); 422 if (err) 423 drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err); 424 /* 425 * Keep the runtime pm reference until post hibernation / post suspend to 426 * avoid a runtime suspend interfering with evicted objects or backup 427 * allocations. 428 */ 429 xe_pm_block_end_signalling(); 430 break; 431 } 432 case PM_POST_HIBERNATION: 433 case PM_POST_SUSPEND: 434 complete_all(&xe->pm_block); 435 xe_pm_wake_rebind_workers(xe); 436 xe_bo_notifier_unprepare_all_pinned(xe); 437 xe_pm_runtime_put(xe); 438 break; 439 } 440 441 return NOTIFY_DONE; 442 } 443 444 /** 445 * xe_pm_init - Initialize Xe Power Management 446 * @xe: xe device instance 447 * 448 * This component is responsible for System and Device sleep states. 449 * 450 * Returns 0 for success, negative error code otherwise. 451 */ 452 int xe_pm_init(struct xe_device *xe) 453 { 454 u32 vram_threshold; 455 int err; 456 457 xe->pm_notifier.notifier_call = xe_pm_notifier_callback; 458 err = register_pm_notifier(&xe->pm_notifier); 459 if (err) 460 return err; 461 462 err = drmm_mutex_init(&xe->drm, &xe->rebind_resume_lock); 463 if (err) 464 goto err_unregister; 465 466 init_completion(&xe->pm_block); 467 complete_all(&xe->pm_block); 468 INIT_LIST_HEAD(&xe->rebind_resume_list); 469 470 /* For now suspend/resume is only allowed with GuC */ 471 if (!xe_device_uc_enabled(xe)) 472 return 0; 473 474 if (xe->d3cold.capable) { 475 vram_threshold = vram_threshold_value(xe); 476 err = xe_pm_set_vram_threshold(xe, vram_threshold); 477 if (err) 478 goto err_unregister; 479 } 480 481 xe_pm_runtime_init(xe); 482 return 0; 483 484 err_unregister: 485 unregister_pm_notifier(&xe->pm_notifier); 486 return err; 487 } 488 489 static void xe_pm_runtime_fini(struct xe_device *xe) 490 { 491 struct device *dev = xe->drm.dev; 492 493 /* Our current VFs do not support RPM. so, disable it */ 494 if (IS_SRIOV_VF(xe)) 495 return; 496 497 pm_runtime_get_sync(dev); 498 pm_runtime_forbid(dev); 499 } 500 501 /** 502 * xe_pm_fini - Finalize PM 503 * @xe: xe device instance 504 */ 505 void xe_pm_fini(struct xe_device *xe) 506 { 507 if (xe_device_uc_enabled(xe)) 508 xe_pm_runtime_fini(xe); 509 510 unregister_pm_notifier(&xe->pm_notifier); 511 } 512 513 static void xe_pm_write_callback_task(struct xe_device *xe, 514 struct task_struct *task) 515 { 516 WRITE_ONCE(xe->pm_callback_task, task); 517 518 /* 519 * Just in case it's somehow possible for our writes to be reordered to 520 * the extent that something else re-uses the task written in 521 * pm_callback_task. For example after returning from the callback, but 522 * before the reordered write that resets pm_callback_task back to NULL. 523 */ 524 smp_mb(); /* pairs with xe_pm_read_callback_task */ 525 } 526 527 struct task_struct *xe_pm_read_callback_task(struct xe_device *xe) 528 { 529 smp_mb(); /* pairs with xe_pm_write_callback_task */ 530 531 return READ_ONCE(xe->pm_callback_task); 532 } 533 534 /** 535 * xe_pm_runtime_suspended - Check if runtime_pm state is suspended 536 * @xe: xe device instance 537 * 538 * This does not provide any guarantee that the device is going to remain 539 * suspended as it might be racing with the runtime state transitions. 540 * It can be used only as a non-reliable assertion, to ensure that we are not in 541 * the sleep state while trying to access some memory for instance. 542 * 543 * Returns true if PCI device is suspended, false otherwise. 544 */ 545 bool xe_pm_runtime_suspended(struct xe_device *xe) 546 { 547 return pm_runtime_suspended(xe->drm.dev); 548 } 549 550 /** 551 * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold 552 * @xe: xe device instance 553 * 554 * Returns 0 for success, negative error code otherwise. 555 */ 556 int xe_pm_runtime_suspend(struct xe_device *xe) 557 { 558 struct xe_bo *bo, *on; 559 struct xe_gt *gt; 560 u8 id; 561 int err = 0; 562 563 trace_xe_pm_runtime_suspend(xe, __builtin_return_address(0)); 564 /* Disable access_ongoing asserts and prevent recursive pm calls */ 565 xe_pm_write_callback_task(xe, current); 566 567 /* 568 * The actual xe_pm_runtime_put() is always async underneath, so 569 * exactly where that is called should makes no difference to us. However 570 * we still need to be very careful with the locks that this callback 571 * acquires and the locks that are acquired and held by any callers of 572 * xe_runtime_pm_get(). We already have the matching annotation 573 * on that side, but we also need it here. For example lockdep should be 574 * able to tell us if the following scenario is in theory possible: 575 * 576 * CPU0 | CPU1 (kworker) 577 * lock(A) | 578 * | xe_pm_runtime_suspend() 579 * | lock(A) 580 * xe_pm_runtime_get() | 581 * 582 * This will clearly deadlock since rpm core needs to wait for 583 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A) 584 * on CPU0 which prevents CPU1 making forward progress. With the 585 * annotation here and in xe_pm_runtime_get() lockdep will see 586 * the potential lock inversion and give us a nice splat. 587 */ 588 xe_rpm_lockmap_acquire(xe); 589 590 err = xe_pxp_pm_suspend(xe->pxp); 591 if (err) 592 goto out; 593 594 /* 595 * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify 596 * also checks and deletes bo entry from user fault list. 597 */ 598 mutex_lock(&xe->mem_access.vram_userfault.lock); 599 list_for_each_entry_safe(bo, on, 600 &xe->mem_access.vram_userfault.list, vram_userfault_link) 601 xe_bo_runtime_pm_release_mmap_offset(bo); 602 mutex_unlock(&xe->mem_access.vram_userfault.lock); 603 604 xe_display_pm_runtime_suspend(xe); 605 606 if (xe->d3cold.allowed) { 607 err = xe_bo_evict_all(xe); 608 if (err) 609 goto out_resume; 610 } 611 612 for_each_gt(gt, xe, id) { 613 err = xe->d3cold.allowed ? xe_gt_suspend(gt) : xe_gt_runtime_suspend(gt); 614 if (err) 615 goto out_resume; 616 } 617 618 xe_irq_suspend(xe); 619 620 xe_display_pm_runtime_suspend_late(xe); 621 622 xe_i2c_pm_suspend(xe); 623 624 xe_rpm_lockmap_release(xe); 625 xe_pm_write_callback_task(xe, NULL); 626 return 0; 627 628 out_resume: 629 xe_display_pm_runtime_resume(xe); 630 xe_pxp_pm_resume(xe->pxp); 631 out: 632 xe_rpm_lockmap_release(xe); 633 xe_pm_write_callback_task(xe, NULL); 634 return err; 635 } 636 637 /** 638 * xe_pm_runtime_resume - Waking up from D3hot/D3Cold 639 * @xe: xe device instance 640 * 641 * Returns 0 for success, negative error code otherwise. 642 */ 643 int xe_pm_runtime_resume(struct xe_device *xe) 644 { 645 struct xe_gt *gt; 646 u8 id; 647 int err = 0; 648 649 trace_xe_pm_runtime_resume(xe, __builtin_return_address(0)); 650 /* Disable access_ongoing asserts and prevent recursive pm calls */ 651 xe_pm_write_callback_task(xe, current); 652 653 xe_rpm_lockmap_acquire(xe); 654 655 if (xe->d3cold.allowed) { 656 for_each_gt(gt, xe, id) 657 xe_gt_idle_disable_c6(gt); 658 659 err = xe_pcode_ready(xe, true); 660 if (err) 661 goto out; 662 663 xe_display_pm_resume_early(xe); 664 665 /* 666 * This only restores pinned memory which is the memory 667 * required for the GT(s) to resume. 668 */ 669 err = xe_bo_restore_early(xe); 670 if (err) 671 goto out; 672 } 673 674 xe_i2c_pm_resume(xe, xe->d3cold.allowed); 675 676 if (xe->d3cold.allowed) 677 xe_sysctrl_pm_resume(xe); 678 679 xe_irq_resume(xe); 680 681 for_each_gt(gt, xe, id) { 682 err = xe->d3cold.allowed ? xe_gt_resume(gt) : xe_gt_runtime_resume(gt); 683 if (err) 684 break; 685 } 686 687 /* 688 * Try to bring up display before bailing from GT resume failure, 689 * so we don't leave the user clueless with a blank screen. 690 */ 691 xe_display_pm_runtime_resume(xe); 692 if (err) 693 goto out; 694 695 if (xe->d3cold.allowed) { 696 err = xe_bo_restore_late(xe); 697 if (err) 698 goto out; 699 } 700 701 xe_pxp_pm_resume(xe->pxp); 702 703 if (IS_VF_CCS_READY(xe)) 704 xe_sriov_vf_ccs_register_context(xe); 705 706 if (xe->d3cold.allowed) 707 xe_late_bind_fw_load(&xe->late_bind); 708 709 out: 710 xe_rpm_lockmap_release(xe); 711 xe_pm_write_callback_task(xe, NULL); 712 return err; 713 } 714 715 /* 716 * For places where resume is synchronous it can be quite easy to deadlock 717 * if we are not careful. Also in practice it might be quite timing 718 * sensitive to ever see the 0 -> 1 transition with the callers locks 719 * held, so deadlocks might exist but are hard for lockdep to ever see. 720 * With this in mind, help lockdep learn about the potentially scary 721 * stuff that can happen inside the runtime_resume callback by acquiring 722 * a dummy lock (it doesn't protect anything and gets compiled out on 723 * non-debug builds). Lockdep then only needs to see the 724 * xe_pm_runtime_xxx_map -> runtime_resume callback once, and then can 725 * hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map. 726 * For example if the (callers_locks) are ever grabbed in the 727 * runtime_resume callback, lockdep should give us a nice splat. 728 */ 729 static void xe_rpm_might_enter_cb(const struct xe_device *xe) 730 { 731 xe_rpm_lockmap_acquire(xe); 732 xe_rpm_lockmap_release(xe); 733 } 734 735 /* 736 * Prime the lockdep maps for known locking orders that need to 737 * be supported but that may not always occur on all systems. 738 */ 739 static void xe_pm_runtime_lockdep_prime(void) 740 { 741 struct dma_resv lockdep_resv; 742 743 dma_resv_init(&lockdep_resv); 744 lock_map_acquire(&xe_pm_runtime_d3cold_map); 745 /* D3Cold takes the dma_resv locks to evict bos */ 746 dma_resv_lock(&lockdep_resv, NULL); 747 dma_resv_unlock(&lockdep_resv); 748 lock_map_release(&xe_pm_runtime_d3cold_map); 749 750 /* Shrinkers might like to wake up the device under reclaim. */ 751 fs_reclaim_acquire(GFP_KERNEL); 752 lock_map_acquire(&xe_pm_runtime_nod3cold_map); 753 lock_map_release(&xe_pm_runtime_nod3cold_map); 754 fs_reclaim_release(GFP_KERNEL); 755 } 756 757 /** 758 * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously 759 * @xe: xe device instance 760 * 761 * When possible, scope-based runtime PM (through guard(xe_pm_runtime)) is 762 * be preferred over direct usage of this function. Manual get/put handling 763 * should only be used when the function contains goto-based logic which 764 * can break scope-based handling, or when the lifetime of the runtime PM 765 * reference does not match a specific scope (e.g., runtime PM obtained in one 766 * function and released in a different one). 767 */ 768 void xe_pm_runtime_get(struct xe_device *xe) 769 { 770 trace_xe_pm_runtime_get(xe, __builtin_return_address(0)); 771 pm_runtime_get_noresume(xe->drm.dev); 772 773 if (xe_pm_read_callback_task(xe) == current) 774 return; 775 776 xe_rpm_might_enter_cb(xe); 777 pm_runtime_resume(xe->drm.dev); 778 } 779 780 /** 781 * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle 782 * @xe: xe device instance 783 */ 784 void xe_pm_runtime_put(struct xe_device *xe) 785 { 786 trace_xe_pm_runtime_put(xe, __builtin_return_address(0)); 787 if (xe_pm_read_callback_task(xe) == current) { 788 pm_runtime_put_noidle(xe->drm.dev); 789 } else { 790 pm_runtime_mark_last_busy(xe->drm.dev); 791 pm_runtime_put(xe->drm.dev); 792 } 793 } 794 795 /** 796 * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl 797 * @xe: xe device instance 798 * 799 * When possible, scope-based runtime PM (through 800 * ACQUIRE(xe_pm_runtime_ioctl, ...)) is be preferred over direct usage of this 801 * function. Manual get/put handling should only be used when the function 802 * contains goto-based logic which can break scope-based handling, or when the 803 * lifetime of the runtime PM reference does not match a specific scope (e.g., 804 * runtime PM obtained in one function and released in a different one). 805 * 806 * Returns: Any number greater than or equal to 0 for success, negative error 807 * code otherwise. 808 */ 809 int xe_pm_runtime_get_ioctl(struct xe_device *xe) 810 { 811 trace_xe_pm_runtime_get_ioctl(xe, __builtin_return_address(0)); 812 if (WARN_ON(xe_pm_read_callback_task(xe) == current)) 813 return -ELOOP; 814 815 xe_rpm_might_enter_cb(xe); 816 return pm_runtime_get_sync(xe->drm.dev); 817 } 818 819 /** 820 * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active 821 * @xe: xe device instance 822 * 823 * Return: True if device is awake (regardless the previous number of references) 824 * and a new reference was taken, false otherwise. 825 */ 826 bool xe_pm_runtime_get_if_active(struct xe_device *xe) 827 { 828 return pm_runtime_get_if_active(xe->drm.dev) > 0; 829 } 830 831 /** 832 * xe_pm_runtime_get_if_in_use - Get a new reference if device is active with previous ref taken 833 * @xe: xe device instance 834 * 835 * Return: True if device is awake, a previous reference had been already taken, 836 * and a new reference was now taken, false otherwise. 837 */ 838 bool xe_pm_runtime_get_if_in_use(struct xe_device *xe) 839 { 840 if (xe_pm_read_callback_task(xe) == current) { 841 /* The device is awake, grab the ref and move on */ 842 pm_runtime_get_noresume(xe->drm.dev); 843 return true; 844 } 845 846 return pm_runtime_get_if_in_use(xe->drm.dev) > 0; 847 } 848 849 /* 850 * Very unreliable! Should only be used to suppress the false positive case 851 * in the missing outer rpm protection warning. 852 */ 853 static bool xe_pm_suspending_or_resuming(struct xe_device *xe) 854 { 855 #ifdef CONFIG_PM 856 struct device *dev = xe->drm.dev; 857 858 return dev->power.runtime_status == RPM_SUSPENDING || 859 dev->power.runtime_status == RPM_RESUMING || 860 pm_suspend_in_progress(); 861 #else 862 return false; 863 #endif 864 } 865 866 /** 867 * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming 868 * @xe: xe device instance 869 * 870 * This function should be used in inner places where it is surely already 871 * protected by outer-bound callers of `xe_pm_runtime_get`. 872 * It will warn if not protected. 873 * The reference should be put back after this function regardless, since it 874 * will always bump the usage counter, regardless. 875 * 876 * When possible, scope-based runtime PM (through guard(xe_pm_runtime_noresume)) 877 * is be preferred over direct usage of this function. Manual get/put handling 878 * should only be used when the function contains goto-based logic which can 879 * break scope-based handling, or when the lifetime of the runtime PM reference 880 * does not match a specific scope (e.g., runtime PM obtained in one function 881 * and released in a different one). 882 */ 883 void xe_pm_runtime_get_noresume(struct xe_device *xe) 884 { 885 bool ref; 886 887 ref = xe_pm_runtime_get_if_in_use(xe); 888 889 if (!ref) { 890 pm_runtime_get_noresume(xe->drm.dev); 891 drm_WARN(&xe->drm, !xe_pm_suspending_or_resuming(xe), 892 "Missing outer runtime PM protection\n"); 893 } 894 } 895 896 /** 897 * xe_pm_runtime_resume_and_get - Resume, then get a runtime_pm ref if awake. 898 * @xe: xe device instance 899 * 900 * Returns: True if device is awake and the reference was taken, false otherwise. 901 */ 902 bool xe_pm_runtime_resume_and_get(struct xe_device *xe) 903 { 904 if (xe_pm_read_callback_task(xe) == current) { 905 /* The device is awake, grab the ref and move on */ 906 pm_runtime_get_noresume(xe->drm.dev); 907 return true; 908 } 909 910 xe_rpm_might_enter_cb(xe); 911 return pm_runtime_resume_and_get(xe->drm.dev) >= 0; 912 } 913 914 /** 915 * xe_pm_assert_unbounded_bridge - Disable PM on unbounded pcie parent bridge 916 * @xe: xe device instance 917 */ 918 void xe_pm_assert_unbounded_bridge(struct xe_device *xe) 919 { 920 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 921 struct pci_dev *bridge = pci_upstream_bridge(pdev); 922 923 if (!bridge) 924 return; 925 926 if (!bridge->driver) { 927 drm_warn(&xe->drm, "unbounded parent pci bridge, device won't support any PM support.\n"); 928 device_set_pm_not_required(&pdev->dev); 929 } 930 } 931 932 /** 933 * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold 934 * @xe: xe device instance 935 * @threshold: VRAM size in MiB for the D3cold threshold 936 * 937 * Return: 938 * * 0 - success 939 * * -EINVAL - invalid argument 940 */ 941 int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) 942 { 943 struct ttm_resource_manager *man; 944 u32 vram_total_mb = 0; 945 int i; 946 947 for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { 948 man = ttm_manager_type(&xe->ttm, i); 949 if (man) 950 vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024); 951 } 952 953 drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb); 954 955 if (threshold > vram_total_mb) 956 return -EINVAL; 957 958 mutex_lock(&xe->d3cold.lock); 959 xe->d3cold.vram_threshold = threshold; 960 mutex_unlock(&xe->d3cold.lock); 961 962 return 0; 963 } 964 965 /** 966 * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed 967 * @xe: xe device instance 968 * 969 * To be called during runtime_pm idle callback. 970 * Check for all the D3Cold conditions ahead of runtime suspend. 971 */ 972 void xe_pm_d3cold_allowed_toggle(struct xe_device *xe) 973 { 974 struct ttm_resource_manager *man; 975 u32 total_vram_used_mb = 0; 976 u64 vram_used; 977 int i; 978 979 if (!xe->d3cold.capable) { 980 xe->d3cold.allowed = false; 981 return; 982 } 983 984 for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { 985 man = ttm_manager_type(&xe->ttm, i); 986 if (man) { 987 vram_used = ttm_resource_manager_usage(man); 988 total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024); 989 } 990 } 991 992 mutex_lock(&xe->d3cold.lock); 993 994 if (total_vram_used_mb < xe->d3cold.vram_threshold) 995 xe->d3cold.allowed = true; 996 else 997 xe->d3cold.allowed = false; 998 999 mutex_unlock(&xe->d3cold.lock); 1000 } 1001 1002 /** 1003 * xe_pm_module_init() - Perform xe_pm specific module initialization. 1004 * 1005 * Return: 0 on success. Currently doesn't fail. 1006 */ 1007 int __init xe_pm_module_init(void) 1008 { 1009 xe_pm_runtime_lockdep_prime(); 1010 return 0; 1011 } 1012