// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_pm.h"

#include <linux/fault-inject.h>
#include <linux/pm_runtime.h>
#include <linux/suspend.h>
#include <linux/dmi.h>

#include <drm/drm_managed.h>
#include <drm/ttm/ttm_placement.h>

#include "display/xe_display.h"
#include "xe_bo.h"
#include "xe_bo_evict.h"
#include "xe_device.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_gt_idle.h"
#include "xe_i2c.h"
#include "xe_irq.h"
#include "xe_late_bind_fw.h"
#include "xe_pcode.h"
#include "xe_pxp.h"
#include "xe_sriov_vf_ccs.h"
#include "xe_trace.h"
#include "xe_vm.h"
#include "xe_wa.h"

/**
 * DOC: Xe Power Management
 *
 * Xe PM implements the main routines for both system level suspend states and
 * for the opportunistic runtime suspend states.
 *
 * System Level Suspend (S-States) - In general this is OS initiated suspend
 * driven by ACPI for achieving S0ix (a.k.a. S2idle, freeze), S3 (suspend to
 * ram), S4 (disk). The main functions here are `xe_pm_suspend` and
 * `xe_pm_resume`. They are the main entry points for suspending to and
 * resuming from these states.
 *
 * PCI Device Suspend (D-States) - This is the opportunistic PCIe device low
 * power state D3, controlled by the PCI subsystem and ACPI with the help from
 * the runtime_pm infrastructure.
 * PCI D3 is special and can mean D3hot, where Vcc power is on, for keeping
 * memory alive and a quicker, lower latency resume, or D3cold, where Vcc
 * power is off, for better power savings.
 * The Vcc control of the PCI hierarchy can only be controlled at the PCI root
 * port level, while the device driver can be behind multiple bridges/switches
 * and paired with other devices. For this reason, the PCI subsystem cannot
 * perform the transition towards D3Cold. The lowest runtime PM possible from
 * the PCI subsystem is D3hot. Then, if all the paired devices in the same
 * root port are in D3hot, ACPI will assist here and run its own methods
 * (_PR3 and _OFF) to perform the transition from D3hot to D3cold. Xe may
 * disallow this transition by calling pci_d3cold_disable(root_pdev) before
 * going to runtime suspend, based on runtime conditions such as VRAM usage,
 * e.g. to guarantee a quick and low latency resume.
 *
 * Runtime PM - This infrastructure provided by the Linux kernel allows the
 * device drivers to indicate when they can be runtime suspended, so the
 * device can be put into D3 (if supported), or allow deeper package sleep
 * states (PC-states), and/or other low level power states. The Xe PM
 * component provides `xe_pm_runtime_suspend` and `xe_pm_runtime_resume`
 * functions that the PCI subsystem will call before transitioning to/from
 * runtime suspend.
 *
 * Also, Xe PM provides get and put functions that the Xe driver will use to
 * indicate activity. In order to avoid locking complications with the memory
 * management, whenever possible, these get and put functions need to be
 * called from the higher/outer levels.
 * The main cases that need to be protected from the outer levels are: IOCTL,
 * sysfs, debugfs, dma-buf sharing, GPU execution.
 *
 * This component is not responsible for GT idleness (RC6) nor GT frequency
 * management (RPS).
 */
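
/*
 * Illustrative outer-level protection sketch (hypothetical IOCTL handler;
 * "example_ioctl" and "example_do_work" are not part of this driver): the
 * whole operation is wrapped in a get/put pair so that inner
 * memory-management paths never need to wake the device themselves:
 *
 *	static int example_ioctl(struct drm_device *drm, void *data,
 *				 struct drm_file *file)
 *	{
 *		struct xe_device *xe = to_xe_device(drm);
 *		int ret;
 *
 *		xe_pm_runtime_get(xe);
 *		ret = example_do_work(xe, data);	// hypothetical helper
 *		xe_pm_runtime_put(xe);
 *
 *		return ret;
 *	}
 */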

#ifdef CONFIG_LOCKDEP
static struct lockdep_map xe_pm_runtime_d3cold_map = {
	.name = "xe_rpm_d3cold_map"
};

static struct lockdep_map xe_pm_runtime_nod3cold_map = {
	.name = "xe_rpm_nod3cold_map"
};

static struct lockdep_map xe_pm_block_lockdep_map = {
	.name = "xe_pm_block_map",
};
#endif

static void xe_pm_block_begin_signalling(void)
{
	lock_acquire_shared_recursive(&xe_pm_block_lockdep_map, 0, 1, NULL, _RET_IP_);
}

static void xe_pm_block_end_signalling(void)
{
	lock_release(&xe_pm_block_lockdep_map, _RET_IP_);
}

/**
 * xe_pm_might_block_on_suspend() - Annotate that the code might block on suspend
 *
 * Annotation to use where the code might block or cease to make
 * progress pending resume completion.
 */
void xe_pm_might_block_on_suspend(void)
{
	lock_map_acquire(&xe_pm_block_lockdep_map);
	lock_map_release(&xe_pm_block_lockdep_map);
}

/**
 * xe_pm_block_on_suspend() - Block pending suspend.
 * @xe: The xe device about to be suspended.
 *
 * Block if the pm notifier has started evicting bos, to avoid
 * racing with it and validating those bos back. The function is
 * annotated to ensure no locks are held that are also grabbed
 * in the pm notifier or the device suspend / resume.
 * This is intended to be used by freezable tasks only
 * (not freezable workqueues), with the intention that the function
 * returns %-ERESTARTSYS when tasks are frozen during suspend,
 * allowing the task to freeze. The caller must be able to
 * handle the %-ERESTARTSYS.
 *
 * Return: %0 on success, %-ERESTARTSYS on pending signal or
 * if freezing is requested.
 */
int xe_pm_block_on_suspend(struct xe_device *xe)
{
	xe_pm_might_block_on_suspend();

	return wait_for_completion_interruptible(&xe->pm_block);
}
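
/*
 * Intended calling pattern (sketch; "example_op" and "example_validate_bos"
 * are hypothetical, not part of this driver), from a freezable task that
 * can propagate -ERESTARTSYS back to its caller:
 *
 *	static int example_op(struct xe_device *xe)
 *	{
 *		int err;
 *
 *		err = xe_pm_block_on_suspend(xe);
 *		if (err)
 *			return err;	// -ERESTARTSYS: allow freeze / restart
 *
 *		return example_validate_bos(xe);	// hypothetical helper
 *	}
 */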

/**
 * xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context
 * @xe: The xe device.
 *
 * Return: true if it is safe to runtime resume from reclaim context.
 * false otherwise.
 */
bool xe_rpm_reclaim_safe(const struct xe_device *xe)
{
	return !xe->d3cold.capable;
}

static void xe_rpm_lockmap_acquire(const struct xe_device *xe)
{
	lock_map_acquire(xe_rpm_reclaim_safe(xe) ?
			 &xe_pm_runtime_nod3cold_map :
			 &xe_pm_runtime_d3cold_map);
}

static void xe_rpm_lockmap_release(const struct xe_device *xe)
{
	lock_map_release(xe_rpm_reclaim_safe(xe) ?
			 &xe_pm_runtime_nod3cold_map :
			 &xe_pm_runtime_d3cold_map);
}

/**
 * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_suspend(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err;

	drm_dbg(&xe->drm, "Suspending device\n");
	xe_pm_block_begin_signalling();
	trace_xe_pm_suspend(xe, __builtin_return_address(0));

	err = xe_pxp_pm_suspend(xe->pxp);
	if (err)
		goto err;

	xe_late_bind_wait_for_worker_completion(&xe->late_bind);

	for_each_gt(gt, xe, id)
		xe_gt_suspend_prepare(gt);

	xe_display_pm_suspend(xe);

	/* FIXME: Super racy... */
	err = xe_bo_evict_all(xe);
	if (err)
		goto err_display;

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err)
			goto err_display;
	}

	xe_irq_suspend(xe);

	xe_display_pm_suspend_late(xe);

	xe_i2c_pm_suspend(xe);

	drm_dbg(&xe->drm, "Device suspended\n");
	xe_pm_block_end_signalling();

	return 0;

err_display:
	xe_display_pm_resume(xe);
	xe_pxp_pm_resume(xe->pxp);
err:
	drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
	xe_pm_block_end_signalling();
	return err;
}

/**
 * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_resume(struct xe_device *xe)
{
	struct xe_tile *tile;
	struct xe_gt *gt;
	u8 id;
	int err;

	xe_pm_block_begin_signalling();
	drm_dbg(&xe->drm, "Resuming device\n");
	trace_xe_pm_resume(xe, __builtin_return_address(0));

	for_each_gt(gt, xe, id)
		xe_gt_idle_disable_c6(gt);

	for_each_tile(tile, xe, id)
		xe_wa_apply_tile_workarounds(tile);

	err = xe_pcode_ready(xe, true);
	if (err)
		return err;

	xe_display_pm_resume_early(xe);

	/*
	 * This only restores pinned memory which is the memory required for the
	 * GT(s) to resume.
	 */
	err = xe_bo_restore_early(xe);
	if (err)
		goto err;

	xe_i2c_pm_resume(xe, true);

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id) {
		err = xe_gt_resume(gt);
		if (err)
			break;
	}

	/*
	 * Try to bring up display before bailing from GT resume failure,
	 * so we don't leave the user clueless with a blank screen.
	 */
	xe_display_pm_resume(xe);
	if (err)
		goto err;

	err = xe_bo_restore_late(xe);
	if (err)
		goto err;

	xe_pxp_pm_resume(xe->pxp);

	if (IS_VF_CCS_READY(xe))
		xe_sriov_vf_ccs_register_context(xe);

	xe_late_bind_fw_load(&xe->late_bind);

	drm_dbg(&xe->drm, "Device resumed\n");
	xe_pm_block_end_signalling();
	return 0;
err:
	drm_dbg(&xe->drm, "Device resume failed %d\n", err);
	xe_pm_block_end_signalling();
	return err;
}

static bool xe_pm_pci_d3cold_capable(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *root_pdev;

	root_pdev = pcie_find_root_port(pdev);
	if (!root_pdev)
		return false;

	/* D3Cold requires PME capability */
	if (!pci_pme_capable(root_pdev, PCI_D3cold)) {
		drm_dbg(&xe->drm, "d3cold: PME# not supported\n");
		return false;
	}

	/* D3Cold requires _PR3 power resource */
	if (!pci_pr3_present(root_pdev)) {
		drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n");
		return false;
	}

	return true;
}

static void xe_pm_runtime_init(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/* Our current VFs do not support RPM, so disable it */
	if (IS_SRIOV_VF(xe))
		return;

	/*
	 * Disable the system suspend direct complete optimization.
	 * We need to ensure that the regular device suspend/resume functions
	 * are called since our runtime_pm cannot guarantee local memory
	 * eviction for d3cold.
	 * TODO: Check HDA audio dependencies claimed by i915, and then enforce
	 * this option on integrated graphics as well.
	 */
	if (IS_DGFX(xe))
		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);

	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, 1000);
	pm_runtime_set_active(dev);
	pm_runtime_allow(dev);
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put(dev);
}

int xe_pm_init_early(struct xe_device *xe)
{
	int err;

	INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);

	err = drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
	if (err)
		return err;

	err = drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
	if (err)
		return err;

	xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe);
	return 0;
}
ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */

static u32 vram_threshold_value(struct xe_device *xe)
{
	if (xe->info.platform == XE_BATTLEMAGE) {
		const char *product_name;

		product_name = dmi_get_system_info(DMI_PRODUCT_NAME);
		if (product_name && strstr(product_name, "NUC13RNG")) {
			drm_warn(&xe->drm, "BMG + D3Cold not supported on this platform\n");
			return 0;
		}
	}

	return DEFAULT_VRAM_THRESHOLD;
}

static void xe_pm_wake_rebind_workers(struct xe_device *xe)
{
	struct xe_vm *vm, *next;

	mutex_lock(&xe->rebind_resume_lock);
	list_for_each_entry_safe(vm, next, &xe->rebind_resume_list,
				 preempt.pm_activate_link) {
		list_del_init(&vm->preempt.pm_activate_link);
		xe_vm_resume_rebind_worker(vm);
	}
	mutex_unlock(&xe->rebind_resume_lock);
}

static int xe_pm_notifier_callback(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier);
	int err = 0;

	switch (action) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
	{
		struct xe_validation_ctx ctx;

		reinit_completion(&xe->pm_block);
		xe_pm_block_begin_signalling();
		xe_pm_runtime_get(xe);
		(void)xe_validation_ctx_init(&ctx, &xe->val, NULL,
					     (struct xe_val_flags) {.exclusive = true});
		err = xe_bo_evict_all_user(xe);
		xe_validation_ctx_fini(&ctx);
		if (err)
			drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err);

		err = xe_bo_notifier_prepare_all_pinned(xe);
		if (err)
			drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err);
		/*
		 * Keep the runtime pm reference until post hibernation / post suspend to
		 * avoid a runtime suspend interfering with evicted objects or backup
		 * allocations.
		 */
		xe_pm_block_end_signalling();
		break;
	}
	case PM_POST_HIBERNATION:
	case PM_POST_SUSPEND:
		complete_all(&xe->pm_block);
		xe_pm_wake_rebind_workers(xe);
		xe_bo_notifier_unprepare_all_pinned(xe);
		xe_pm_runtime_put(xe);
		break;
	}

	return NOTIFY_DONE;
}

/**
 * xe_pm_init - Initialize Xe Power Management
 * @xe: xe device instance
 *
 * This component is responsible for System and Device sleep states.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_init(struct xe_device *xe)
{
	u32 vram_threshold;
	int err;

	xe->pm_notifier.notifier_call = xe_pm_notifier_callback;
	err = register_pm_notifier(&xe->pm_notifier);
	if (err)
		return err;

	err = drmm_mutex_init(&xe->drm, &xe->rebind_resume_lock);
	if (err)
		goto err_unregister;

	init_completion(&xe->pm_block);
	complete_all(&xe->pm_block);
	INIT_LIST_HEAD(&xe->rebind_resume_list);

	/* For now suspend/resume is only allowed with GuC */
	if (!xe_device_uc_enabled(xe))
		return 0;

	if (xe->d3cold.capable) {
		vram_threshold = vram_threshold_value(xe);
		err = xe_pm_set_vram_threshold(xe, vram_threshold);
		if (err)
			goto err_unregister;
	}

	xe_pm_runtime_init(xe);
	return 0;

err_unregister:
	unregister_pm_notifier(&xe->pm_notifier);
	return err;
}

static void xe_pm_runtime_fini(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/* Our current VFs do not support RPM, so disable it */
	if (IS_SRIOV_VF(xe))
		return;

	pm_runtime_get_sync(dev);
	pm_runtime_forbid(dev);
}

/**
 * xe_pm_fini - Finalize PM
 * @xe: xe device instance
 */
void xe_pm_fini(struct xe_device *xe)
{
	if (xe_device_uc_enabled(xe))
		xe_pm_runtime_fini(xe);

	unregister_pm_notifier(&xe->pm_notifier);
}

static void xe_pm_write_callback_task(struct xe_device *xe,
				      struct task_struct *task)
{
	WRITE_ONCE(xe->pm_callback_task, task);

	/*
	 * Just in case it's somehow possible for our writes to be reordered to
	 * the extent that something else re-uses the task written in
	 * pm_callback_task. For example after returning from the callback, but
	 * before the reordered write that resets pm_callback_task back to NULL.
	 */
	smp_mb(); /* pairs with xe_pm_read_callback_task */
}

struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
{
	smp_mb(); /* pairs with xe_pm_write_callback_task */

	return READ_ONCE(xe->pm_callback_task);
}

/**
 * xe_pm_runtime_suspended - Check if runtime_pm state is suspended
 * @xe: xe device instance
 *
 * This does not provide any guarantee that the device is going to remain
 * suspended as it might be racing with the runtime state transitions.
 * It can be used only as an unreliable assertion, to ensure that we are not in
 * the sleep state while trying to access some memory for instance.
 *
 * Returns true if the PCI device is suspended, false otherwise.
 */
bool xe_pm_runtime_suspended(struct xe_device *xe)
{
	return pm_runtime_suspended(xe->drm.dev);
}

/**
 * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold
 * @xe: xe device instance
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_runtime_suspend(struct xe_device *xe)
{
	struct xe_bo *bo, *on;
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	trace_xe_pm_runtime_suspend(xe, __builtin_return_address(0));
	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	/*
	 * The actual xe_pm_runtime_put() is always async underneath, so
	 * exactly where that is called should make no difference to us. However,
	 * we still need to be very careful with the locks that this callback
	 * acquires and the locks that are acquired and held by any callers of
	 * xe_runtime_pm_get(). We already have the matching annotation
	 * on that side, but we also need it here. For example lockdep should be
	 * able to tell us if the following scenario is in theory possible:
	 *
	 * CPU0                          | CPU1 (kworker)
	 * lock(A)                       |
	 *                               | xe_pm_runtime_suspend()
	 *                               |      lock(A)
	 * xe_pm_runtime_get()           |
	 *
	 * This will clearly deadlock since rpm core needs to wait for
	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
	 * on CPU0, which prevents CPU1 from making forward progress. With the
	 * annotation here and in xe_pm_runtime_get() lockdep will see
	 * the potential lock inversion and give us a nice splat.
	 */
	xe_rpm_lockmap_acquire(xe);

	err = xe_pxp_pm_suspend(xe->pxp);
	if (err)
		goto out;

	/*
	 * Take the lock for the entire list op, as xe_ttm_bo_destroy and
	 * xe_bo_move_notify also check and delete the bo entry from the user
	 * fault list.
	 */
	mutex_lock(&xe->mem_access.vram_userfault.lock);
	list_for_each_entry_safe(bo, on,
				 &xe->mem_access.vram_userfault.list, vram_userfault_link)
		xe_bo_runtime_pm_release_mmap_offset(bo);
	mutex_unlock(&xe->mem_access.vram_userfault.lock);

	xe_display_pm_runtime_suspend(xe);

	if (xe->d3cold.allowed) {
		err = xe_bo_evict_all(xe);
		if (err)
			goto out_resume;
	}

	for_each_gt(gt, xe, id) {
		err = xe->d3cold.allowed ? xe_gt_suspend(gt) : xe_gt_runtime_suspend(gt);
		if (err)
			goto out_resume;
	}

	xe_irq_suspend(xe);

	xe_display_pm_runtime_suspend_late(xe);

	xe_i2c_pm_suspend(xe);

	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return 0;

out_resume:
	xe_display_pm_runtime_resume(xe);
	xe_pxp_pm_resume(xe->pxp);
out:
	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/**
 * xe_pm_runtime_resume - Waking up from D3hot/D3Cold
 * @xe: xe device instance
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_runtime_resume(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	trace_xe_pm_runtime_resume(xe, __builtin_return_address(0));
	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	xe_rpm_lockmap_acquire(xe);

	if (xe->d3cold.allowed) {
		for_each_gt(gt, xe, id)
			xe_gt_idle_disable_c6(gt);

		err = xe_pcode_ready(xe, true);
		if (err)
			goto out;

		xe_display_pm_resume_early(xe);

		/*
		 * This only restores pinned memory which is the memory
		 * required for the GT(s) to resume.
		 */
		err = xe_bo_restore_early(xe);
		if (err)
			goto out;
	}

	xe_i2c_pm_resume(xe, xe->d3cold.allowed);

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id) {
		err = xe->d3cold.allowed ? xe_gt_resume(gt) : xe_gt_runtime_resume(gt);
		if (err)
			break;
	}

	/*
	 * Try to bring up display before bailing from GT resume failure,
	 * so we don't leave the user clueless with a blank screen.
	 */
	xe_display_pm_runtime_resume(xe);
	if (err)
		goto out;

	if (xe->d3cold.allowed) {
		err = xe_bo_restore_late(xe);
		if (err)
			goto out;
	}

	xe_pxp_pm_resume(xe->pxp);

	if (IS_VF_CCS_READY(xe))
		xe_sriov_vf_ccs_register_context(xe);

	if (xe->d3cold.allowed)
		xe_late_bind_fw_load(&xe->late_bind);

out:
	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/*
 * For places where resume is synchronous it can be quite easy to deadlock
 * if we are not careful. Also in practice it might be quite timing
 * sensitive to ever see the 0 -> 1 transition with the caller's locks
 * held, so deadlocks might exist but are hard for lockdep to ever see.
 * With this in mind, help lockdep learn about the potentially scary
 * stuff that can happen inside the runtime_resume callback by acquiring
 * a dummy lock (it doesn't protect anything and gets compiled out on
 * non-debug builds). Lockdep then only needs to see the
 * xe_pm_runtime_xxx_map -> runtime_resume callback once, and then can
 * hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map.
 * For example if the (callers_locks) are ever grabbed in the
 * runtime_resume callback, lockdep should give us a nice splat.
 */
static void xe_rpm_might_enter_cb(const struct xe_device *xe)
{
	xe_rpm_lockmap_acquire(xe);
	xe_rpm_lockmap_release(xe);
}

/*
 * Prime the lockdep maps for known locking orders that need to
 * be supported but that may not always occur on all systems.
 */
static void xe_pm_runtime_lockdep_prime(void)
{
	struct dma_resv lockdep_resv;

	dma_resv_init(&lockdep_resv);
	lock_map_acquire(&xe_pm_runtime_d3cold_map);
	/* D3Cold takes the dma_resv locks to evict bos */
	dma_resv_lock(&lockdep_resv, NULL);
	dma_resv_unlock(&lockdep_resv);
	lock_map_release(&xe_pm_runtime_d3cold_map);

	/* Shrinkers might like to wake up the device under reclaim. */
	fs_reclaim_acquire(GFP_KERNEL);
	lock_map_acquire(&xe_pm_runtime_nod3cold_map);
	lock_map_release(&xe_pm_runtime_nod3cold_map);
	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously
 * @xe: xe device instance
 *
 * When possible, scope-based runtime PM (through guard(xe_pm_runtime)) should
 * be preferred over direct usage of this function. Manual get/put handling
 * should only be used when the function contains goto-based logic which
 * can break scope-based handling, or when the lifetime of the runtime PM
 * reference does not match a specific scope (e.g., runtime PM obtained in one
 * function and released in a different one).
 */
void xe_pm_runtime_get(struct xe_device *xe)
{
	trace_xe_pm_runtime_get(xe, __builtin_return_address(0));
	pm_runtime_get_noresume(xe->drm.dev);

	if (xe_pm_read_callback_task(xe) == current)
		return;

	xe_rpm_might_enter_cb(xe);
	pm_runtime_resume(xe->drm.dev);
}

/**
 * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle
 * @xe: xe device instance
 */
void xe_pm_runtime_put(struct xe_device *xe)
{
	trace_xe_pm_runtime_put(xe, __builtin_return_address(0));
	if (xe_pm_read_callback_task(xe) == current) {
		pm_runtime_put_noidle(xe->drm.dev);
	} else {
		pm_runtime_mark_last_busy(xe->drm.dev);
		pm_runtime_put(xe->drm.dev);
	}
}

/**
 * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl
 * @xe: xe device instance
 *
 * When possible, scope-based runtime PM (through
 * ACQUIRE(xe_pm_runtime_ioctl, ...)) should be preferred over direct usage of
 * this function. Manual get/put handling should only be used when the function
 * contains goto-based logic which can break scope-based handling, or when the
 * lifetime of the runtime PM reference does not match a specific scope (e.g.,
 * runtime PM obtained in one function and released in a different one).
 *
 * Returns: Any number greater than or equal to 0 for success, negative error
 * code otherwise.
 */
int xe_pm_runtime_get_ioctl(struct xe_device *xe)
{
	trace_xe_pm_runtime_get_ioctl(xe, __builtin_return_address(0));
	if (WARN_ON(xe_pm_read_callback_task(xe) == current))
		return -ELOOP;

	xe_rpm_might_enter_cb(xe);
	return pm_runtime_get_sync(xe->drm.dev);
}
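
/*
 * Illustrative manual pattern for xe_pm_runtime_get_ioctl() ("example_ioctl"
 * and "example_do_work" are hypothetical, not part of this driver). Note that
 * the underlying pm_runtime_get_sync() bumps the usage counter even on
 * failure, so the reference must be dropped on the error path as well:
 *
 *	static int example_ioctl(struct drm_device *drm, void *data,
 *				 struct drm_file *file)
 *	{
 *		struct xe_device *xe = to_xe_device(drm);
 *		int ret;
 *
 *		ret = xe_pm_runtime_get_ioctl(xe);
 *		if (ret >= 0)
 *			ret = example_do_work(xe, data);
 *		xe_pm_runtime_put(xe);
 *
 *		return ret < 0 ? ret : 0;
 *	}
 */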

/**
 * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active
 * @xe: xe device instance
 *
 * Return: True if device is awake (regardless of the previous number of
 * references) and a new reference was taken, false otherwise.
 */
bool xe_pm_runtime_get_if_active(struct xe_device *xe)
{
	return pm_runtime_get_if_active(xe->drm.dev) > 0;
}

/**
 * xe_pm_runtime_get_if_in_use - Get a new reference if device is active with previous ref taken
 * @xe: xe device instance
 *
 * Return: True if device is awake, a previous reference had been already taken,
 * and a new reference was now taken, false otherwise.
 */
bool xe_pm_runtime_get_if_in_use(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		/* The device is awake, grab the ref and move on */
		pm_runtime_get_noresume(xe->drm.dev);
		return true;
	}

	return pm_runtime_get_if_in_use(xe->drm.dev) > 0;
}
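
/*
 * Sketch of the opportunistic pattern this helper enables (hypothetical
 * caller): touch the hardware only if it is already in use, without ever
 * waking it up from here:
 *
 *	if (xe_pm_runtime_get_if_in_use(xe)) {
 *		example_sample_counters(xe);	// hypothetical helper
 *		xe_pm_runtime_put(xe);
 *	}
 */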

/*
 * Very unreliable! Should only be used to suppress the false positive case
 * in the missing outer rpm protection warning.
 */
static bool xe_pm_suspending_or_resuming(struct xe_device *xe)
{
#ifdef CONFIG_PM
	struct device *dev = xe->drm.dev;

	return dev->power.runtime_status == RPM_SUSPENDING ||
	       dev->power.runtime_status == RPM_RESUMING ||
	       pm_suspend_in_progress();
#else
	return false;
#endif
}

/**
 * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming
 * @xe: xe device instance
 *
 * This function should be used in inner places where it is surely already
 * protected by outer-bound callers of `xe_pm_runtime_get`.
 * It will warn if not protected.
 * The reference should be put back after this function regardless, since it
 * will always bump the usage counter.
 *
 * When possible, scope-based runtime PM (through guard(xe_pm_runtime_noresume))
 * should be preferred over direct usage of this function. Manual get/put
 * handling should only be used when the function contains goto-based logic
 * which can break scope-based handling, or when the lifetime of the runtime PM
 * reference does not match a specific scope (e.g., runtime PM obtained in one
 * function and released in a different one).
 */
void xe_pm_runtime_get_noresume(struct xe_device *xe)
{
	bool ref;

	ref = xe_pm_runtime_get_if_in_use(xe);

	if (!ref) {
		pm_runtime_get_noresume(xe->drm.dev);
		drm_WARN(&xe->drm, !xe_pm_suspending_or_resuming(xe),
			 "Missing outer runtime PM protection\n");
	}
}
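
/*
 * Illustrative inner-level use (hypothetical helper), assuming an outer
 * caller already holds a reference via xe_pm_runtime_get():
 *
 *	static void example_inner(struct xe_device *xe)
 *	{
 *		xe_pm_runtime_get_noresume(xe);	// warns if no outer protection
 *		example_touch_hw(xe);		// hypothetical helper
 *		xe_pm_runtime_put(xe);		// always put the ref back
 *	}
 */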

/**
 * xe_pm_runtime_resume_and_get - Resume, then get a runtime_pm ref if awake.
 * @xe: xe device instance
 *
 * Returns: True if device is awake and the reference was taken, false otherwise.
 */
bool xe_pm_runtime_resume_and_get(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		/* The device is awake, grab the ref and move on */
		pm_runtime_get_noresume(xe->drm.dev);
		return true;
	}

	xe_rpm_might_enter_cb(xe);
	return pm_runtime_resume_and_get(xe->drm.dev) >= 0;
}
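
/*
 * Sketch of a caller that needs the device awake but can report failure
 * (hypothetical helper). No put is needed on failure, since no reference
 * was taken in that case:
 *
 *	if (!xe_pm_runtime_resume_and_get(xe))
 *		return -EAGAIN;	// the device could not be resumed
 *	ret = example_read_hw(xe);	// hypothetical helper
 *	xe_pm_runtime_put(xe);
 */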

/**
 * xe_pm_assert_unbounded_bridge - Disable PM if the parent PCIe bridge is unbound
 * @xe: xe device instance
 */
void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *bridge = pci_upstream_bridge(pdev);

	if (!bridge)
		return;

	if (!bridge->driver) {
		drm_warn(&xe->drm, "unbound parent PCI bridge, device won't support any PM\n");
		device_set_pm_not_required(&pdev->dev);
	}
}

/**
 * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold
 * @xe: xe device instance
 * @threshold: VRAM size in MiB for the D3cold threshold
 *
 * Return:
 * * 0		- success
 * * -EINVAL	- invalid argument
 */
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
	struct ttm_resource_manager *man;
	u32 vram_total_mb = 0;
	int i;

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man)
			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
	}

	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);

	if (threshold > vram_total_mb)
		return -EINVAL;

	mutex_lock(&xe->d3cold.lock);
	xe->d3cold.vram_threshold = threshold;
	mutex_unlock(&xe->d3cold.lock);

	return 0;
}
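
/*
 * Usage note: a threshold of 0 can never be exceeded by the check in
 * xe_pm_d3cold_allowed_toggle() below (total_vram_used_mb < vram_threshold
 * is then always false), so it effectively forbids D3Cold. This is what
 * vram_threshold_value() relies on for the NUC13RNG quirk above:
 *
 *	if (xe->d3cold.capable)
 *		err = xe_pm_set_vram_threshold(xe, 0);	// keep D3Cold disallowed
 */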

/**
 * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed
 * @xe: xe device instance
 *
 * To be called during runtime_pm idle callback.
 * Check for all the D3Cold conditions ahead of runtime suspend.
 */
void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
{
	struct ttm_resource_manager *man;
	u32 total_vram_used_mb = 0;
	u64 vram_used;
	int i;

	if (!xe->d3cold.capable) {
		xe->d3cold.allowed = false;
		return;
	}

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man) {
			vram_used = ttm_resource_manager_usage(man);
			total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024);
		}
	}

	mutex_lock(&xe->d3cold.lock);

	if (total_vram_used_mb < xe->d3cold.vram_threshold)
		xe->d3cold.allowed = true;
	else
		xe->d3cold.allowed = false;

	mutex_unlock(&xe->d3cold.lock);
}

/**
 * xe_pm_module_init() - Perform xe_pm specific module initialization.
 *
 * Return: 0 on success. Currently doesn't fail.
 */
int __init xe_pm_module_init(void)
{
	xe_pm_runtime_lockdep_prime();
	return 0;
}