// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_pm.h"

#include <linux/fault-inject.h>
#include <linux/pm_runtime.h>
#include <linux/suspend.h>

#include <drm/drm_managed.h>
#include <drm/ttm/ttm_placement.h>

#include "display/xe_display.h"
#include "xe_bo.h"
#include "xe_bo_evict.h"
#include "xe_device.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_gt_idle.h"
#include "xe_i2c.h"
#include "xe_irq.h"
#include "xe_late_bind_fw.h"
#include "xe_pcode.h"
#include "xe_pxp.h"
#include "xe_sriov_vf_ccs.h"
#include "xe_trace.h"
#include "xe_vm.h"
#include "xe_wa.h"

/**
 * DOC: Xe Power Management
 *
 * Xe PM implements the main routines for both system level suspend states and
 * for the opportunistic runtime suspend states.
 *
 * System Level Suspend (S-States) - In general this is OS initiated suspend
 * driven by ACPI for achieving S0ix (a.k.a. S2idle, freeze), S3 (suspend to ram),
 * S4 (disk). The main functions here are `xe_pm_suspend` and `xe_pm_resume`.
 * They are the entry points for suspending to and resuming from these states.
 *
 * PCI Device Suspend (D-States) - This is the opportunistic PCIe device low power
 * state D3, controlled by the PCI subsystem and ACPI with the help from the
 * runtime_pm infrastructure.
 * PCI D3 is special and can mean D3hot, where Vcc power stays on to keep memory
 * alive and allow a quicker, lower latency resume, or D3cold, where Vcc power
 * is off for better power savings.
 * The Vcc control of the PCI hierarchy can only be controlled at the PCI root
 * port level, while the device driver can be behind multiple bridges/switches
 * and paired with other devices. For this reason, the PCI subsystem cannot
 * perform the transition towards D3cold. The lowest runtime PM possible from
 * the PCI subsystem is D3hot. Then, if all the paired devices in the same root
 * port are in D3hot, ACPI will assist here and run its own methods (_PR3 and
 * _OFF) to perform the transition from D3hot to D3cold. Xe may disallow this
 * transition by calling pci_d3cold_disable(root_pdev) before going to runtime
 * suspend. The decision is based on runtime conditions such as VRAM usage, for
 * instance to allow a quick and low latency resume.
 *
 * Runtime PM - This infrastructure provided by the Linux kernel allows the
 * device drivers to indicate when they can be runtime suspended, so the device
 * can be put in D3 (if supported), or allow deeper package sleep states
 * (PC-states), and/or other low level power states. The Xe PM component
 * provides `xe_pm_runtime_suspend` and `xe_pm_runtime_resume` functions that
 * the PCI subsystem will call before transitioning to/from runtime suspend.
 *
 * Also, Xe PM provides get and put functions that the Xe driver will use to
 * indicate activity. In order to avoid locking complications with the memory
 * management, whenever possible, these get and put functions need to be called
 * from the higher/outer levels.
 * The main cases that need to be protected from the outer levels are: IOCTL,
 * sysfs, debugfs, dma-buf sharing, GPU execution.
 *
 * This component is not responsible for GT idleness (RC6) nor GT frequency
 * management (RPS).
 */
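
/*
 * Example (illustrative sketch only, not part of the driver): the outer-level
 * get/put pattern described above, as it would typically look at an ioctl or
 * sysfs entry point. foo_ioctl() and foo_work() are hypothetical names.
 *
 *	static int foo_ioctl(struct drm_device *dev, void *data,
 *			     struct drm_file *file)
 *	{
 *		struct xe_device *xe = to_xe_device(dev);
 *		int ret;
 *
 *		xe_pm_runtime_get(xe);		// wake the device, take a ref
 *		ret = foo_work(xe, data);	// inner code may assume the
 *						// device is awake
 *		xe_pm_runtime_put(xe);		// drop ref, allow autosuspend
 *		return ret;
 *	}
 */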

#ifdef CONFIG_LOCKDEP
static struct lockdep_map xe_pm_runtime_d3cold_map = {
	.name = "xe_rpm_d3cold_map"
};

static struct lockdep_map xe_pm_runtime_nod3cold_map = {
	.name = "xe_rpm_nod3cold_map"
};

static struct lockdep_map xe_pm_block_lockdep_map = {
	.name = "xe_pm_block_map",
};
#endif

static void xe_pm_block_begin_signalling(void)
{
	lock_acquire_shared_recursive(&xe_pm_block_lockdep_map, 0, 1, NULL, _RET_IP_);
}

static void xe_pm_block_end_signalling(void)
{
	lock_release(&xe_pm_block_lockdep_map, _RET_IP_);
}

/**
 * xe_pm_might_block_on_suspend() - Annotate that the code might block on suspend
 *
 * Annotation to use where the code might block or cease to make
 * progress pending resume completion.
 */
void xe_pm_might_block_on_suspend(void)
{
	lock_map_acquire(&xe_pm_block_lockdep_map);
	lock_map_release(&xe_pm_block_lockdep_map);
}

/**
 * xe_pm_block_on_suspend() - Block pending suspend.
 * @xe: The xe device about to be suspended.
 *
 * Block if the pm notifier has started evicting bos, to avoid
 * racing with it and validating those bos back. The function is
 * annotated to ensure no locks are held that are also grabbed
 * in the pm notifier or the device suspend / resume.
 * This is intended to be used by freezable tasks only
 * (not freezable workqueues), with the intention that the function
 * returns %-ERESTARTSYS when tasks are frozen during suspend,
 * allowing the task to freeze. The caller must be able to
 * handle the %-ERESTARTSYS.
 *
 * Return: %0 on success, %-ERESTARTSYS on signal pending or
 * if freezing requested.
 */
int xe_pm_block_on_suspend(struct xe_device *xe)
{
	xe_pm_might_block_on_suspend();

	return wait_for_completion_interruptible(&xe->pm_block);
}
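
/*
 * Example (illustrative sketch only): how a freezable task would typically use
 * xe_pm_block_on_suspend() before validating bos, propagating -ERESTARTSYS so
 * the task can be frozen during suspend. foo_prepare_bind() is hypothetical.
 *
 *	static int foo_prepare_bind(struct xe_device *xe)
 *	{
 *		int err;
 *
 *		err = xe_pm_block_on_suspend(xe);
 *		if (err)
 *			return err;	// -ERESTARTSYS: caller must restart later
 *
 *		return 0;		// safe to validate / rebind bos now
 *	}
 */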

/**
 * xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context
 * @xe: The xe device.
 *
 * Return: true if it is safe to runtime resume from reclaim context.
 * false otherwise.
 */
bool xe_rpm_reclaim_safe(const struct xe_device *xe)
{
	return !xe->d3cold.capable;
}

static void xe_rpm_lockmap_acquire(const struct xe_device *xe)
{
	lock_map_acquire(xe_rpm_reclaim_safe(xe) ?
			 &xe_pm_runtime_nod3cold_map :
			 &xe_pm_runtime_d3cold_map);
}

static void xe_rpm_lockmap_release(const struct xe_device *xe)
{
	lock_map_release(xe_rpm_reclaim_safe(xe) ?
			 &xe_pm_runtime_nod3cold_map :
			 &xe_pm_runtime_d3cold_map);
}

/**
 * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_suspend(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err;

	drm_dbg(&xe->drm, "Suspending device\n");
	xe_pm_block_begin_signalling();
	trace_xe_pm_suspend(xe, __builtin_return_address(0));

	err = xe_pxp_pm_suspend(xe->pxp);
	if (err)
		goto err;

	xe_late_bind_wait_for_worker_completion(&xe->late_bind);

	for_each_gt(gt, xe, id)
		xe_gt_suspend_prepare(gt);

	xe_display_pm_suspend(xe);

	/* FIXME: Super racey... */
	err = xe_bo_evict_all(xe);
	if (err)
		goto err_display;

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err)
			goto err_display;
	}

	xe_irq_suspend(xe);

	xe_display_pm_suspend_late(xe);

	xe_i2c_pm_suspend(xe);

	drm_dbg(&xe->drm, "Device suspended\n");
	xe_pm_block_end_signalling();

	return 0;

err_display:
	xe_display_pm_resume(xe);
	xe_pxp_pm_resume(xe->pxp);
err:
	drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
	xe_pm_block_end_signalling();
	return err;
}

/**
 * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_resume(struct xe_device *xe)
{
	struct xe_tile *tile;
	struct xe_gt *gt;
	u8 id;
	int err;

	xe_pm_block_begin_signalling();
	drm_dbg(&xe->drm, "Resuming device\n");
	trace_xe_pm_resume(xe, __builtin_return_address(0));

	for_each_gt(gt, xe, id)
		xe_gt_idle_disable_c6(gt);

	for_each_tile(tile, xe, id)
		xe_wa_apply_tile_workarounds(tile);

	err = xe_pcode_ready(xe, true);
	if (err)
		return err;

	xe_display_pm_resume_early(xe);

	/*
	 * This only restores pinned memory which is the memory required for the
	 * GT(s) to resume.
	 */
	err = xe_bo_restore_early(xe);
	if (err)
		goto err;

	xe_i2c_pm_resume(xe, true);

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	xe_display_pm_resume(xe);

	err = xe_bo_restore_late(xe);
	if (err)
		goto err;

	xe_pxp_pm_resume(xe->pxp);

	if (IS_VF_CCS_READY(xe))
		xe_sriov_vf_ccs_register_context(xe);

	xe_late_bind_fw_load(&xe->late_bind);

	drm_dbg(&xe->drm, "Device resumed\n");
	xe_pm_block_end_signalling();
	return 0;
err:
	drm_dbg(&xe->drm, "Device resume failed %d\n", err);
	xe_pm_block_end_signalling();
	return err;
}

static bool xe_pm_pci_d3cold_capable(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *root_pdev;

	root_pdev = pcie_find_root_port(pdev);
	if (!root_pdev)
		return false;

	/* D3Cold requires PME capability */
	if (!pci_pme_capable(root_pdev, PCI_D3cold)) {
		drm_dbg(&xe->drm, "d3cold: PME# not supported\n");
		return false;
	}

	/* D3Cold requires _PR3 power resource */
	if (!pci_pr3_present(root_pdev)) {
		drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n");
		return false;
	}

	return true;
}

static void xe_pm_runtime_init(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/* Our current VFs do not support RPM, so disable it */
	if (IS_SRIOV_VF(xe))
		return;

	/*
	 * Disable the system suspend direct complete optimization.
	 * We need to ensure that the regular device suspend/resume functions
	 * are called since our runtime_pm cannot guarantee local memory
	 * eviction for d3cold.
	 * TODO: Check HDA audio dependencies claimed by i915, and then enforce
	 * this option for integrated graphics as well.
	 */
	if (IS_DGFX(xe))
		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);

	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, 1000);
	pm_runtime_set_active(dev);
	pm_runtime_allow(dev);
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put(dev);
}

int xe_pm_init_early(struct xe_device *xe)
{
	int err;

	INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);

	err = drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
	if (err)
		return err;

	err = drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
	if (err)
		return err;

	xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe);
	return 0;
}
ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */

static u32 vram_threshold_value(struct xe_device *xe)
{
	/* FIXME: D3Cold temporarily disabled by default on BMG */
	if (xe->info.platform == XE_BATTLEMAGE)
		return 0;

	return DEFAULT_VRAM_THRESHOLD;
}
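
/*
 * Note: a threshold of 0 effectively disables D3Cold, since
 * xe_pm_d3cold_allowed_toggle() only sets d3cold.allowed when the VRAM in use
 * is strictly below the threshold, which can never be the case for 0.
 */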

static void xe_pm_wake_rebind_workers(struct xe_device *xe)
{
	struct xe_vm *vm, *next;

	mutex_lock(&xe->rebind_resume_lock);
	list_for_each_entry_safe(vm, next, &xe->rebind_resume_list,
				 preempt.pm_activate_link) {
		list_del_init(&vm->preempt.pm_activate_link);
		xe_vm_resume_rebind_worker(vm);
	}
	mutex_unlock(&xe->rebind_resume_lock);
}

static int xe_pm_notifier_callback(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier);
	int err = 0;

	switch (action) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
	{
		struct xe_validation_ctx ctx;

		reinit_completion(&xe->pm_block);
		xe_pm_block_begin_signalling();
		xe_pm_runtime_get(xe);
		(void)xe_validation_ctx_init(&ctx, &xe->val, NULL,
					     (struct xe_val_flags) {.exclusive = true});
		err = xe_bo_evict_all_user(xe);
		xe_validation_ctx_fini(&ctx);
		if (err)
			drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err);

		err = xe_bo_notifier_prepare_all_pinned(xe);
		if (err)
			drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err);
		/*
		 * Keep the runtime pm reference until post hibernation / post suspend to
		 * avoid a runtime suspend interfering with evicted objects or backup
		 * allocations.
		 */
		xe_pm_block_end_signalling();
		break;
	}
	case PM_POST_HIBERNATION:
	case PM_POST_SUSPEND:
		complete_all(&xe->pm_block);
		xe_pm_wake_rebind_workers(xe);
		xe_bo_notifier_unprepare_all_pinned(xe);
		xe_pm_runtime_put(xe);
		break;
	}

	return NOTIFY_DONE;
}

/**
 * xe_pm_init - Initialize Xe Power Management
 * @xe: xe device instance
 *
 * This component is responsible for System and Device sleep states.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_init(struct xe_device *xe)
{
	u32 vram_threshold;
	int err;

	xe->pm_notifier.notifier_call = xe_pm_notifier_callback;
	err = register_pm_notifier(&xe->pm_notifier);
	if (err)
		return err;

	err = drmm_mutex_init(&xe->drm, &xe->rebind_resume_lock);
	if (err)
		goto err_unregister;

	init_completion(&xe->pm_block);
	complete_all(&xe->pm_block);
	INIT_LIST_HEAD(&xe->rebind_resume_list);

	/* For now suspend/resume is only allowed with GuC */
	if (!xe_device_uc_enabled(xe))
		return 0;

	if (xe->d3cold.capable) {
		vram_threshold = vram_threshold_value(xe);
		err = xe_pm_set_vram_threshold(xe, vram_threshold);
		if (err)
			goto err_unregister;
	}

	xe_pm_runtime_init(xe);
	return 0;

err_unregister:
	unregister_pm_notifier(&xe->pm_notifier);
	return err;
}

static void xe_pm_runtime_fini(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/* Our current VFs do not support RPM, so disable it */
	if (IS_SRIOV_VF(xe))
		return;

	pm_runtime_get_sync(dev);
	pm_runtime_forbid(dev);
}

/**
 * xe_pm_fini - Finalize PM
 * @xe: xe device instance
 */
void xe_pm_fini(struct xe_device *xe)
{
	if (xe_device_uc_enabled(xe))
		xe_pm_runtime_fini(xe);

	unregister_pm_notifier(&xe->pm_notifier);
}

static void xe_pm_write_callback_task(struct xe_device *xe,
				      struct task_struct *task)
{
	WRITE_ONCE(xe->pm_callback_task, task);

	/*
	 * Just in case it's somehow possible for our writes to be reordered to
	 * the extent that something else re-uses the task written in
	 * pm_callback_task. For example after returning from the callback, but
	 * before the reordered write that resets pm_callback_task back to NULL.
	 */
	smp_mb(); /* pairs with xe_pm_read_callback_task */
}

struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
{
	smp_mb(); /* pairs with xe_pm_write_callback_task */

	return READ_ONCE(xe->pm_callback_task);
}

/**
 * xe_pm_runtime_suspended - Check if runtime_pm state is suspended
 * @xe: xe device instance
 *
 * This does not provide any guarantee that the device is going to remain
 * suspended as it might be racing with the runtime state transitions.
 * It can be used only as a non-reliable assertion, to ensure that we are not in
 * the sleep state while trying to access some memory for instance.
 *
 * Returns true if PCI device is suspended, false otherwise.
 */
bool xe_pm_runtime_suspended(struct xe_device *xe)
{
	return pm_runtime_suspended(xe->drm.dev);
}

/**
 * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold
 * @xe: xe device instance
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_runtime_suspend(struct xe_device *xe)
{
	struct xe_bo *bo, *on;
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	trace_xe_pm_runtime_suspend(xe, __builtin_return_address(0));
	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	/*
	 * The actual xe_pm_runtime_put() is always async underneath, so
	 * exactly where that is called should make no difference to us. However
	 * we still need to be very careful with the locks that this callback
	 * acquires and the locks that are acquired and held by any callers of
	 * xe_runtime_pm_get(). We already have the matching annotation
	 * on that side, but we also need it here. For example lockdep should be
	 * able to tell us if the following scenario is in theory possible:
	 *
	 * CPU0                          | CPU1 (kworker)
	 * lock(A)                       |
	 *                               | xe_pm_runtime_suspend()
	 *                               | lock(A)
	 * xe_pm_runtime_get()           |
	 *
	 * This will clearly deadlock since rpm core needs to wait for
	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
	 * on CPU0 which prevents CPU1 making forward progress. With the
	 * annotation here and in xe_pm_runtime_get() lockdep will see
	 * the potential lock inversion and give us a nice splat.
	 */
	xe_rpm_lockmap_acquire(xe);

	err = xe_pxp_pm_suspend(xe->pxp);
	if (err)
		goto out;

	/*
	 * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify
	 * also check and delete the bo entry from the user fault list.
	 */
	mutex_lock(&xe->mem_access.vram_userfault.lock);
	list_for_each_entry_safe(bo, on,
				 &xe->mem_access.vram_userfault.list, vram_userfault_link)
		xe_bo_runtime_pm_release_mmap_offset(bo);
	mutex_unlock(&xe->mem_access.vram_userfault.lock);

	xe_display_pm_runtime_suspend(xe);

	if (xe->d3cold.allowed) {
		err = xe_bo_evict_all(xe);
		if (err)
			goto out_resume;
	}

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err)
			goto out_resume;
	}

	xe_irq_suspend(xe);

	xe_display_pm_runtime_suspend_late(xe);

	xe_i2c_pm_suspend(xe);

	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return 0;

out_resume:
	xe_display_pm_runtime_resume(xe);
	xe_pxp_pm_resume(xe->pxp);
out:
	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/**
 * xe_pm_runtime_resume - Waking up from D3hot/D3Cold
 * @xe: xe device instance
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_runtime_resume(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	trace_xe_pm_runtime_resume(xe, __builtin_return_address(0));
	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	xe_rpm_lockmap_acquire(xe);

	for_each_gt(gt, xe, id)
		xe_gt_idle_disable_c6(gt);

	if (xe->d3cold.allowed) {
		err = xe_pcode_ready(xe, true);
		if (err)
			goto out;

		xe_display_pm_resume_early(xe);

		/*
		 * This only restores pinned memory which is the memory
		 * required for the GT(s) to resume.
		 */
		err = xe_bo_restore_early(xe);
		if (err)
			goto out;
	}

	xe_i2c_pm_resume(xe, xe->d3cold.allowed);

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	xe_display_pm_runtime_resume(xe);

	if (xe->d3cold.allowed) {
		err = xe_bo_restore_late(xe);
		if (err)
			goto out;
	}

	xe_pxp_pm_resume(xe->pxp);

	if (IS_VF_CCS_READY(xe))
		xe_sriov_vf_ccs_register_context(xe);

	if (xe->d3cold.allowed)
		xe_late_bind_fw_load(&xe->late_bind);

out:
	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/*
 * For places where resume is synchronous it can be quite easy to deadlock
 * if we are not careful. Also in practice it might be quite timing
 * sensitive to ever see the 0 -> 1 transition with the callers locks
 * held, so deadlocks might exist but are hard for lockdep to ever see.
 * With this in mind, help lockdep learn about the potentially scary
 * stuff that can happen inside the runtime_resume callback by acquiring
 * a dummy lock (it doesn't protect anything and gets compiled out on
 * non-debug builds). Lockdep then only needs to see the
 * xe_pm_runtime_xxx_map -> runtime_resume callback once, and then can
 * hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map.
 * For example if the (callers_locks) are ever grabbed in the
 * runtime_resume callback, lockdep should give us a nice splat.
 */
static void xe_rpm_might_enter_cb(const struct xe_device *xe)
{
	xe_rpm_lockmap_acquire(xe);
	xe_rpm_lockmap_release(xe);
}

/*
 * Prime the lockdep maps for known locking orders that need to
 * be supported but that may not always occur on all systems.
 */
static void xe_pm_runtime_lockdep_prime(void)
{
	struct dma_resv lockdep_resv;

	dma_resv_init(&lockdep_resv);
	lock_map_acquire(&xe_pm_runtime_d3cold_map);
	/* D3Cold takes the dma_resv locks to evict bos */
	dma_resv_lock(&lockdep_resv, NULL);
	dma_resv_unlock(&lockdep_resv);
	lock_map_release(&xe_pm_runtime_d3cold_map);

	/* Shrinkers might like to wake up the device under reclaim. */
	fs_reclaim_acquire(GFP_KERNEL);
	lock_map_acquire(&xe_pm_runtime_nod3cold_map);
	lock_map_release(&xe_pm_runtime_nod3cold_map);
	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously
 * @xe: xe device instance
 *
 * When possible, scope-based runtime PM (through guard(xe_pm_runtime)) is
 * preferred over direct usage of this function. Manual get/put handling
 * should only be used when the function contains goto-based logic which
 * can break scope-based handling, or when the lifetime of the runtime PM
 * reference does not match a specific scope (e.g., runtime PM obtained in one
 * function and released in a different one).
 */
void xe_pm_runtime_get(struct xe_device *xe)
{
	trace_xe_pm_runtime_get(xe, __builtin_return_address(0));
	pm_runtime_get_noresume(xe->drm.dev);

	if (xe_pm_read_callback_task(xe) == current)
		return;

	xe_rpm_might_enter_cb(xe);
	pm_runtime_resume(xe->drm.dev);
}
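
/*
 * Example (illustrative sketch only): the scope-based form mentioned in the
 * kernel-doc above, assuming the xe_pm_runtime guard class is available.
 * foo_query() and foo_read_hw() are hypothetical names.
 *
 *	static void foo_query(struct xe_device *xe, u32 *out)
 *	{
 *		guard(xe_pm_runtime)(xe);	// ref held for the whole scope
 *		*out = foo_read_hw(xe);		// device guaranteed awake here
 *	}					// ref dropped automatically
 */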

/**
 * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle
 * @xe: xe device instance
 */
void xe_pm_runtime_put(struct xe_device *xe)
{
	trace_xe_pm_runtime_put(xe, __builtin_return_address(0));
	if (xe_pm_read_callback_task(xe) == current) {
		pm_runtime_put_noidle(xe->drm.dev);
	} else {
		pm_runtime_mark_last_busy(xe->drm.dev);
		pm_runtime_put(xe->drm.dev);
	}
}

/**
 * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl
 * @xe: xe device instance
 *
 * When possible, scope-based runtime PM (through
 * ACQUIRE(xe_pm_runtime_ioctl, ...)) is preferred over direct usage of this
 * function. Manual get/put handling should only be used when the function
 * contains goto-based logic which can break scope-based handling, or when the
 * lifetime of the runtime PM reference does not match a specific scope (e.g.,
 * runtime PM obtained in one function and released in a different one).
 *
 * Returns: Any number greater than or equal to 0 for success, negative error
 * code otherwise.
 */
int xe_pm_runtime_get_ioctl(struct xe_device *xe)
{
	trace_xe_pm_runtime_get_ioctl(xe, __builtin_return_address(0));
	if (WARN_ON(xe_pm_read_callback_task(xe) == current))
		return -ELOOP;

	xe_rpm_might_enter_cb(xe);
	return pm_runtime_get_sync(xe->drm.dev);
}
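
/*
 * Example (illustrative sketch only): the manual ioctl-entry pattern when the
 * scope-based ACQUIRE() form mentioned above cannot be used. Note that the
 * underlying pm_runtime_get_sync() takes a reference even on failure, so the
 * put is unconditional. foo_ioctl() and foo_exec() are hypothetical names.
 *
 *	static long foo_ioctl(struct xe_device *xe, void *data)
 *	{
 *		long ret;
 *
 *		ret = xe_pm_runtime_get_ioctl(xe);
 *		if (ret >= 0)
 *			ret = foo_exec(xe, data);
 *
 *		xe_pm_runtime_put(xe);	// the ref is held even on failure
 *		return ret;
 *	}
 */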

/**
 * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active
 * @xe: xe device instance
 *
 * Return: True if device is awake (regardless of the previous number of
 * references) and a new reference was taken, false otherwise.
 */
bool xe_pm_runtime_get_if_active(struct xe_device *xe)
{
	return pm_runtime_get_if_active(xe->drm.dev) > 0;
}

/**
 * xe_pm_runtime_get_if_in_use - Get a new reference if device is active with previous ref taken
 * @xe: xe device instance
 *
 * Return: True if device is awake, a previous reference had been already taken,
 * and a new reference was now taken, false otherwise.
 */
bool xe_pm_runtime_get_if_in_use(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		/* The device is awake, grab the ref and move on */
		pm_runtime_get_noresume(xe->drm.dev);
		return true;
	}

	return pm_runtime_get_if_in_use(xe->drm.dev) > 0;
}

/*
 * Very unreliable! Should only be used to suppress the false positive case
 * in the missing outer rpm protection warning.
 */
static bool xe_pm_suspending_or_resuming(struct xe_device *xe)
{
#ifdef CONFIG_PM
	struct device *dev = xe->drm.dev;

	return dev->power.runtime_status == RPM_SUSPENDING ||
		dev->power.runtime_status == RPM_RESUMING ||
		pm_suspend_in_progress();
#else
	return false;
#endif
}

/**
 * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming
 * @xe: xe device instance
 *
 * This function should be used in inner places where it is surely already
 * protected by outer-bound callers of `xe_pm_runtime_get`.
 * It will warn if not protected.
 * The reference should be put back after this function regardless, since it
 * will always bump the usage counter.
 *
 * When possible, scope-based runtime PM (through guard(xe_pm_runtime_noresume))
 * is preferred over direct usage of this function. Manual get/put handling
 * should only be used when the function contains goto-based logic which can
 * break scope-based handling, or when the lifetime of the runtime PM reference
 * does not match a specific scope (e.g., runtime PM obtained in one function
 * and released in a different one).
 */
void xe_pm_runtime_get_noresume(struct xe_device *xe)
{
	bool ref;

	ref = xe_pm_runtime_get_if_in_use(xe);

	if (!ref) {
		pm_runtime_get_noresume(xe->drm.dev);
		drm_WARN(&xe->drm, !xe_pm_suspending_or_resuming(xe),
			 "Missing outer runtime PM protection\n");
	}
}

/**
 * xe_pm_runtime_resume_and_get - Resume, then get a runtime_pm ref if awake.
 * @xe: xe device instance
 *
 * Returns: True if device is awake and the reference was taken, false otherwise.
 */
bool xe_pm_runtime_resume_and_get(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		/* The device is awake, grab the ref and move on */
		pm_runtime_get_noresume(xe->drm.dev);
		return true;
	}

	xe_rpm_might_enter_cb(xe);
	return pm_runtime_resume_and_get(xe->drm.dev) >= 0;
}

/**
 * xe_pm_assert_unbounded_bridge - Disable PM on unbound PCIe parent bridge
 * @xe: xe device instance
 */
void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *bridge = pci_upstream_bridge(pdev);

	if (!bridge)
		return;

	if (!bridge->driver) {
		drm_warn(&xe->drm, "unbound parent pci bridge, device won't support any PM.\n");
		device_set_pm_not_required(&pdev->dev);
	}
}

/**
 * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold
 * @xe: xe device instance
 * @threshold: VRAM size in MiB for the D3cold threshold
 *
 * Return:
 * * 0		- success
 * * -EINVAL	- invalid argument
 */
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
	struct ttm_resource_manager *man;
	u32 vram_total_mb = 0;
	int i;

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man)
			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
	}

	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);

	if (threshold > vram_total_mb)
		return -EINVAL;

	mutex_lock(&xe->d3cold.lock);
	xe->d3cold.vram_threshold = threshold;
	mutex_unlock(&xe->d3cold.lock);

	return 0;
}

/**
 * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed
 * @xe: xe device instance
 *
 * To be called during runtime_pm idle callback.
 * Check for all the D3Cold conditions ahead of runtime suspend.
 */
void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
{
	struct ttm_resource_manager *man;
	u32 total_vram_used_mb = 0;
	u64 vram_used;
	int i;

	if (!xe->d3cold.capable) {
		xe->d3cold.allowed = false;
		return;
	}

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man) {
			vram_used = ttm_resource_manager_usage(man);
			total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024);
		}
	}

	mutex_lock(&xe->d3cold.lock);

	if (total_vram_used_mb < xe->d3cold.vram_threshold)
		xe->d3cold.allowed = true;
	else
		xe->d3cold.allowed = false;

	mutex_unlock(&xe->d3cold.lock);
}
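
/*
 * Example (illustrative numbers only): with a threshold of 300 MiB, 128 MiB of
 * VRAM in use at idle time leaves d3cold.allowed set, so the next runtime
 * suspend may reach D3cold; 512 MiB in use clears it, and runtime suspend
 * stops at D3hot to keep VRAM contents alive for a low latency resume.
 */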

/**
 * xe_pm_module_init() - Perform xe_pm specific module initialization.
 *
 * Return: 0 on success. Currently doesn't fail.
 */
int __init xe_pm_module_init(void)
{
	xe_pm_runtime_lockdep_prime();
	return 0;
}