xref: /linux/drivers/gpu/drm/xe/xe_pm.c (revision 6f17ab9a63e670bd62a287f95e3982f99eafd77e)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_pm.h"

#include <linux/fault-inject.h>
#include <linux/pm_runtime.h>
#include <linux/suspend.h>

#include <drm/drm_managed.h>
#include <drm/ttm/ttm_placement.h>

#include "display/xe_display.h"
#include "xe_bo.h"
#include "xe_bo_evict.h"
#include "xe_device.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_gt_idle.h"
#include "xe_i2c.h"
#include "xe_irq.h"
#include "xe_pcode.h"
#include "xe_pxp.h"
#include "xe_sriov_vf_ccs.h"
#include "xe_trace.h"
#include "xe_vm.h"
#include "xe_wa.h"

/**
 * DOC: Xe Power Management
 *
 * Xe PM implements the main routines for both system level suspend states and
 * for the opportunistic runtime suspend states.
 *
 * System Level Suspend (S-States) - In general this is OS initiated suspend
 * driven by ACPI for achieving S0ix (a.k.a. S2idle, freeze), S3 (suspend to
 * RAM) and S4 (suspend to disk). The main functions here are `xe_pm_suspend`
 * and `xe_pm_resume`. They are the entry points for suspending to and
 * resuming from these states.
 *
 * PCI Device Suspend (D-States) - This is the opportunistic PCIe device low
 * power state D3, controlled by the PCI subsystem and ACPI with help from the
 * runtime_pm infrastructure.
 * PCI D3 is special: it can mean D3hot, where Vcc power stays on so memory is
 * kept alive and resume is quicker and lower latency, or D3cold, where Vcc
 * power is off for better power savings.
 * Vcc for the PCI hierarchy can only be controlled at the PCI root port
 * level, while the device driver can sit behind multiple bridges/switches and
 * be paired with other devices. For this reason, the PCI subsystem cannot
 * perform the transition to D3cold on its own; the lowest runtime PM state
 * reachable from the PCI subsystem is D3hot. Then, once all the paired
 * devices under the same root port are in D3hot, ACPI assists and runs its
 * own methods (_PR3 and _OFF) to perform the transition from D3hot to D3cold.
 * Xe may disallow this transition by calling pci_d3cold_disable(root_pdev)
 * before going to runtime suspend, based on runtime conditions such as VRAM
 * usage, for instance to guarantee a quick, low latency resume.
 *
 * Runtime PM - This infrastructure provided by the Linux kernel allows device
 * drivers to indicate when they can be runtime suspended, so the device can
 * be put into D3 (if supported), allowing deeper package sleep states
 * (PC-states) and/or other low level power states. The Xe PM component
 * provides the `xe_pm_runtime_suspend` and `xe_pm_runtime_resume` functions
 * that the PCI subsystem calls on the transition to/from runtime suspend.
 *
 * Also, Xe PM provides get and put functions that the Xe driver uses to
 * indicate activity. In order to avoid locking complications with the memory
 * management, these get and put functions need to be called from the
 * higher/outer levels whenever possible. The main cases that need to be
 * protected at the outer levels are: IOCTL, sysfs, debugfs, dma-buf sharing,
 * and GPU execution. A sketch of the expected pattern follows this comment.
 *
 * This component is not responsible for GT idleness (RC6) or GT frequency
 * management (RPS).
 */
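
/*
 * Illustrative sketch (not part of the driver): how an outer-level entry
 * point such as an ioctl handler is expected to wrap GPU activity with the
 * runtime PM helpers below. xe_foo_ioctl() and xe_foo_do_work() are made-up
 * names used only for illustration; the reference is dropped unconditionally
 * because the underlying pm_runtime_get_sync() bumps the usage counter even
 * when the resume fails.
 *
 *	static int xe_foo_ioctl(struct drm_device *dev, void *data,
 *				struct drm_file *file)
 *	{
 *		struct xe_device *xe = to_xe_device(dev);
 *		int ret;
 *
 *		ret = xe_pm_runtime_get_ioctl(xe);
 *		if (ret >= 0)
 *			ret = xe_foo_do_work(xe, data);
 *		xe_pm_runtime_put(xe);
 *
 *		return ret;
 *	}
 */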

#ifdef CONFIG_LOCKDEP
static struct lockdep_map xe_pm_runtime_d3cold_map = {
	.name = "xe_rpm_d3cold_map"
};

static struct lockdep_map xe_pm_runtime_nod3cold_map = {
	.name = "xe_rpm_nod3cold_map"
};
#endif

/**
 * xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context
 * @xe: The xe device.
 *
 * Return: true if it is safe to runtime resume from reclaim context,
 * false otherwise.
 */
bool xe_rpm_reclaim_safe(const struct xe_device *xe)
{
	return !xe->d3cold.capable;
}

static void xe_rpm_lockmap_acquire(const struct xe_device *xe)
{
	lock_map_acquire(xe_rpm_reclaim_safe(xe) ?
			 &xe_pm_runtime_nod3cold_map :
			 &xe_pm_runtime_d3cold_map);
}

static void xe_rpm_lockmap_release(const struct xe_device *xe)
{
	lock_map_release(xe_rpm_reclaim_safe(xe) ?
			 &xe_pm_runtime_nod3cold_map :
			 &xe_pm_runtime_d3cold_map);
}

/**
 * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_suspend(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err;

	drm_dbg(&xe->drm, "Suspending device\n");
	trace_xe_pm_suspend(xe, __builtin_return_address(0));

	err = xe_pxp_pm_suspend(xe->pxp);
	if (err)
		goto err;

	for_each_gt(gt, xe, id)
		xe_gt_suspend_prepare(gt);

	xe_display_pm_suspend(xe);

	/* FIXME: Super racey... */
	err = xe_bo_evict_all(xe);
	if (err)
		goto err_display;

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err)
			goto err_display;
	}

	xe_irq_suspend(xe);

	xe_display_pm_suspend_late(xe);

	xe_i2c_pm_suspend(xe);

	drm_dbg(&xe->drm, "Device suspended\n");
	return 0;

err_display:
	xe_display_pm_resume(xe);
	xe_pxp_pm_resume(xe->pxp);
err:
	drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
	return err;
}

/**
 * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_resume(struct xe_device *xe)
{
	struct xe_tile *tile;
	struct xe_gt *gt;
	u8 id;
	int err;

	drm_dbg(&xe->drm, "Resuming device\n");
	trace_xe_pm_resume(xe, __builtin_return_address(0));

	for_each_gt(gt, xe, id)
		xe_gt_idle_disable_c6(gt);

	for_each_tile(tile, xe, id)
		xe_wa_apply_tile_workarounds(tile);

	err = xe_pcode_ready(xe, true);
	if (err)
		return err;

	xe_display_pm_resume_early(xe);

	/*
	 * This only restores pinned memory which is the memory required for the
	 * GT(s) to resume.
	 */
	err = xe_bo_restore_early(xe);
	if (err)
		goto err;

	xe_i2c_pm_resume(xe, xe->d3cold.allowed);

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	xe_display_pm_resume(xe);

	err = xe_bo_restore_late(xe);
	if (err)
		goto err;

	xe_pxp_pm_resume(xe->pxp);

	if (IS_SRIOV_VF(xe))
		xe_sriov_vf_ccs_register_context(xe);

	drm_dbg(&xe->drm, "Device resumed\n");
	return 0;
err:
	drm_dbg(&xe->drm, "Device resume failed %d\n", err);
	return err;
}

static bool xe_pm_pci_d3cold_capable(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *root_pdev;

	root_pdev = pcie_find_root_port(pdev);
	if (!root_pdev)
		return false;

	/* D3Cold requires PME capability */
	if (!pci_pme_capable(root_pdev, PCI_D3cold)) {
		drm_dbg(&xe->drm, "d3cold: PME# not supported\n");
		return false;
	}

	/* D3Cold requires _PR3 power resource */
	if (!pci_pr3_present(root_pdev)) {
		drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n");
		return false;
	}

	return true;
}

static void xe_pm_runtime_init(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/* Our current VFs do not support RPM, so disable it */
	if (IS_SRIOV_VF(xe))
		return;

	/*
	 * Disable the system suspend direct complete optimization.
	 * We need to ensure that the regular device suspend/resume functions
	 * are called since our runtime_pm cannot guarantee local memory
	 * eviction for d3cold.
	 * TODO: Check HDA audio dependencies claimed by i915, and then enforce
	 *       this option to integrated graphics as well.
	 */
	if (IS_DGFX(xe))
		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);

	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, 1000);
	pm_runtime_set_active(dev);
	pm_runtime_allow(dev);
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put(dev);
}

int xe_pm_init_early(struct xe_device *xe)
{
	int err;

	INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);

	err = drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
	if (err)
		return err;

	err = drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
	if (err)
		return err;

	xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe);
	return 0;
}
ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */

static u32 vram_threshold_value(struct xe_device *xe)
{
	/* FIXME: D3Cold temporarily disabled by default on BMG */
	if (xe->info.platform == XE_BATTLEMAGE)
		return 0;

	return DEFAULT_VRAM_THRESHOLD;
}

static void xe_pm_wake_rebind_workers(struct xe_device *xe)
{
	struct xe_vm *vm, *next;

	mutex_lock(&xe->rebind_resume_lock);
	list_for_each_entry_safe(vm, next, &xe->rebind_resume_list,
				 preempt.pm_activate_link) {
		list_del_init(&vm->preempt.pm_activate_link);
		xe_vm_resume_rebind_worker(vm);
	}
	mutex_unlock(&xe->rebind_resume_lock);
}

static int xe_pm_notifier_callback(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier);
	int err = 0;

	switch (action) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
		reinit_completion(&xe->pm_block);
		xe_pm_runtime_get(xe);
		err = xe_bo_evict_all_user(xe);
		if (err)
			drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err);

		err = xe_bo_notifier_prepare_all_pinned(xe);
		if (err)
			drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err);
		/*
		 * Keep the runtime pm reference until post hibernation / post suspend to
		 * avoid a runtime suspend interfering with evicted objects or backup
		 * allocations.
		 */
		break;
	case PM_POST_HIBERNATION:
	case PM_POST_SUSPEND:
		complete_all(&xe->pm_block);
		xe_pm_wake_rebind_workers(xe);
		xe_bo_notifier_unprepare_all_pinned(xe);
		xe_pm_runtime_put(xe);
		break;
	}

	return NOTIFY_DONE;
}

/**
 * xe_pm_init - Initialize Xe Power Management
 * @xe: xe device instance
 *
 * This component is responsible for System and Device sleep states.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_init(struct xe_device *xe)
{
	u32 vram_threshold;
	int err;

	xe->pm_notifier.notifier_call = xe_pm_notifier_callback;
	err = register_pm_notifier(&xe->pm_notifier);
	if (err)
		return err;

	err = drmm_mutex_init(&xe->drm, &xe->rebind_resume_lock);
	if (err)
		goto err_unregister;

	init_completion(&xe->pm_block);
	complete_all(&xe->pm_block);
	INIT_LIST_HEAD(&xe->rebind_resume_list);

	/* For now suspend/resume is only allowed with GuC */
	if (!xe_device_uc_enabled(xe))
		return 0;

	if (xe->d3cold.capable) {
		vram_threshold = vram_threshold_value(xe);
		err = xe_pm_set_vram_threshold(xe, vram_threshold);
		if (err)
			goto err_unregister;
	}

	xe_pm_runtime_init(xe);
	return 0;

err_unregister:
	unregister_pm_notifier(&xe->pm_notifier);
	return err;
}

static void xe_pm_runtime_fini(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/* Our current VFs do not support RPM, so disable it */
	if (IS_SRIOV_VF(xe))
		return;

	pm_runtime_get_sync(dev);
	pm_runtime_forbid(dev);
}

/**
 * xe_pm_fini - Finalize PM
 * @xe: xe device instance
 */
void xe_pm_fini(struct xe_device *xe)
{
	if (xe_device_uc_enabled(xe))
		xe_pm_runtime_fini(xe);

	unregister_pm_notifier(&xe->pm_notifier);
}

static void xe_pm_write_callback_task(struct xe_device *xe,
				      struct task_struct *task)
{
	WRITE_ONCE(xe->pm_callback_task, task);

	/*
	 * Just in case it's somehow possible for our writes to be reordered to
	 * the extent that something else re-uses the task written in
	 * pm_callback_task. For example after returning from the callback, but
	 * before the reordered write that resets pm_callback_task back to NULL.
	 */
	smp_mb(); /* pairs with xe_pm_read_callback_task */
}

struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
{
	smp_mb(); /* pairs with xe_pm_write_callback_task */

	return READ_ONCE(xe->pm_callback_task);
}

/**
 * xe_pm_runtime_suspended - Check if runtime_pm state is suspended
 * @xe: xe device instance
 *
 * This does not provide any guarantee that the device is going to remain
 * suspended as it might be racing with the runtime state transitions.
 * It can be used only as an unreliable assertion, to ensure that we are not in
 * the sleep state while trying to access some memory for instance.
 *
 * Returns true if the PCI device is suspended, false otherwise.
 */
bool xe_pm_runtime_suspended(struct xe_device *xe)
{
	return pm_runtime_suspended(xe->drm.dev);
}

/**
 * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold
 * @xe: xe device instance
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_runtime_suspend(struct xe_device *xe)
{
	struct xe_bo *bo, *on;
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	trace_xe_pm_runtime_suspend(xe, __builtin_return_address(0));
	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	/*
	 * The actual xe_pm_runtime_put() is always async underneath, so
	 * exactly where that is called should make no difference to us. However,
	 * we still need to be very careful with the locks that this callback
	 * acquires and the locks that are acquired and held by any callers of
	 * xe_pm_runtime_get(). We already have the matching annotation
	 * on that side, but we also need it here. For example lockdep should be
	 * able to tell us if the following scenario is in theory possible:
	 *
	 * CPU0                          | CPU1 (kworker)
	 * lock(A)                       |
	 *                               | xe_pm_runtime_suspend()
	 *                               |      lock(A)
	 * xe_pm_runtime_get()           |
	 *
	 * This will clearly deadlock since rpm core needs to wait for
	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
	 * on CPU0 which prevents CPU1 making forward progress.  With the
	 * annotation here and in xe_pm_runtime_get() lockdep will see
	 * the potential lock inversion and give us a nice splat.
	 */
	xe_rpm_lockmap_acquire(xe);

	err = xe_pxp_pm_suspend(xe->pxp);
	if (err)
		goto out;

	/*
	 * Apply the lock for the entire list op, as xe_ttm_bo_destroy and
	 * xe_bo_move_notify also check and delete the bo entry from the user
	 * fault list.
	 */
	mutex_lock(&xe->mem_access.vram_userfault.lock);
	list_for_each_entry_safe(bo, on,
				 &xe->mem_access.vram_userfault.list, vram_userfault_link)
		xe_bo_runtime_pm_release_mmap_offset(bo);
	mutex_unlock(&xe->mem_access.vram_userfault.lock);

	xe_display_pm_runtime_suspend(xe);

	if (xe->d3cold.allowed) {
		err = xe_bo_evict_all(xe);
		if (err)
			goto out_resume;
	}

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err)
			goto out_resume;
	}

	xe_irq_suspend(xe);

	xe_display_pm_runtime_suspend_late(xe);

	xe_i2c_pm_suspend(xe);

	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return 0;

out_resume:
	xe_display_pm_runtime_resume(xe);
	xe_pxp_pm_resume(xe->pxp);
out:
	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/**
 * xe_pm_runtime_resume - Waking up from D3hot/D3Cold
 * @xe: xe device instance
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_runtime_resume(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	trace_xe_pm_runtime_resume(xe, __builtin_return_address(0));
	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	xe_rpm_lockmap_acquire(xe);

	for_each_gt(gt, xe, id)
		xe_gt_idle_disable_c6(gt);

	if (xe->d3cold.allowed) {
		err = xe_pcode_ready(xe, true);
		if (err)
			goto out;

		xe_display_pm_resume_early(xe);

		/*
		 * This only restores pinned memory which is the memory
		 * required for the GT(s) to resume.
		 */
		err = xe_bo_restore_early(xe);
		if (err)
			goto out;
	}

	xe_i2c_pm_resume(xe, xe->d3cold.allowed);

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	xe_display_pm_runtime_resume(xe);

	if (xe->d3cold.allowed) {
		err = xe_bo_restore_late(xe);
		if (err)
			goto out;
	}

	xe_pxp_pm_resume(xe->pxp);

	if (IS_SRIOV_VF(xe))
		xe_sriov_vf_ccs_register_context(xe);

out:
	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/*
 * For places where resume is synchronous it can be quite easy to deadlock
 * if we are not careful. Also in practice it might be quite timing
 * sensitive to ever see the 0 -> 1 transition with the caller's locks
 * held, so deadlocks might exist but are hard for lockdep to ever see.
 * With this in mind, help lockdep learn about the potentially scary
 * stuff that can happen inside the runtime_resume callback by acquiring
 * a dummy lock (it doesn't protect anything and gets compiled out on
 * non-debug builds).  Lockdep then only needs to see the
 * xe_pm_runtime_xxx_map -> runtime_resume callback once, and then can
 * hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map.
 * For example if the (callers_locks) are ever grabbed in the
 * runtime_resume callback, lockdep should give us a nice splat.
 */
static void xe_rpm_might_enter_cb(const struct xe_device *xe)
{
	xe_rpm_lockmap_acquire(xe);
	xe_rpm_lockmap_release(xe);
}

/*
 * Prime the lockdep maps for known locking orders that need to
 * be supported but that may not always occur on all systems.
 */
static void xe_pm_runtime_lockdep_prime(void)
{
	struct dma_resv lockdep_resv;

	dma_resv_init(&lockdep_resv);
	lock_map_acquire(&xe_pm_runtime_d3cold_map);
	/* D3Cold takes the dma_resv locks to evict bos */
	dma_resv_lock(&lockdep_resv, NULL);
	dma_resv_unlock(&lockdep_resv);
	lock_map_release(&xe_pm_runtime_d3cold_map);

	/* Shrinkers might like to wake up the device under reclaim. */
	fs_reclaim_acquire(GFP_KERNEL);
	lock_map_acquire(&xe_pm_runtime_nod3cold_map);
	lock_map_release(&xe_pm_runtime_nod3cold_map);
	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously
 * @xe: xe device instance
 */
void xe_pm_runtime_get(struct xe_device *xe)
{
	trace_xe_pm_runtime_get(xe, __builtin_return_address(0));
	pm_runtime_get_noresume(xe->drm.dev);

	if (xe_pm_read_callback_task(xe) == current)
		return;

	xe_rpm_might_enter_cb(xe);
	pm_runtime_resume(xe->drm.dev);
}

/**
 * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle
 * @xe: xe device instance
 */
void xe_pm_runtime_put(struct xe_device *xe)
{
	trace_xe_pm_runtime_put(xe, __builtin_return_address(0));
	if (xe_pm_read_callback_task(xe) == current) {
		pm_runtime_put_noidle(xe->drm.dev);
	} else {
		pm_runtime_mark_last_busy(xe->drm.dev);
		pm_runtime_put(xe->drm.dev);
	}
}

/**
 * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl
 * @xe: xe device instance
 *
 * Returns: Any number greater than or equal to 0 for success, negative error
 * code otherwise.
 */
int xe_pm_runtime_get_ioctl(struct xe_device *xe)
{
	trace_xe_pm_runtime_get_ioctl(xe, __builtin_return_address(0));
	if (WARN_ON(xe_pm_read_callback_task(xe) == current))
		return -ELOOP;

	xe_rpm_might_enter_cb(xe);
	return pm_runtime_get_sync(xe->drm.dev);
}

/**
 * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active
 * @xe: xe device instance
 *
 * Return: True if device is awake (regardless of the previous number of
 * references) and a new reference was taken, false otherwise.
 */
bool xe_pm_runtime_get_if_active(struct xe_device *xe)
{
	return pm_runtime_get_if_active(xe->drm.dev) > 0;
}

/**
 * xe_pm_runtime_get_if_in_use - Get a new reference if device is active with previous ref taken
 * @xe: xe device instance
 *
 * Return: True if device is awake, a previous reference had already been
 * taken, and a new reference was taken, false otherwise.
 */
bool xe_pm_runtime_get_if_in_use(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		/* The device is awake, grab the ref and move on */
		pm_runtime_get_noresume(xe->drm.dev);
		return true;
	}

	return pm_runtime_get_if_in_use(xe->drm.dev) > 0;
}

/*
 * Very unreliable! Should only be used to suppress the false positive case
 * in the missing outer rpm protection warning.
 */
static bool xe_pm_suspending_or_resuming(struct xe_device *xe)
{
#ifdef CONFIG_PM
	struct device *dev = xe->drm.dev;

	return dev->power.runtime_status == RPM_SUSPENDING ||
		dev->power.runtime_status == RPM_RESUMING ||
		pm_suspend_in_progress();
#else
	return false;
#endif
}

/**
 * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming
 * @xe: xe device instance
 *
 * This function should be used in inner places where it is surely already
 * protected by outer-bound callers of `xe_pm_runtime_get`.
 * It will warn if not protected.
 * The reference should be put back after this function regardless, since it
 * will always bump the usage counter.
 */
void xe_pm_runtime_get_noresume(struct xe_device *xe)
{
	bool ref;

	ref = xe_pm_runtime_get_if_in_use(xe);

	if (!ref) {
		pm_runtime_get_noresume(xe->drm.dev);
		drm_WARN(&xe->drm, !xe_pm_suspending_or_resuming(xe),
			 "Missing outer runtime PM protection\n");
	}
}
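
/*
 * Illustrative sketch (not part of the driver): a hypothetical inner helper
 * called from a path that already holds an outer reference, e.g. one taken
 * via xe_pm_runtime_get() or xe_pm_runtime_get_ioctl(). xe_foo_inner() and
 * xe_foo_touch_hw() are made-up names; note that the noresume get still has
 * to be balanced with a put, since the usage counter is bumped
 * unconditionally.
 *
 *	static void xe_foo_inner(struct xe_device *xe)
 *	{
 *		xe_pm_runtime_get_noresume(xe);
 *		xe_foo_touch_hw(xe);
 *		xe_pm_runtime_put(xe);
 *	}
 */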

/**
 * xe_pm_runtime_resume_and_get - Resume, then get a runtime_pm ref if awake.
 * @xe: xe device instance
 *
 * Returns: True if device is awake and the reference was taken, false otherwise.
 */
bool xe_pm_runtime_resume_and_get(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		/* The device is awake, grab the ref and move on */
		pm_runtime_get_noresume(xe->drm.dev);
		return true;
	}

	xe_rpm_might_enter_cb(xe);
	return pm_runtime_resume_and_get(xe->drm.dev) >= 0;
}

/**
 * xe_pm_assert_unbounded_bridge - Disable PM on unbound PCIe parent bridge
 * @xe: xe device instance
 */
void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *bridge = pci_upstream_bridge(pdev);

	if (!bridge)
		return;

	if (!bridge->driver) {
		drm_warn(&xe->drm, "unbound parent pci bridge, device won't support any PM\n");
		device_set_pm_not_required(&pdev->dev);
	}
}

/**
 * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold
 * @xe: xe device instance
 * @threshold: VRAM size in MiB for the D3cold threshold
 *
 * Return:
 * * 0		- success
 * * -EINVAL	- invalid argument
 */
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
	struct ttm_resource_manager *man;
	u32 vram_total_mb = 0;
	int i;

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man)
			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
	}

	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);

	if (threshold > vram_total_mb)
		return -EINVAL;

	mutex_lock(&xe->d3cold.lock);
	xe->d3cold.vram_threshold = threshold;
	mutex_unlock(&xe->d3cold.lock);

	return 0;
}
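
/*
 * Illustrative sketch (not part of this file): how a sysfs store callback
 * might feed a user-provided value (in MiB) into xe_pm_set_vram_threshold().
 * The attribute wiring and the names used here are hypothetical; the real
 * sysfs plumbing lives outside this file.
 *
 *	static ssize_t vram_d3cold_threshold_store(struct device *dev,
 *						   struct device_attribute *attr,
 *						   const char *buf, size_t count)
 *	{
 *		struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev));
 *		u32 threshold;
 *		int ret;
 *
 *		ret = kstrtou32(buf, 0, &threshold);
 *		if (ret)
 *			return ret;
 *
 *		ret = xe_pm_set_vram_threshold(xe, threshold);
 *
 *		return ret ?: count;
 *	}
 */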

/**
 * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed
 * @xe: xe device instance
 *
 * To be called during the runtime_pm idle callback.
 * Check for all the D3Cold conditions ahead of runtime suspend.
 */
void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
{
	struct ttm_resource_manager *man;
	u32 total_vram_used_mb = 0;
	u64 vram_used;
	int i;

	if (!xe->d3cold.capable) {
		xe->d3cold.allowed = false;
		return;
	}

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man) {
			vram_used = ttm_resource_manager_usage(man);
			total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024);
		}
	}

	mutex_lock(&xe->d3cold.lock);

	if (total_vram_used_mb < xe->d3cold.vram_threshold)
		xe->d3cold.allowed = true;
	else
		xe->d3cold.allowed = false;

	mutex_unlock(&xe->d3cold.lock);
}

/**
 * xe_pm_module_init() - Perform xe_pm specific module initialization.
 *
 * Return: 0 on success. Currently doesn't fail.
 */
int __init xe_pm_module_init(void)
{
	xe_pm_runtime_lockdep_prime();
	return 0;
}