xref: /linux/drivers/gpu/drm/xe/xe_pm.c (revision 666c654a5ae4090a3402ac14f399283934ea8104)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include "xe_pm.h"
7 
8 #include <linux/fault-inject.h>
9 #include <linux/pm_runtime.h>
10 #include <linux/suspend.h>
11 #include <linux/dmi.h>
12 
13 #include <drm/drm_managed.h>
14 #include <drm/ttm/ttm_placement.h>
15 
16 #include "display/xe_display.h"
17 #include "xe_bo.h"
18 #include "xe_bo_evict.h"
19 #include "xe_device.h"
20 #include "xe_ggtt.h"
21 #include "xe_gt.h"
22 #include "xe_gt_idle.h"
23 #include "xe_i2c.h"
24 #include "xe_irq.h"
25 #include "xe_late_bind_fw.h"
26 #include "xe_pcode.h"
27 #include "xe_pxp.h"
28 #include "xe_sriov_vf_ccs.h"
29 #include "xe_trace.h"
30 #include "xe_vm.h"
31 #include "xe_wa.h"
32 
33 /**
34  * DOC: Xe Power Management
35  *
36  * Xe PM implements the main routines for both system level suspend states and
37  * for the opportunistic runtime suspend states.
38  *
39  * System Level Suspend (S-States) - In general this is OS initiated suspend
40  * driven by ACPI for achieving S0ix (a.k.a. S2idle, freeze), S3 (suspend to ram),
41  * S4 (disk). The main functions here are `xe_pm_suspend` and `xe_pm_resume`. They
42  * are the entry points for suspending to and resuming from these states.
43  *
44  * PCI Device Suspend (D-States) - This is the opportunistic PCIe device low power
45  * state D3, controlled by the PCI subsystem and ACPI with the help from the
46  * runtime_pm infrastructure.
47  * PCI D3 is special and can mean D3hot, where Vcc power is on to keep memory
48  * alive for a quicker, low-latency resume, or D3Cold, where Vcc power is off
49  * for better power savings.
50  * Vcc of the PCI hierarchy can only be controlled at the PCI root port
51  * level, while the device driver can be behind multiple bridges/switches and
52  * paired with other devices. For this reason, the PCI subsystem cannot perform
53  * the transition towards D3Cold. The lowest runtime PM state reachable from the
54  * PCI subsystem is D3hot. Then, if all the paired devices under the same root
55  * port are in D3hot, ACPI will assist and run its own methods (_PR3 and _OFF)
56  * to perform the transition from D3hot to D3cold. Xe may disallow this
57  * transition by calling pci_d3cold_disable(root_pdev) before going to runtime
58  * suspend. The decision is based on runtime conditions such as VRAM usage,
59  * for instance to preserve a quick, low-latency resume.
60  *
61  * Runtime PM - This infrastructure provided by the Linux kernel allows
62  * device drivers to indicate when they can be runtime suspended, so the device
63  * can be put in D3 (if supported), or allow deeper package sleep states
64  * (PC-states), and/or other low level power states. The Xe PM component provides
65  * `xe_pm_runtime_suspend` and `xe_pm_runtime_resume` functions that the PCI
66  * subsystem calls on transitions to/from runtime suspend.
67  *
68  * Also, Xe PM provides get and put functions that the Xe driver uses to
69  * indicate activity. In order to avoid locking complications with the memory
70  * management, whenever possible, these get and put functions need to be called
71  * from the higher/outer levels.
72  * The main cases that need to be protected from the outer levels are: IOCTL,
73  * sysfs, debugfs, dma-buf sharing, GPU execution.
74  *
75  * This component is not responsible for GT idleness (RC6) nor GT frequency
76  * management (RPS).
77  */
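
/*
 * Illustrative sketch only (not part of the driver): the "outer level"
 * protection described above typically looks like a sysfs or debugfs
 * callback taking the runtime PM reference before any hardware or VRAM
 * access. xe_foo_show() and xe_foo_read_hw() below are made-up names;
 * kdev_to_xe_device() is assumed to be the usual helper from xe_device.h.
 *
 *	static ssize_t xe_foo_show(struct device *dev,
 *				   struct device_attribute *attr, char *buf)
 *	{
 *		struct xe_device *xe = kdev_to_xe_device(dev);
 *		u32 val;
 *
 *		xe_pm_runtime_get(xe);
 *		val = xe_foo_read_hw(xe);	// hypothetical HW access
 *		xe_pm_runtime_put(xe);
 *
 *		return sysfs_emit(buf, "%u\n", val);
 *	}
 *
 * Inner memory-management code can then rely on the device already being
 * awake instead of taking its own reference (see xe_pm_runtime_get_noresume()).
 */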
78 
79 #ifdef CONFIG_LOCKDEP
80 static struct lockdep_map xe_pm_runtime_d3cold_map = {
81 	.name = "xe_rpm_d3cold_map"
82 };
83 
84 static struct lockdep_map xe_pm_runtime_nod3cold_map = {
85 	.name = "xe_rpm_nod3cold_map"
86 };
87 
88 static struct lockdep_map xe_pm_block_lockdep_map = {
89 	.name = "xe_pm_block_map",
90 };
91 #endif
92 
93 static void xe_pm_block_begin_signalling(void)
94 {
95 	lock_acquire_shared_recursive(&xe_pm_block_lockdep_map, 0, 1, NULL, _RET_IP_);
96 }
97 
98 static void xe_pm_block_end_signalling(void)
99 {
100 	lock_release(&xe_pm_block_lockdep_map, _RET_IP_);
101 }
102 
103 /**
104  * xe_pm_might_block_on_suspend() - Annotate that the code might block on suspend
105  *
106  * Annotation to use where the code might block or seize to make
107  * progress pending resume completion.
108  */
109 void xe_pm_might_block_on_suspend(void)
110 {
111 	lock_map_acquire(&xe_pm_block_lockdep_map);
112 	lock_map_release(&xe_pm_block_lockdep_map);
113 }
114 
115 /**
116  * xe_pm_block_on_suspend() - Block pending suspend.
117  * @xe: The xe device about to be suspended.
118  *
119  * Block if the pm notifier has started evicting bos, to avoid
120  * racing and validating those bos back. The function is
121  * annotated to ensure no locks are held that are also grabbed
122  * in the pm notifier or the device suspend / resume.
123  * This is intended to be used by freezable tasks only
124  * (not freezable workqueues), with the intention that the function
125  * returns %-ERESTARTSYS when tasks are frozen during suspend,
126  * allowing the task to freeze. The caller must be able to
127  * handle the %-ERESTARTSYS.
128  *
129  * Return: %0 on success, %-ERESTARTSYS on signal pending or
130  * if freezing requested.
131  */
132 int xe_pm_block_on_suspend(struct xe_device *xe)
133 {
134 	xe_pm_might_block_on_suspend();
135 
136 	return wait_for_completion_interruptible(&xe->pm_block);
137 }
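
/*
 * Minimal usage sketch (xe_foo_prepare() and xe_foo_validate_bos() are
 * made-up names): a freezable task about to validate bos first checks for a
 * pending suspend and simply propagates -ERESTARTSYS so it can be frozen and
 * restarted later.
 *
 *	static int xe_foo_prepare(struct xe_device *xe)
 *	{
 *		int err;
 *
 *		err = xe_pm_block_on_suspend(xe);
 *		if (err)
 *			return err;	// -ERESTARTSYS: let the task freeze
 *
 *		return xe_foo_validate_bos(xe);
 *	}
 */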
138 
139 /**
140  * xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context
141  * @xe: The xe device.
142  *
143  * Return: true if it is safe to runtime resume from reclaim context,
144  * false otherwise.
145  */
146 bool xe_rpm_reclaim_safe(const struct xe_device *xe)
147 {
148 	return !xe->d3cold.capable;
149 }
150 
151 static void xe_rpm_lockmap_acquire(const struct xe_device *xe)
152 {
153 	lock_map_acquire(xe_rpm_reclaim_safe(xe) ?
154 			 &xe_pm_runtime_nod3cold_map :
155 			 &xe_pm_runtime_d3cold_map);
156 }
157 
158 static void xe_rpm_lockmap_release(const struct xe_device *xe)
159 {
160 	lock_map_release(xe_rpm_reclaim_safe(xe) ?
161 			 &xe_pm_runtime_nod3cold_map :
162 			 &xe_pm_runtime_d3cold_map);
163 }
164 
165 /**
166  * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
167  * @xe: xe device instance
168  *
169  * Return: 0 on success
170  */
171 int xe_pm_suspend(struct xe_device *xe)
172 {
173 	struct xe_gt *gt;
174 	u8 id;
175 	int err;
176 
177 	drm_dbg(&xe->drm, "Suspending device\n");
178 	xe_pm_block_begin_signalling();
179 	trace_xe_pm_suspend(xe, __builtin_return_address(0));
180 
181 	err = xe_pxp_pm_suspend(xe->pxp);
182 	if (err)
183 		goto err;
184 
185 	xe_late_bind_wait_for_worker_completion(&xe->late_bind);
186 
187 	for_each_gt(gt, xe, id)
188 		xe_gt_suspend_prepare(gt);
189 
190 	xe_display_pm_suspend(xe);
191 
192 	/* FIXME: Super racy... */
193 	err = xe_bo_evict_all(xe);
194 	if (err)
195 		goto err_display;
196 
197 	for_each_gt(gt, xe, id) {
198 		err = xe_gt_suspend(gt);
199 		if (err)
200 			goto err_display;
201 	}
202 
203 	xe_irq_suspend(xe);
204 
205 	xe_display_pm_suspend_late(xe);
206 
207 	xe_i2c_pm_suspend(xe);
208 
209 	drm_dbg(&xe->drm, "Device suspended\n");
210 	xe_pm_block_end_signalling();
211 
212 	return 0;
213 
214 err_display:
215 	xe_display_pm_resume(xe);
216 	xe_pxp_pm_resume(xe->pxp);
217 err:
218 	drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
219 	xe_pm_block_end_signalling();
220 	return err;
221 }
222 
223 /**
224  * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
225  * @xe: xe device instance
226  *
227  * Return: 0 on success
228  */
229 int xe_pm_resume(struct xe_device *xe)
230 {
231 	struct xe_tile *tile;
232 	struct xe_gt *gt;
233 	u8 id;
234 	int err;
235 
236 	xe_pm_block_begin_signalling();
237 	drm_dbg(&xe->drm, "Resuming device\n");
238 	trace_xe_pm_resume(xe, __builtin_return_address(0));
239 
240 	for_each_gt(gt, xe, id)
241 		xe_gt_idle_disable_c6(gt);
242 
243 	for_each_tile(tile, xe, id)
244 		xe_wa_apply_tile_workarounds(tile);
245 
246 	err = xe_pcode_ready(xe, true);
247 	if (err)
248 		return err;
249 
250 	xe_display_pm_resume_early(xe);
251 
252 	/*
253 	 * This only restores pinned memory which is the memory required for the
254 	 * GT(s) to resume.
255 	 */
256 	err = xe_bo_restore_early(xe);
257 	if (err)
258 		goto err;
259 
260 	xe_i2c_pm_resume(xe, true);
261 
262 	xe_irq_resume(xe);
263 
264 	for_each_gt(gt, xe, id) {
265 		err = xe_gt_resume(gt);
266 		if (err)
267 			break;
268 	}
269 
270 	/*
271 	 * Try to bring up display before bailing from GT resume failure,
272 	 * so we don't leave the user clueless with a blank screen.
273 	 */
274 	xe_display_pm_resume(xe);
275 	if (err)
276 		goto err;
277 
278 	err = xe_bo_restore_late(xe);
279 	if (err)
280 		goto err;
281 
282 	xe_pxp_pm_resume(xe->pxp);
283 
284 	if (IS_VF_CCS_READY(xe))
285 		xe_sriov_vf_ccs_register_context(xe);
286 
287 	xe_late_bind_fw_load(&xe->late_bind);
288 
289 	drm_dbg(&xe->drm, "Device resumed\n");
290 	xe_pm_block_end_signalling();
291 	return 0;
292 err:
293 	drm_dbg(&xe->drm, "Device resume failed %d\n", err);
294 	xe_pm_block_end_signalling();
295 	return err;
296 }
297 
298 static bool xe_pm_pci_d3cold_capable(struct xe_device *xe)
299 {
300 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
301 	struct pci_dev *root_pdev;
302 
303 	root_pdev = pcie_find_root_port(pdev);
304 	if (!root_pdev)
305 		return false;
306 
307 	/* D3Cold requires PME capability */
308 	if (!pci_pme_capable(root_pdev, PCI_D3cold)) {
309 		drm_dbg(&xe->drm, "d3cold: PME# not supported\n");
310 		return false;
311 	}
312 
313 	/* D3Cold requires _PR3 power resource */
314 	if (!pci_pr3_present(root_pdev)) {
315 		drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n");
316 		return false;
317 	}
318 
319 	return true;
320 }
321 
322 static void xe_pm_runtime_init(struct xe_device *xe)
323 {
324 	struct device *dev = xe->drm.dev;
325 
326 	/* Our current VFs do not support RPM, so disable it */
327 	if (IS_SRIOV_VF(xe))
328 		return;
329 
330 	/*
331 	 * Disable the system suspend direct complete optimization.
332 	 * We need to ensure that the regular device suspend/resume functions
333 	 * are called since our runtime_pm cannot guarantee local memory
334 	 * eviction for d3cold.
335 	 * TODO: Check HDA audio dependencies claimed by i915, and then enforce
336 	 * TODO: Check HDA audio dependencies claimed by i915, and then extend
337 	 */
338 	if (IS_DGFX(xe))
339 		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);
340 
341 	pm_runtime_use_autosuspend(dev);
342 	pm_runtime_set_autosuspend_delay(dev, 1000);
343 	pm_runtime_set_active(dev);
344 	pm_runtime_allow(dev);
345 	pm_runtime_mark_last_busy(dev);
346 	pm_runtime_put(dev);
347 }
348 
349 int xe_pm_init_early(struct xe_device *xe)
350 {
351 	int err;
352 
353 	INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);
354 
355 	err = drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
356 	if (err)
357 		return err;
358 
359 	err = drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
360 	if (err)
361 		return err;
362 
363 	xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe);
364 	return 0;
365 }
366 ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */
367 
368 static u32 vram_threshold_value(struct xe_device *xe)
369 {
370 	if (xe->info.platform == XE_BATTLEMAGE) {
371 		const char *product_name;
372 
373 		product_name = dmi_get_system_info(DMI_PRODUCT_NAME);
374 		if (product_name && strstr(product_name, "NUC13RNG")) {
375 			drm_warn(&xe->drm, "BMG + D3Cold not supported on this platform\n");
376 			return 0;
377 		}
378 	}
379 
380 	return DEFAULT_VRAM_THRESHOLD;
381 }
382 
383 static void xe_pm_wake_rebind_workers(struct xe_device *xe)
384 {
385 	struct xe_vm *vm, *next;
386 
387 	mutex_lock(&xe->rebind_resume_lock);
388 	list_for_each_entry_safe(vm, next, &xe->rebind_resume_list,
389 				 preempt.pm_activate_link) {
390 		list_del_init(&vm->preempt.pm_activate_link);
391 		xe_vm_resume_rebind_worker(vm);
392 	}
393 	mutex_unlock(&xe->rebind_resume_lock);
394 }
395 
396 static int xe_pm_notifier_callback(struct notifier_block *nb,
397 				   unsigned long action, void *data)
398 {
399 	struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier);
400 	int err = 0;
401 
402 	switch (action) {
403 	case PM_HIBERNATION_PREPARE:
404 	case PM_SUSPEND_PREPARE:
405 	{
406 		struct xe_validation_ctx ctx;
407 
408 		reinit_completion(&xe->pm_block);
409 		xe_pm_block_begin_signalling();
410 		xe_pm_runtime_get(xe);
411 		(void)xe_validation_ctx_init(&ctx, &xe->val, NULL,
412 					     (struct xe_val_flags) {.exclusive = true});
413 		err = xe_bo_evict_all_user(xe);
414 		xe_validation_ctx_fini(&ctx);
415 		if (err)
416 			drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err);
417 
418 		err = xe_bo_notifier_prepare_all_pinned(xe);
419 		if (err)
420 			drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err);
421 		/*
422 		 * Keep the runtime pm reference until post hibernation / post suspend to
423 		 * avoid a runtime suspend interfering with evicted objects or backup
424 		 * allocations.
425 		 */
426 		xe_pm_block_end_signalling();
427 		break;
428 	}
429 	case PM_POST_HIBERNATION:
430 	case PM_POST_SUSPEND:
431 		complete_all(&xe->pm_block);
432 		xe_pm_wake_rebind_workers(xe);
433 		xe_bo_notifier_unprepare_all_pinned(xe);
434 		xe_pm_runtime_put(xe);
435 		break;
436 	}
437 
438 	return NOTIFY_DONE;
439 }
440 
441 /**
442  * xe_pm_init - Initialize Xe Power Management
443  * @xe: xe device instance
444  *
445  * This component is responsible for System and Device sleep states.
446  *
447  * Returns 0 for success, negative error code otherwise.
448  */
449 int xe_pm_init(struct xe_device *xe)
450 {
451 	u32 vram_threshold;
452 	int err;
453 
454 	xe->pm_notifier.notifier_call = xe_pm_notifier_callback;
455 	err = register_pm_notifier(&xe->pm_notifier);
456 	if (err)
457 		return err;
458 
459 	err = drmm_mutex_init(&xe->drm, &xe->rebind_resume_lock);
460 	if (err)
461 		goto err_unregister;
462 
463 	init_completion(&xe->pm_block);
464 	complete_all(&xe->pm_block);
465 	INIT_LIST_HEAD(&xe->rebind_resume_list);
466 
467 	/* For now suspend/resume is only allowed with GuC */
468 	if (!xe_device_uc_enabled(xe))
469 		return 0;
470 
471 	if (xe->d3cold.capable) {
472 		vram_threshold = vram_threshold_value(xe);
473 		err = xe_pm_set_vram_threshold(xe, vram_threshold);
474 		if (err)
475 			goto err_unregister;
476 	}
477 
478 	xe_pm_runtime_init(xe);
479 	return 0;
480 
481 err_unregister:
482 	unregister_pm_notifier(&xe->pm_notifier);
483 	return err;
484 }
485 
486 static void xe_pm_runtime_fini(struct xe_device *xe)
487 {
488 	struct device *dev = xe->drm.dev;
489 
490 	/* Our current VFs do not support RPM, so disable it */
491 	if (IS_SRIOV_VF(xe))
492 		return;
493 
494 	pm_runtime_get_sync(dev);
495 	pm_runtime_forbid(dev);
496 }
497 
498 /**
499  * xe_pm_fini - Finalize PM
500  * @xe: xe device instance
501  */
502 void xe_pm_fini(struct xe_device *xe)
503 {
504 	if (xe_device_uc_enabled(xe))
505 		xe_pm_runtime_fini(xe);
506 
507 	unregister_pm_notifier(&xe->pm_notifier);
508 }
509 
510 static void xe_pm_write_callback_task(struct xe_device *xe,
511 				      struct task_struct *task)
512 {
513 	WRITE_ONCE(xe->pm_callback_task, task);
514 
515 	/*
516 	 * Just in case it's somehow possible for our writes to be reordered to
517 	 * the extent that something else re-uses the task written in
518 	 * pm_callback_task. For example after returning from the callback, but
519 	 * before the reordered write that resets pm_callback_task back to NULL.
520 	 */
521 	smp_mb(); /* pairs with xe_pm_read_callback_task */
522 }
523 
524 struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
525 {
526 	smp_mb(); /* pairs with xe_pm_write_callback_task */
527 
528 	return READ_ONCE(xe->pm_callback_task);
529 }
530 
531 /**
532  * xe_pm_runtime_suspended - Check if runtime_pm state is suspended
533  * @xe: xe device instance
534  *
535  * This does not provide any guarantee that the device is going to remain
536  * suspended as it might be racing with the runtime state transitions.
537  * It can be used only as an unreliable assertion, to ensure that we are not in
538  * the sleep state while trying to access some memory for instance.
539  *
540  * Returns true if PCI device is suspended, false otherwise.
541  */
542 bool xe_pm_runtime_suspended(struct xe_device *xe)
543 {
544 	return pm_runtime_suspended(xe->drm.dev);
545 }
546 
547 /**
548  * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold
549  * @xe: xe device instance
550  *
551  * Returns 0 for success, negative error code otherwise.
552  */
553 int xe_pm_runtime_suspend(struct xe_device *xe)
554 {
555 	struct xe_bo *bo, *on;
556 	struct xe_gt *gt;
557 	u8 id;
558 	int err = 0;
559 
560 	trace_xe_pm_runtime_suspend(xe, __builtin_return_address(0));
561 	/* Disable access_ongoing asserts and prevent recursive pm calls */
562 	xe_pm_write_callback_task(xe, current);
563 
564 	/*
565 	 * exactly where that is called should make no difference to us. However
566 	 * exactly where that is called should makes no difference to us. However
567 	 * we still need to be very careful with the locks that this callback
568 	 * acquires and the locks that are acquired and held by any callers of
569 	 * xe_runtime_pm_get(). We already have the matching annotation
570 	 * on that side, but we also need it here. For example lockdep should be
571 	 * able to tell us if the following scenario is in theory possible:
572 	 *
573 	 * CPU0                          | CPU1 (kworker)
574 	 * lock(A)                       |
575 	 *                               | xe_pm_runtime_suspend()
576 	 *                               |      lock(A)
577 	 * xe_pm_runtime_get()           |
578 	 *
579 	 * This will clearly deadlock since rpm core needs to wait for
580 	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
581 	 * on CPU0 which prevents CPU1 making forward progress.  With the
582 	 * annotation here and in xe_pm_runtime_get() lockdep will see
583 	 * the potential lock inversion and give us a nice splat.
584 	 */
585 	xe_rpm_lockmap_acquire(xe);
586 
587 	err = xe_pxp_pm_suspend(xe->pxp);
588 	if (err)
589 		goto out;
590 
591 	/*
592 	 * Hold the lock for the entire list operation, as xe_ttm_bo_destroy and
593 	 * xe_bo_move_notify also check and delete bo entries from the user fault list.
594 	 */
595 	mutex_lock(&xe->mem_access.vram_userfault.lock);
596 	list_for_each_entry_safe(bo, on,
597 				 &xe->mem_access.vram_userfault.list, vram_userfault_link)
598 		xe_bo_runtime_pm_release_mmap_offset(bo);
599 	mutex_unlock(&xe->mem_access.vram_userfault.lock);
600 
601 	xe_display_pm_runtime_suspend(xe);
602 
603 	if (xe->d3cold.allowed) {
604 		err = xe_bo_evict_all(xe);
605 		if (err)
606 			goto out_resume;
607 	}
608 
609 	for_each_gt(gt, xe, id) {
610 		err = xe->d3cold.allowed ? xe_gt_suspend(gt) : xe_gt_runtime_suspend(gt);
611 		if (err)
612 			goto out_resume;
613 	}
614 
615 	xe_irq_suspend(xe);
616 
617 	xe_display_pm_runtime_suspend_late(xe);
618 
619 	xe_i2c_pm_suspend(xe);
620 
621 	xe_rpm_lockmap_release(xe);
622 	xe_pm_write_callback_task(xe, NULL);
623 	return 0;
624 
625 out_resume:
626 	xe_display_pm_runtime_resume(xe);
627 	xe_pxp_pm_resume(xe->pxp);
628 out:
629 	xe_rpm_lockmap_release(xe);
630 	xe_pm_write_callback_task(xe, NULL);
631 	return err;
632 }
633 
634 /**
635  * xe_pm_runtime_resume - Waking up from D3hot/D3Cold
636  * @xe: xe device instance
637  *
638  * Returns 0 for success, negative error code otherwise.
639  */
640 int xe_pm_runtime_resume(struct xe_device *xe)
641 {
642 	struct xe_gt *gt;
643 	u8 id;
644 	int err = 0;
645 
646 	trace_xe_pm_runtime_resume(xe, __builtin_return_address(0));
647 	/* Disable access_ongoing asserts and prevent recursive pm calls */
648 	xe_pm_write_callback_task(xe, current);
649 
650 	xe_rpm_lockmap_acquire(xe);
651 
652 	if (xe->d3cold.allowed) {
653 		for_each_gt(gt, xe, id)
654 			xe_gt_idle_disable_c6(gt);
655 
656 		err = xe_pcode_ready(xe, true);
657 		if (err)
658 			goto out;
659 
660 		xe_display_pm_resume_early(xe);
661 
662 		/*
663 		 * This only restores pinned memory which is the memory
664 		 * required for the GT(s) to resume.
665 		 */
666 		err = xe_bo_restore_early(xe);
667 		if (err)
668 			goto out;
669 	}
670 
671 	xe_i2c_pm_resume(xe, xe->d3cold.allowed);
672 
673 	xe_irq_resume(xe);
674 
675 	for_each_gt(gt, xe, id) {
676 		err = xe->d3cold.allowed ? xe_gt_resume(gt) : xe_gt_runtime_resume(gt);
677 		if (err)
678 			break;
679 	}
680 
681 	/*
682 	 * Try to bring up display before bailing from GT resume failure,
683 	 * so we don't leave the user clueless with a blank screen.
684 	 */
685 	xe_display_pm_runtime_resume(xe);
686 	if (err)
687 		goto out;
688 
689 	if (xe->d3cold.allowed) {
690 		err = xe_bo_restore_late(xe);
691 		if (err)
692 			goto out;
693 	}
694 
695 	xe_pxp_pm_resume(xe->pxp);
696 
697 	if (IS_VF_CCS_READY(xe))
698 		xe_sriov_vf_ccs_register_context(xe);
699 
700 	if (xe->d3cold.allowed)
701 		xe_late_bind_fw_load(&xe->late_bind);
702 
703 out:
704 	xe_rpm_lockmap_release(xe);
705 	xe_pm_write_callback_task(xe, NULL);
706 	return err;
707 }
708 
709 /*
710  * For places where resume is synchronous it can be quite easy to deadlock
711  * if we are not careful. Also in practice it might be quite timing
712  * sensitive to ever see the 0 -> 1 transition with the callers locks
713  * held, so deadlocks might exist but are hard for lockdep to ever see.
714  * With this in mind, help lockdep learn about the potentially scary
715  * stuff that can happen inside the runtime_resume callback by acquiring
716  * a dummy lock (it doesn't protect anything and gets compiled out on
717  * non-debug builds).  Lockdep then only needs to see the
718  * xe_pm_runtime_xxx_map -> runtime_resume callback once, and then can
719  * hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map.
720  * For example if the (callers_locks) are ever grabbed in the
721  * runtime_resume callback, lockdep should give us a nice splat.
722  */
723 static void xe_rpm_might_enter_cb(const struct xe_device *xe)
724 {
725 	xe_rpm_lockmap_acquire(xe);
726 	xe_rpm_lockmap_release(xe);
727 }
728 
729 /*
730  * Prime the lockdep maps for known locking orders that need to
731  * be supported but that may not always occur on all systems.
732  */
733 static void xe_pm_runtime_lockdep_prime(void)
734 {
735 	struct dma_resv lockdep_resv;
736 
737 	dma_resv_init(&lockdep_resv);
738 	lock_map_acquire(&xe_pm_runtime_d3cold_map);
739 	/* D3Cold takes the dma_resv locks to evict bos */
740 	dma_resv_lock(&lockdep_resv, NULL);
741 	dma_resv_unlock(&lockdep_resv);
742 	lock_map_release(&xe_pm_runtime_d3cold_map);
743 
744 	/* Shrinkers might like to wake up the device under reclaim. */
745 	fs_reclaim_acquire(GFP_KERNEL);
746 	lock_map_acquire(&xe_pm_runtime_nod3cold_map);
747 	lock_map_release(&xe_pm_runtime_nod3cold_map);
748 	fs_reclaim_release(GFP_KERNEL);
749 }
750 
751 /**
752  * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously
753  * @xe: xe device instance
754  *
755  * When possible, scope-based runtime PM (through guard(xe_pm_runtime)) is
756  * preferred over direct usage of this function.  Manual get/put handling
757  * should only be used when the function contains goto-based logic which
758  * can break scope-based handling, or when the lifetime of the runtime PM
759  * reference does not match a specific scope (e.g., runtime PM obtained in one
760  * function and released in a different one).
761  */
762 void xe_pm_runtime_get(struct xe_device *xe)
763 {
764 	trace_xe_pm_runtime_get(xe, __builtin_return_address(0));
765 	pm_runtime_get_noresume(xe->drm.dev);
766 
767 	if (xe_pm_read_callback_task(xe) == current)
768 		return;
769 
770 	xe_rpm_might_enter_cb(xe);
771 	pm_runtime_resume(xe->drm.dev);
772 }
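
/*
 * Rough comparison of the two patterns (assuming the guard class mentioned
 * above is defined in xe_pm.h; xe_foo_work() is a made-up helper):
 *
 *	// Scope-based: the reference is dropped automatically at end of scope.
 *	static void xe_foo_scoped(struct xe_device *xe)
 *	{
 *		guard(xe_pm_runtime)(xe);
 *		xe_foo_work(xe);
 *	}
 *
 *	// Manual: needed e.g. when the reference outlives a single scope.
 *	static void xe_foo_manual(struct xe_device *xe)
 *	{
 *		xe_pm_runtime_get(xe);
 *		xe_foo_work(xe);
 *		xe_pm_runtime_put(xe);
 *	}
 */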
773 
774 /**
775  * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle
776  * @xe: xe device instance
777  */
778 void xe_pm_runtime_put(struct xe_device *xe)
779 {
780 	trace_xe_pm_runtime_put(xe, __builtin_return_address(0));
781 	if (xe_pm_read_callback_task(xe) == current) {
782 		pm_runtime_put_noidle(xe->drm.dev);
783 	} else {
784 		pm_runtime_mark_last_busy(xe->drm.dev);
785 		pm_runtime_put(xe->drm.dev);
786 	}
787 }
788 
789 /**
790  * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl
791  * @xe: xe device instance
792  *
793  * When possible, scope-based runtime PM (through
794  * ACQUIRE(xe_pm_runtime_ioctl, ...)) is preferred over direct usage of this
795  * function.  Manual get/put handling should only be used when the function
796  * contains goto-based logic which can break scope-based handling, or when the
797  * lifetime of the runtime PM reference does not match a specific scope (e.g.,
798  * runtime PM obtained in one function and released in a different one).
799  *
800  * Returns: Any number greater than or equal to 0 for success, negative error
801  * code otherwise.
802  */
803 int xe_pm_runtime_get_ioctl(struct xe_device *xe)
804 {
805 	trace_xe_pm_runtime_get_ioctl(xe, __builtin_return_address(0));
806 	if (WARN_ON(xe_pm_read_callback_task(xe) == current))
807 		return -ELOOP;
808 
809 	xe_rpm_might_enter_cb(xe);
810 	return pm_runtime_get_sync(xe->drm.dev);
811 }
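
/*
 * Illustrative sketch of the manual ioctl pattern (xe_foo_ioctl() and
 * xe_foo_exec() are made up; the usual to_xe_device() helper is assumed):
 * since the underlying pm_runtime_get_sync() bumps the usage counter even on
 * failure, the reference is put back unconditionally.
 *
 *	static int xe_foo_ioctl(struct drm_device *dev, void *data,
 *				struct drm_file *file)
 *	{
 *		struct xe_device *xe = to_xe_device(dev);
 *		int ret;
 *
 *		ret = xe_pm_runtime_get_ioctl(xe);
 *		if (ret >= 0)
 *			ret = xe_foo_exec(xe, data);	// hypothetical ioctl work
 *		xe_pm_runtime_put(xe);
 *
 *		return ret < 0 ? ret : 0;
 *	}
 */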
812 
813 /**
814  * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active
815  * @xe: xe device instance
816  *
817  * Return: True if device is awake (regardless of the previous number of references)
818  * and a new reference was taken, false otherwise.
819  */
820 bool xe_pm_runtime_get_if_active(struct xe_device *xe)
821 {
822 	return pm_runtime_get_if_active(xe->drm.dev) > 0;
823 }
824 
825 /**
826  * xe_pm_runtime_get_if_in_use - Get a new reference if device is active with previous ref taken
827  * @xe: xe device instance
828  *
829  * Return: True if device is awake, a previous reference had been already taken,
830  * and a new reference was now taken, false otherwise.
831  */
832 bool xe_pm_runtime_get_if_in_use(struct xe_device *xe)
833 {
834 	if (xe_pm_read_callback_task(xe) == current) {
835 		/* The device is awake, grab the ref and move on */
836 		pm_runtime_get_noresume(xe->drm.dev);
837 		return true;
838 	}
839 
840 	return pm_runtime_get_if_in_use(xe->drm.dev) > 0;
841 }
842 
843 /*
844  * Very unreliable! Should only be used to suppress the false positive case
845  * in the missing outer rpm protection warning.
846  */
847 static bool xe_pm_suspending_or_resuming(struct xe_device *xe)
848 {
849 #ifdef CONFIG_PM
850 	struct device *dev = xe->drm.dev;
851 
852 	return dev->power.runtime_status == RPM_SUSPENDING ||
853 		dev->power.runtime_status == RPM_RESUMING ||
854 		pm_suspend_in_progress();
855 #else
856 	return false;
857 #endif
858 }
859 
860 /**
861  * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming
862  * @xe: xe device instance
863  *
864  * This function should be used in inner places where it is surely already
865  * protected by outer-bound callers of `xe_pm_runtime_get`.
866  * It will warn if not protected.
867  * The reference should be put back after this function regardless, since it
868  * will always bump the usage counter.
869  *
870  * When possible, scope-based runtime PM (through guard(xe_pm_runtime_noresume))
871  * is preferred over direct usage of this function.  Manual get/put handling
872  * should only be used when the function contains goto-based logic which can
873  * break scope-based handling, or when the lifetime of the runtime PM reference
874  * does not match a specific scope (e.g., runtime PM obtained in one function
875  * and released in a different one).
876  */
877 void xe_pm_runtime_get_noresume(struct xe_device *xe)
878 {
879 	bool ref;
880 
881 	ref = xe_pm_runtime_get_if_in_use(xe);
882 
883 	if (!ref) {
884 		pm_runtime_get_noresume(xe->drm.dev);
885 		drm_WARN(&xe->drm, !xe_pm_suspending_or_resuming(xe),
886 			 "Missing outer runtime PM protection\n");
887 	}
888 }
889 
890 /**
891  * xe_pm_runtime_resume_and_get - Resume, then get a runtime_pm ref if awake.
892  * @xe: xe device instance
893  *
894  * Returns: True if device is awake and the reference was taken, false otherwise.
895  */
896 bool xe_pm_runtime_resume_and_get(struct xe_device *xe)
897 {
898 	if (xe_pm_read_callback_task(xe) == current) {
899 		/* The device is awake, grab the ref and move on */
900 		pm_runtime_get_noresume(xe->drm.dev);
901 		return true;
902 	}
903 
904 	xe_rpm_might_enter_cb(xe);
905 	return pm_runtime_resume_and_get(xe->drm.dev) >= 0;
906 }
907 
908 /**
909  * xe_pm_assert_unbounded_bridge - Disable PM on unbounded pcie parent bridge
910  * @xe: xe device instance
911  */
912 void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
913 {
914 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
915 	struct pci_dev *bridge = pci_upstream_bridge(pdev);
916 
917 	if (!bridge)
918 		return;
919 
920 	if (!bridge->driver) {
921 		drm_warn(&xe->drm, "unbounded parent pci bridge, device won't support any PM.\n");
922 		device_set_pm_not_required(&pdev->dev);
923 	}
924 }
925 
926 /**
927  * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold
928  * @xe: xe device instance
929  * @threshold: VRAM size in MiB for the D3cold threshold
930  *
931  * Return:
932  * * 0		- success
933  * * -EINVAL	- invalid argument
934  */
935 int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
936 {
937 	struct ttm_resource_manager *man;
938 	u32 vram_total_mb = 0;
939 	int i;
940 
941 	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
942 		man = ttm_manager_type(&xe->ttm, i);
943 		if (man)
944 			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
945 	}
946 
947 	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);
948 
949 	if (threshold > vram_total_mb)
950 		return -EINVAL;
951 
952 	mutex_lock(&xe->d3cold.lock);
953 	xe->d3cold.vram_threshold = threshold;
954 	mutex_unlock(&xe->d3cold.lock);
955 
956 	return 0;
957 }
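
/*
 * Note that a threshold of 0 effectively disallows D3Cold: used VRAM can
 * never be below 0 MiB, so xe_pm_d3cold_allowed_toggle() below will never set
 * d3cold.allowed. This is what vram_threshold_value() relies on for the
 * quirked BMG platform above.
 */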
958 
959 /**
960  * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed
961  * @xe: xe device instance
962  *
963  * To be called during runtime_pm idle callback.
964  * Check for all the D3Cold conditions ahead of runtime suspend.
965  */
966 void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
967 {
968 	struct ttm_resource_manager *man;
969 	u32 total_vram_used_mb = 0;
970 	u64 vram_used;
971 	int i;
972 
973 	if (!xe->d3cold.capable) {
974 		xe->d3cold.allowed = false;
975 		return;
976 	}
977 
978 	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
979 		man = ttm_manager_type(&xe->ttm, i);
980 		if (man) {
981 			vram_used = ttm_resource_manager_usage(man);
982 			total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024);
983 		}
984 	}
985 
986 	mutex_lock(&xe->d3cold.lock);
987 
988 	if (total_vram_used_mb < xe->d3cold.vram_threshold)
989 		xe->d3cold.allowed = true;
990 	else
991 		xe->d3cold.allowed = false;
992 
993 	mutex_unlock(&xe->d3cold.lock);
994 }
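
/*
 * Worked example (numbers are illustrative only): with a threshold of
 * 300 MiB configured and 512 MiB of VRAM in use, 512 < 300 is false, so
 * d3cold.allowed stays false and runtime suspend stops at D3hot; if usage
 * later drops to, say, 100 MiB, the next idle callback sets d3cold.allowed
 * and D3Cold becomes eligible again.
 */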
995 
996 /**
997  * xe_pm_module_init() - Perform xe_pm specific module initialization.
998  *
999  * Return: 0 on success. Currently doesn't fail.
1000  */
1001 int __init xe_pm_module_init(void)
1002 {
1003 	xe_pm_runtime_lockdep_prime();
1004 	return 0;
1005 }
1006