xref: /linux/drivers/gpu/drm/xe/xe_pm.c (revision 9462f2b677506d8d698e81bfa378bbfd65a19187)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include "xe_pm.h"
7 
8 #include <linux/fault-inject.h>
9 #include <linux/pm_runtime.h>
10 #include <linux/suspend.h>
11 #include <linux/dmi.h>
12 
13 #include <drm/drm_managed.h>
14 #include <drm/ttm/ttm_placement.h>
15 
16 #include "display/xe_display.h"
17 #include "xe_bo.h"
18 #include "xe_bo_evict.h"
19 #include "xe_device.h"
20 #include "xe_ggtt.h"
21 #include "xe_gt.h"
22 #include "xe_gt_idle.h"
23 #include "xe_i2c.h"
24 #include "xe_irq.h"
25 #include "xe_late_bind_fw.h"
26 #include "xe_pcode.h"
27 #include "xe_printk.h"
28 #include "xe_pxp.h"
29 #include "xe_sriov_vf_ccs.h"
30 #include "xe_sysctrl.h"
31 #include "xe_trace.h"
32 #include "xe_vm.h"
33 #include "xe_wa.h"
34 
35 /**
36  * DOC: Xe Power Management
37  *
38  * Xe PM implements the main routines for both system level suspend states and
39  * for the opportunistic runtime suspend states.
40  *
41  * System Level Suspend (S-States) - In general this is OS initiated suspend
42  * driven by ACPI for achieving S0ix (a.k.a. S2idle, freeze), S3 (suspend to ram),
43  * S4 (disk). The main functions here are `xe_pm_suspend` and `xe_pm_resume`. They
44  * are the main point for the suspend to and resume from these states.
45  *
46  * PCI Device Suspend (D-States) - This is the opportunistic PCIe device low power
47  * state D3, controlled by the PCI subsystem and ACPI with the help from the
48  * runtime_pm infrastructure.
49  * PCI D3 is special and can mean D3hot, where Vcc power is on for keeping memory
50  * alive and quicker low latency resume or D3Cold where Vcc power is off for
51  * better power savings.
52  * The Vcc control of PCI hierarchy can only be controlled at the PCI root port
53  * level, while the device driver can be behind multiple bridges/switches and
54  * paired with other devices. For this reason, the PCI subsystem cannot perform
55  * the transition towards D3Cold. The lowest runtime PM possible from the PCI
56  * subsystem is D3hot. Then, if all these paired devices in the same root port
57  * are in D3hot, ACPI will assist here and run its own methods (_PR3 and _OFF)
58  * to perform the transition from D3hot to D3cold. Xe may disallow this
59  * transition by calling pci_d3cold_disable(root_pdev) before going to runtime
60  * suspend. It will be based on runtime conditions such as VRAM usage for a
61  * quick and low latency resume for instance.
62  *
63  * Runtime PM - This infrastructure provided by the Linux kernel allows the
64  * device drivers to indicate when they can be runtime suspended, so the device
65  * could be put at D3 (if supported), or allow deeper package sleep states
66  * (PC-states), and/or other low level power states. Xe PM component provides
67  * `xe_pm_runtime_suspend` and `xe_pm_runtime_resume` functions that PCI
68  * subsystem will call before transition to/from runtime suspend.
69  *
70  * Also, Xe PM provides get and put functions that Xe driver will use to
71  * indicate activity. In order to avoid locking complications with the memory
72  * management, whenever possible, these get and put functions need to be called
73  * from the higher/outer levels.
74  * The main cases that need to be protected from the outer levels are: IOCTL,
75  * sysfs, debugfs, dma-buf sharing, GPU execution.
76  *
77  * This component is not responsible for GT idleness (RC6) nor GT frequency
78  * management (RPS).
79  */
80 
#ifdef CONFIG_LOCKDEP
/*
 * Lockdep-only maps used purely for annotation in this file: one per runtime
 * PM flavour (d3cold capable or not) and one for the pm_block completion.
 */
static struct lockdep_map xe_pm_runtime_d3cold_map = {
	.name = "xe_rpm_d3cold_map",
};

static struct lockdep_map xe_pm_runtime_nod3cold_map = {
	.name = "xe_rpm_nod3cold_map",
};

static struct lockdep_map xe_pm_block_lockdep_map = {
	.name = "xe_pm_block_map",
};
#endif
94 
95 static void xe_pm_block_begin_signalling(void)
96 {
97 	lock_acquire_shared_recursive(&xe_pm_block_lockdep_map, 0, 1, NULL, _RET_IP_);
98 }
99 
100 static void xe_pm_block_end_signalling(void)
101 {
102 	lock_release(&xe_pm_block_lockdep_map, _RET_IP_);
103 }
104 
105 /**
106  * xe_pm_might_block_on_suspend() - Annotate that the code might block on suspend
107  *
108  * Annotation to use where the code might block or seize to make
109  * progress pending resume completion.
110  */
111 void xe_pm_might_block_on_suspend(void)
112 {
113 	lock_map_acquire(&xe_pm_block_lockdep_map);
114 	lock_map_release(&xe_pm_block_lockdep_map);
115 }
116 
117 /**
118  * xe_pm_block_on_suspend() - Block pending suspend.
119  * @xe: The xe device about to be suspended.
120  *
121  * Block if the pm notifier has start evicting bos, to avoid
122  * racing and validating those bos back. The function is
123  * annotated to ensure no locks are held that are also grabbed
124  * in the pm notifier or the device suspend / resume.
125  * This is intended to be used by freezable tasks only.
126  * (Not freezable workqueues), with the intention that the function
127  * returns %-ERESTARTSYS when tasks are frozen during suspend,
128  * and allows the task to freeze. The caller must be able to
129  * handle the %-ERESTARTSYS.
130  *
131  * Return: %0 on success, %-ERESTARTSYS on signal pending or
132  * if freezing requested.
133  */
134 int xe_pm_block_on_suspend(struct xe_device *xe)
135 {
136 	xe_pm_might_block_on_suspend();
137 
138 	return wait_for_completion_interruptible(&xe->pm_block);
139 }
140 
141 /**
142  * xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context
143  * @xe: The xe device.
144  *
145  * Return: true if it is safe to runtime resume from reclaim context.
146  * false otherwise.
147  */
148 bool xe_rpm_reclaim_safe(const struct xe_device *xe)
149 {
150 	return !xe->d3cold.capable;
151 }
152 
153 static void xe_rpm_lockmap_acquire(const struct xe_device *xe)
154 {
155 	lock_map_acquire(xe_rpm_reclaim_safe(xe) ?
156 			 &xe_pm_runtime_nod3cold_map :
157 			 &xe_pm_runtime_d3cold_map);
158 }
159 
160 static void xe_rpm_lockmap_release(const struct xe_device *xe)
161 {
162 	lock_map_release(xe_rpm_reclaim_safe(xe) ?
163 			 &xe_pm_runtime_nod3cold_map :
164 			 &xe_pm_runtime_d3cold_map);
165 }
166 
167 /**
168  * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
169  * @xe: xe device instance
170  *
171  * Return: 0 on success
172  */
173 int xe_pm_suspend(struct xe_device *xe)
174 {
175 	struct xe_gt *gt;
176 	u8 id;
177 	int err;
178 
179 	drm_dbg(&xe->drm, "Suspending device\n");
180 	xe_pm_block_begin_signalling();
181 	trace_xe_pm_suspend(xe, __builtin_return_address(0));
182 
183 	err = xe_pxp_pm_suspend(xe->pxp);
184 	if (err)
185 		goto err;
186 
187 	xe_late_bind_wait_for_worker_completion(&xe->late_bind);
188 
189 	for_each_gt(gt, xe, id)
190 		xe_gt_suspend_prepare(gt);
191 
192 	xe_display_pm_suspend(xe);
193 
194 	/* FIXME: Super racey... */
195 	err = xe_bo_evict_all(xe);
196 	if (err)
197 		goto err_display;
198 
199 	for_each_gt(gt, xe, id) {
200 		err = xe_gt_suspend(gt);
201 		if (err)
202 			goto err_display;
203 	}
204 
205 	xe_irq_suspend(xe);
206 
207 	xe_display_pm_suspend_late(xe);
208 
209 	xe_i2c_pm_suspend(xe);
210 
211 	drm_dbg(&xe->drm, "Device suspended\n");
212 	xe_pm_block_end_signalling();
213 
214 	return 0;
215 
216 err_display:
217 	xe_display_pm_resume(xe);
218 	xe_pxp_pm_resume(xe->pxp);
219 err:
220 	drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
221 	xe_pm_block_end_signalling();
222 	return err;
223 }
224 
225 /**
226  * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
227  * @xe: xe device instance
228  *
229  * Return: 0 on success
230  */
231 int xe_pm_resume(struct xe_device *xe)
232 {
233 	struct xe_tile *tile;
234 	struct xe_gt *gt;
235 	u8 id;
236 	int err;
237 
238 	xe_pm_block_begin_signalling();
239 	drm_dbg(&xe->drm, "Resuming device\n");
240 	trace_xe_pm_resume(xe, __builtin_return_address(0));
241 
242 	for_each_gt(gt, xe, id)
243 		xe_gt_idle_disable_c6(gt);
244 
245 	for_each_tile(tile, xe, id)
246 		xe_wa_apply_tile_workarounds(tile);
247 
248 	err = xe_pcode_ready(xe, true);
249 	if (err)
250 		return err;
251 
252 	xe_display_pm_resume_early(xe);
253 
254 	/*
255 	 * This only restores pinned memory which is the memory required for the
256 	 * GT(s) to resume.
257 	 */
258 	err = xe_bo_restore_early(xe);
259 	if (err)
260 		goto err;
261 
262 	xe_i2c_pm_resume(xe, true);
263 
264 	xe_sysctrl_pm_resume(xe);
265 
266 	xe_irq_resume(xe);
267 
268 	for_each_gt(gt, xe, id) {
269 		err = xe_gt_resume(gt);
270 		if (err)
271 			break;
272 	}
273 
274 	/*
275 	 * Try to bring up display before bailing from GT resume failure,
276 	 * so we don't leave the user clueless with a blank screen.
277 	 */
278 	xe_display_pm_resume(xe);
279 	if (err)
280 		goto err;
281 
282 	err = xe_bo_restore_late(xe);
283 	if (err)
284 		goto err;
285 
286 	xe_pxp_pm_resume(xe->pxp);
287 
288 	if (IS_VF_CCS_READY(xe))
289 		xe_sriov_vf_ccs_register_context(xe);
290 
291 	xe_late_bind_fw_load(&xe->late_bind);
292 
293 	drm_dbg(&xe->drm, "Device resumed\n");
294 	xe_pm_block_end_signalling();
295 	return 0;
296 err:
297 	drm_dbg(&xe->drm, "Device resume failed %d\n", err);
298 	xe_pm_block_end_signalling();
299 	return err;
300 }
301 
302 static bool xe_pm_pci_d3cold_capable(struct xe_device *xe)
303 {
304 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
305 	struct pci_dev *root_pdev;
306 
307 	root_pdev = pcie_find_root_port(pdev);
308 	if (!root_pdev)
309 		return false;
310 
311 	/* D3Cold requires PME capability */
312 	if (!pci_pme_capable(root_pdev, PCI_D3cold)) {
313 		drm_dbg(&xe->drm, "d3cold: PME# not supported\n");
314 		return false;
315 	}
316 
317 	/* D3Cold requires _PR3 power resource */
318 	if (!pci_pr3_present(root_pdev)) {
319 		drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n");
320 		return false;
321 	}
322 
323 	return true;
324 }
325 
326 static void xe_pm_runtime_init(struct xe_device *xe)
327 {
328 	struct device *dev = xe->drm.dev;
329 
330 	/* Our current VFs do not support RPM. so, disable it */
331 	if (IS_SRIOV_VF(xe))
332 		return;
333 
334 	/*
335 	 * Disable the system suspend direct complete optimization.
336 	 * We need to ensure that the regular device suspend/resume functions
337 	 * are called since our runtime_pm cannot guarantee local memory
338 	 * eviction for d3cold.
339 	 * TODO: Check HDA audio dependencies claimed by i915, and then enforce
340 	 *       this option to integrated graphics as well.
341 	 */
342 	if (IS_DGFX(xe))
343 		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);
344 
345 	pm_runtime_use_autosuspend(dev);
346 	pm_runtime_set_autosuspend_delay(dev, 1000);
347 	pm_runtime_set_active(dev);
348 	pm_runtime_allow(dev);
349 	pm_runtime_mark_last_busy(dev);
350 	pm_runtime_put(dev);
351 }
352 
353 /**
354  * xe_pm_init_early() - Initialize Xe Power Management
355  * @xe: the &xe_device instance
356  *
357  * Initialize everything that is a "software-only" state that does not
358  * require access to any of the device's hardware data.
359  *
360  * Return: 0 on success or a negative error code on failure.
361  */
362 int xe_pm_init_early(struct xe_device *xe)
363 {
364 	int err;
365 
366 	init_completion(&xe->pm_block);
367 	complete_all(&xe->pm_block);
368 	INIT_LIST_HEAD(&xe->rebind_resume_list);
369 	INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);
370 
371 	err = drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
372 	if (err)
373 		return err;
374 
375 	err = drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
376 	if (err)
377 		return err;
378 
379 	err = drmm_mutex_init(&xe->drm, &xe->rebind_resume_lock);
380 	if (err)
381 		return err;
382 
383 	return 0;
384 }
385 ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */
386 
387 /**
388  * xe_pm_probe() - Initialize Xe Power Management
389  * @xe: the &xe_device instance
390  *
391  * Check d3cold capability.
392  *
393  * Return: 0 on success or a negative error code on failure.
394  */
395 int xe_pm_probe(struct xe_device *xe)
396 {
397 	xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe);
398 	xe_dbg(xe, "d3cold: capable=%s\n", str_yes_no(xe->d3cold.capable));
399 
400 	return 0;
401 }
402 
403 static u32 vram_threshold_value(struct xe_device *xe)
404 {
405 	if (xe->info.platform == XE_BATTLEMAGE) {
406 		const char *product_name;
407 
408 		product_name = dmi_get_system_info(DMI_PRODUCT_NAME);
409 		if (product_name && strstr(product_name, "NUC13RNG")) {
410 			drm_warn(&xe->drm, "BMG + D3Cold not supported on this platform\n");
411 			return 0;
412 		}
413 	}
414 
415 	return DEFAULT_VRAM_THRESHOLD;
416 }
417 
418 static void xe_pm_wake_rebind_workers(struct xe_device *xe)
419 {
420 	struct xe_vm *vm, *next;
421 
422 	mutex_lock(&xe->rebind_resume_lock);
423 	list_for_each_entry_safe(vm, next, &xe->rebind_resume_list,
424 				 preempt.pm_activate_link) {
425 		list_del_init(&vm->preempt.pm_activate_link);
426 		xe_vm_resume_rebind_worker(vm);
427 	}
428 	mutex_unlock(&xe->rebind_resume_lock);
429 }
430 
431 static int xe_pm_notifier_callback(struct notifier_block *nb,
432 				   unsigned long action, void *data)
433 {
434 	struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier);
435 	int err = 0;
436 
437 	switch (action) {
438 	case PM_HIBERNATION_PREPARE:
439 	case PM_SUSPEND_PREPARE:
440 	{
441 		struct xe_validation_ctx ctx;
442 
443 		reinit_completion(&xe->pm_block);
444 		xe_pm_block_begin_signalling();
445 		xe_pm_runtime_get(xe);
446 		(void)xe_validation_ctx_init(&ctx, &xe->val, NULL,
447 					     (struct xe_val_flags) {.exclusive = true});
448 		err = xe_bo_evict_all_user(xe);
449 		xe_validation_ctx_fini(&ctx);
450 		if (err)
451 			drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err);
452 
453 		err = xe_bo_notifier_prepare_all_pinned(xe);
454 		if (err)
455 			drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err);
456 		/*
457 		 * Keep the runtime pm reference until post hibernation / post suspend to
458 		 * avoid a runtime suspend interfering with evicted objects or backup
459 		 * allocations.
460 		 */
461 		xe_pm_block_end_signalling();
462 		break;
463 	}
464 	case PM_POST_HIBERNATION:
465 	case PM_POST_SUSPEND:
466 		complete_all(&xe->pm_block);
467 		xe_pm_wake_rebind_workers(xe);
468 		xe_bo_notifier_unprepare_all_pinned(xe);
469 		xe_pm_runtime_put(xe);
470 		break;
471 	}
472 
473 	return NOTIFY_DONE;
474 }
475 
476 /**
477  * xe_pm_init - Initialize Xe Power Management
478  * @xe: xe device instance
479  *
480  * This component is responsible for System and Device sleep states.
481  *
482  * Returns 0 for success, negative error code otherwise.
483  */
484 int xe_pm_init(struct xe_device *xe)
485 {
486 	u32 vram_threshold;
487 	int err;
488 
489 	xe->pm_notifier.notifier_call = xe_pm_notifier_callback;
490 	err = register_pm_notifier(&xe->pm_notifier);
491 	if (err)
492 		return err;
493 
494 	/* For now suspend/resume is only allowed with GuC */
495 	if (!xe_device_uc_enabled(xe))
496 		return 0;
497 
498 	if (xe->d3cold.capable) {
499 		vram_threshold = vram_threshold_value(xe);
500 		err = xe_pm_set_vram_threshold(xe, vram_threshold);
501 		if (err)
502 			goto err_unregister;
503 	}
504 
505 	xe_pm_runtime_init(xe);
506 	return 0;
507 
508 err_unregister:
509 	unregister_pm_notifier(&xe->pm_notifier);
510 	return err;
511 }
512 
513 static void xe_pm_runtime_fini(struct xe_device *xe)
514 {
515 	struct device *dev = xe->drm.dev;
516 
517 	/* Our current VFs do not support RPM. so, disable it */
518 	if (IS_SRIOV_VF(xe))
519 		return;
520 
521 	pm_runtime_get_sync(dev);
522 	pm_runtime_forbid(dev);
523 }
524 
525 /**
526  * xe_pm_fini - Finalize PM
527  * @xe: xe device instance
528  */
529 void xe_pm_fini(struct xe_device *xe)
530 {
531 	if (xe_device_uc_enabled(xe))
532 		xe_pm_runtime_fini(xe);
533 
534 	unregister_pm_notifier(&xe->pm_notifier);
535 }
536 
537 static void xe_pm_write_callback_task(struct xe_device *xe,
538 				      struct task_struct *task)
539 {
540 	WRITE_ONCE(xe->pm_callback_task, task);
541 
542 	/*
543 	 * Just in case it's somehow possible for our writes to be reordered to
544 	 * the extent that something else re-uses the task written in
545 	 * pm_callback_task. For example after returning from the callback, but
546 	 * before the reordered write that resets pm_callback_task back to NULL.
547 	 */
548 	smp_mb(); /* pairs with xe_pm_read_callback_task */
549 }
550 
551 struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
552 {
553 	smp_mb(); /* pairs with xe_pm_write_callback_task */
554 
555 	return READ_ONCE(xe->pm_callback_task);
556 }
557 
558 /**
559  * xe_pm_runtime_suspended - Check if runtime_pm state is suspended
560  * @xe: xe device instance
561  *
562  * This does not provide any guarantee that the device is going to remain
563  * suspended as it might be racing with the runtime state transitions.
564  * It can be used only as a non-reliable assertion, to ensure that we are not in
565  * the sleep state while trying to access some memory for instance.
566  *
567  * Returns true if PCI device is suspended, false otherwise.
568  */
569 bool xe_pm_runtime_suspended(struct xe_device *xe)
570 {
571 	return pm_runtime_suspended(xe->drm.dev);
572 }
573 
574 /**
575  * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold
576  * @xe: xe device instance
577  *
578  * Returns 0 for success, negative error code otherwise.
579  */
580 int xe_pm_runtime_suspend(struct xe_device *xe)
581 {
582 	struct xe_bo *bo, *on;
583 	struct xe_gt *gt;
584 	u8 id;
585 	int err = 0;
586 
587 	trace_xe_pm_runtime_suspend(xe, __builtin_return_address(0));
588 	/* Disable access_ongoing asserts and prevent recursive pm calls */
589 	xe_pm_write_callback_task(xe, current);
590 
591 	/*
592 	 * The actual xe_pm_runtime_put() is always async underneath, so
593 	 * exactly where that is called should makes no difference to us. However
594 	 * we still need to be very careful with the locks that this callback
595 	 * acquires and the locks that are acquired and held by any callers of
596 	 * xe_runtime_pm_get(). We already have the matching annotation
597 	 * on that side, but we also need it here. For example lockdep should be
598 	 * able to tell us if the following scenario is in theory possible:
599 	 *
600 	 * CPU0                          | CPU1 (kworker)
601 	 * lock(A)                       |
602 	 *                               | xe_pm_runtime_suspend()
603 	 *                               |      lock(A)
604 	 * xe_pm_runtime_get()           |
605 	 *
606 	 * This will clearly deadlock since rpm core needs to wait for
607 	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
608 	 * on CPU0 which prevents CPU1 making forward progress.  With the
609 	 * annotation here and in xe_pm_runtime_get() lockdep will see
610 	 * the potential lock inversion and give us a nice splat.
611 	 */
612 	xe_rpm_lockmap_acquire(xe);
613 
614 	err = xe_pxp_pm_suspend(xe->pxp);
615 	if (err)
616 		goto out;
617 
618 	/*
619 	 * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify
620 	 * also checks and deletes bo entry from user fault list.
621 	 */
622 	mutex_lock(&xe->mem_access.vram_userfault.lock);
623 	list_for_each_entry_safe(bo, on,
624 				 &xe->mem_access.vram_userfault.list, vram_userfault_link)
625 		xe_bo_runtime_pm_release_mmap_offset(bo);
626 	mutex_unlock(&xe->mem_access.vram_userfault.lock);
627 
628 	xe_display_pm_runtime_suspend(xe);
629 
630 	if (xe->d3cold.allowed) {
631 		err = xe_bo_evict_all(xe);
632 		if (err)
633 			goto out_resume;
634 	}
635 
636 	for_each_gt(gt, xe, id) {
637 		err = xe->d3cold.allowed ? xe_gt_suspend(gt) : xe_gt_runtime_suspend(gt);
638 		if (err)
639 			goto out_resume;
640 	}
641 
642 	xe_irq_suspend(xe);
643 
644 	xe_display_pm_runtime_suspend_late(xe);
645 
646 	xe_i2c_pm_suspend(xe);
647 
648 	xe_rpm_lockmap_release(xe);
649 	xe_pm_write_callback_task(xe, NULL);
650 	return 0;
651 
652 out_resume:
653 	xe_display_pm_runtime_resume(xe);
654 	xe_pxp_pm_resume(xe->pxp);
655 out:
656 	xe_rpm_lockmap_release(xe);
657 	xe_pm_write_callback_task(xe, NULL);
658 	return err;
659 }
660 
661 /**
662  * xe_pm_runtime_resume - Waking up from D3hot/D3Cold
663  * @xe: xe device instance
664  *
665  * Returns 0 for success, negative error code otherwise.
666  */
667 int xe_pm_runtime_resume(struct xe_device *xe)
668 {
669 	struct xe_gt *gt;
670 	u8 id;
671 	int err = 0;
672 
673 	trace_xe_pm_runtime_resume(xe, __builtin_return_address(0));
674 	/* Disable access_ongoing asserts and prevent recursive pm calls */
675 	xe_pm_write_callback_task(xe, current);
676 
677 	xe_rpm_lockmap_acquire(xe);
678 
679 	if (xe->d3cold.allowed) {
680 		for_each_gt(gt, xe, id)
681 			xe_gt_idle_disable_c6(gt);
682 
683 		err = xe_pcode_ready(xe, true);
684 		if (err)
685 			goto out;
686 
687 		xe_display_pm_resume_early(xe);
688 
689 		/*
690 		 * This only restores pinned memory which is the memory
691 		 * required for the GT(s) to resume.
692 		 */
693 		err = xe_bo_restore_early(xe);
694 		if (err)
695 			goto out;
696 	}
697 
698 	xe_i2c_pm_resume(xe, xe->d3cold.allowed);
699 
700 	if (xe->d3cold.allowed)
701 		xe_sysctrl_pm_resume(xe);
702 
703 	xe_irq_resume(xe);
704 
705 	for_each_gt(gt, xe, id) {
706 		err = xe->d3cold.allowed ? xe_gt_resume(gt) : xe_gt_runtime_resume(gt);
707 		if (err)
708 			break;
709 	}
710 
711 	/*
712 	 * Try to bring up display before bailing from GT resume failure,
713 	 * so we don't leave the user clueless with a blank screen.
714 	 */
715 	xe_display_pm_runtime_resume(xe);
716 	if (err)
717 		goto out;
718 
719 	if (xe->d3cold.allowed) {
720 		err = xe_bo_restore_late(xe);
721 		if (err)
722 			goto out;
723 	}
724 
725 	xe_pxp_pm_resume(xe->pxp);
726 
727 	if (IS_VF_CCS_READY(xe))
728 		xe_sriov_vf_ccs_register_context(xe);
729 
730 	if (xe->d3cold.allowed)
731 		xe_late_bind_fw_load(&xe->late_bind);
732 
733 out:
734 	xe_rpm_lockmap_release(xe);
735 	xe_pm_write_callback_task(xe, NULL);
736 	return err;
737 }
738 
/*
 * Where resume is synchronous it is easy to deadlock if we are not careful.
 * In practice it can also be very timing sensitive to ever observe the
 * 0 -> 1 transition with the caller's locks held, so deadlocks may exist
 * and yet be hard for lockdep to ever see.
 * To help lockdep learn about the potentially scary things that can happen
 * inside the runtime_resume callback, acquire a dummy lock here (it
 * protects nothing and is compiled out on non-debug builds). Lockdep then
 * only needs to see the xe_pm_runtime_xxx_map -> runtime_resume callback
 * ordering once, and can hopefully validate every
 * (callers_locks) -> xe_pm_runtime_xxx_map ordering from there.
 * For example, if the (callers_locks) are ever grabbed in the
 * runtime_resume callback, lockdep should give us a nice splat.
 */
static void xe_rpm_might_enter_cb(const struct xe_device *xe)
{
	/* Acquire and immediately drop: annotation only. */
	xe_rpm_lockmap_acquire(xe);
	xe_rpm_lockmap_release(xe);
}
758 
759 /*
760  * Prime the lockdep maps for known locking orders that need to
761  * be supported but that may not always occur on all systems.
762  */
763 static void xe_pm_runtime_lockdep_prime(void)
764 {
765 	struct dma_resv lockdep_resv;
766 
767 	dma_resv_init(&lockdep_resv);
768 	lock_map_acquire(&xe_pm_runtime_d3cold_map);
769 	/* D3Cold takes the dma_resv locks to evict bos */
770 	dma_resv_lock(&lockdep_resv, NULL);
771 	dma_resv_unlock(&lockdep_resv);
772 	lock_map_release(&xe_pm_runtime_d3cold_map);
773 
774 	/* Shrinkers might like to wake up the device under reclaim. */
775 	fs_reclaim_acquire(GFP_KERNEL);
776 	lock_map_acquire(&xe_pm_runtime_nod3cold_map);
777 	lock_map_release(&xe_pm_runtime_nod3cold_map);
778 	fs_reclaim_release(GFP_KERNEL);
779 }
780 
781 /**
782  * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously
783  * @xe: xe device instance
784  *
785  * When possible, scope-based runtime PM (through guard(xe_pm_runtime)) is
786  * be preferred over direct usage of this function.  Manual get/put handling
787  * should only be used when the function contains goto-based logic which
788  * can break scope-based handling, or when the lifetime of the runtime PM
789  * reference does not match a specific scope (e.g., runtime PM obtained in one
790  * function and released in a different one).
791  */
792 void xe_pm_runtime_get(struct xe_device *xe)
793 {
794 	trace_xe_pm_runtime_get(xe, __builtin_return_address(0));
795 	pm_runtime_get_noresume(xe->drm.dev);
796 
797 	if (xe_pm_read_callback_task(xe) == current)
798 		return;
799 
800 	xe_rpm_might_enter_cb(xe);
801 	pm_runtime_resume(xe->drm.dev);
802 }
803 
804 /**
805  * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle
806  * @xe: xe device instance
807  */
808 void xe_pm_runtime_put(struct xe_device *xe)
809 {
810 	trace_xe_pm_runtime_put(xe, __builtin_return_address(0));
811 	if (xe_pm_read_callback_task(xe) == current) {
812 		pm_runtime_put_noidle(xe->drm.dev);
813 	} else {
814 		pm_runtime_mark_last_busy(xe->drm.dev);
815 		pm_runtime_put(xe->drm.dev);
816 	}
817 }
818 
819 /**
820  * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl
821  * @xe: xe device instance
822  *
823  * When possible, scope-based runtime PM (through
824  * ACQUIRE(xe_pm_runtime_ioctl, ...)) is be preferred over direct usage of this
825  * function.  Manual get/put handling should only be used when the function
826  * contains goto-based logic which can break scope-based handling, or when the
827  * lifetime of the runtime PM reference does not match a specific scope (e.g.,
828  * runtime PM obtained in one function and released in a different one).
829  *
830  * Returns: Any number greater than or equal to 0 for success, negative error
831  * code otherwise.
832  */
833 int xe_pm_runtime_get_ioctl(struct xe_device *xe)
834 {
835 	trace_xe_pm_runtime_get_ioctl(xe, __builtin_return_address(0));
836 	if (WARN_ON(xe_pm_read_callback_task(xe) == current))
837 		return -ELOOP;
838 
839 	xe_rpm_might_enter_cb(xe);
840 	return pm_runtime_get_sync(xe->drm.dev);
841 }
842 
843 /**
844  * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active
845  * @xe: xe device instance
846  *
847  * Return: True if device is awake (regardless the previous number of references)
848  * and a new reference was taken, false otherwise.
849  */
850 bool xe_pm_runtime_get_if_active(struct xe_device *xe)
851 {
852 	return pm_runtime_get_if_active(xe->drm.dev) > 0;
853 }
854 
855 /**
856  * xe_pm_runtime_get_if_in_use - Get a new reference if device is active with previous ref taken
857  * @xe: xe device instance
858  *
859  * Return: True if device is awake, a previous reference had been already taken,
860  * and a new reference was now taken, false otherwise.
861  */
862 bool xe_pm_runtime_get_if_in_use(struct xe_device *xe)
863 {
864 	if (xe_pm_read_callback_task(xe) == current) {
865 		/* The device is awake, grab the ref and move on */
866 		pm_runtime_get_noresume(xe->drm.dev);
867 		return true;
868 	}
869 
870 	return pm_runtime_get_if_in_use(xe->drm.dev) > 0;
871 }
872 
873 /*
874  * Very unreliable! Should only be used to suppress the false positive case
875  * in the missing outer rpm protection warning.
876  */
877 static bool xe_pm_suspending_or_resuming(struct xe_device *xe)
878 {
879 #ifdef CONFIG_PM
880 	struct device *dev = xe->drm.dev;
881 
882 	return dev->power.runtime_status == RPM_SUSPENDING ||
883 		dev->power.runtime_status == RPM_RESUMING ||
884 		pm_suspend_in_progress();
885 #else
886 	return false;
887 #endif
888 }
889 
890 /**
891  * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming
892  * @xe: xe device instance
893  *
894  * This function should be used in inner places where it is surely already
895  * protected by outer-bound callers of `xe_pm_runtime_get`.
896  * It will warn if not protected.
897  * The reference should be put back after this function regardless, since it
898  * will always bump the usage counter, regardless.
899  *
900  * When possible, scope-based runtime PM (through guard(xe_pm_runtime_noresume))
901  * is be preferred over direct usage of this function.  Manual get/put handling
902  * should only be used when the function contains goto-based logic which can
903  * break scope-based handling, or when the lifetime of the runtime PM reference
904  * does not match a specific scope (e.g., runtime PM obtained in one function
905  * and released in a different one).
906  */
907 void xe_pm_runtime_get_noresume(struct xe_device *xe)
908 {
909 	bool ref;
910 
911 	ref = xe_pm_runtime_get_if_in_use(xe);
912 
913 	if (!ref) {
914 		pm_runtime_get_noresume(xe->drm.dev);
915 		drm_WARN(&xe->drm, !xe_pm_suspending_or_resuming(xe),
916 			 "Missing outer runtime PM protection\n");
917 	}
918 }
919 
920 /**
921  * xe_pm_runtime_resume_and_get - Resume, then get a runtime_pm ref if awake.
922  * @xe: xe device instance
923  *
924  * Returns: True if device is awake and the reference was taken, false otherwise.
925  */
926 bool xe_pm_runtime_resume_and_get(struct xe_device *xe)
927 {
928 	if (xe_pm_read_callback_task(xe) == current) {
929 		/* The device is awake, grab the ref and move on */
930 		pm_runtime_get_noresume(xe->drm.dev);
931 		return true;
932 	}
933 
934 	xe_rpm_might_enter_cb(xe);
935 	return pm_runtime_resume_and_get(xe->drm.dev) >= 0;
936 }
937 
938 /**
939  * xe_pm_assert_unbounded_bridge - Disable PM on unbounded pcie parent bridge
940  * @xe: xe device instance
941  */
942 void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
943 {
944 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
945 	struct pci_dev *bridge = pci_upstream_bridge(pdev);
946 
947 	if (!bridge)
948 		return;
949 
950 	if (!bridge->driver) {
951 		drm_warn(&xe->drm, "unbounded parent pci bridge, device won't support any PM support.\n");
952 		device_set_pm_not_required(&pdev->dev);
953 	}
954 }
955 
956 /**
957  * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold
958  * @xe: xe device instance
959  * @threshold: VRAM size in MiB for the D3cold threshold
960  *
961  * Return:
962  * * 0		- success
963  * * -EINVAL	- invalid argument
964  */
965 int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
966 {
967 	struct ttm_resource_manager *man;
968 	u32 vram_total_mb = 0;
969 	int i;
970 
971 	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
972 		man = ttm_manager_type(&xe->ttm, i);
973 		if (man)
974 			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
975 	}
976 
977 	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);
978 
979 	if (threshold > vram_total_mb)
980 		return -EINVAL;
981 
982 	mutex_lock(&xe->d3cold.lock);
983 	xe->d3cold.vram_threshold = threshold;
984 	mutex_unlock(&xe->d3cold.lock);
985 
986 	return 0;
987 }
988 
989 /**
990  * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed
991  * @xe: xe device instance
992  *
993  * To be called during runtime_pm idle callback.
994  * Check for all the D3Cold conditions ahead of runtime suspend.
995  */
996 void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
997 {
998 	struct ttm_resource_manager *man;
999 	u32 total_vram_used_mb = 0;
1000 	u64 vram_used;
1001 	int i;
1002 
1003 	if (!xe->d3cold.capable) {
1004 		xe->d3cold.allowed = false;
1005 		return;
1006 	}
1007 
1008 	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
1009 		man = ttm_manager_type(&xe->ttm, i);
1010 		if (man) {
1011 			vram_used = ttm_resource_manager_usage(man);
1012 			total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024);
1013 		}
1014 	}
1015 
1016 	mutex_lock(&xe->d3cold.lock);
1017 
1018 	if (total_vram_used_mb < xe->d3cold.vram_threshold)
1019 		xe->d3cold.allowed = true;
1020 	else
1021 		xe->d3cold.allowed = false;
1022 
1023 	mutex_unlock(&xe->d3cold.lock);
1024 }
1025 
1026 /**
1027  * xe_pm_module_init() - Perform xe_pm specific module initialization.
1028  *
1029  * Return: 0 on success. Currently doesn't fail.
1030  */
1031 int __init xe_pm_module_init(void)
1032 {
1033 	xe_pm_runtime_lockdep_prime();
1034 	return 0;
1035 }
1036