// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_pm.h"

#include <linux/pm_runtime.h>

#include <drm/drm_managed.h>
#include <drm/ttm/ttm_placement.h>

#include "display/xe_display.h"
#include "xe_bo.h"
#include "xe_bo_evict.h"
#include "xe_device.h"
#include "xe_device_sysfs.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_guc.h"
#include "xe_irq.h"
#include "xe_pcode.h"
#include "xe_wa.h"

/**
 * DOC: Xe Power Management
 *
 * Xe PM implements the main routines for both system level suspend states and
 * for the opportunistic runtime suspend states.
 *
 * System Level Suspend (S-States) - In general this is OS initiated suspend
 * driven by ACPI for achieving S0ix (a.k.a. S2idle, freeze), S3 (suspend to ram),
 * S4 (disk). The main functions here are `xe_pm_suspend` and `xe_pm_resume`. They
 * are the main entry points for suspending to and resuming from these states.
 *
 * PCI Device Suspend (D-States) - This is the opportunistic PCIe device low power
 * state D3, controlled by the PCI subsystem and ACPI with the help from the
 * runtime_pm infrastructure.
 * PCI D3 is special and can mean D3hot, where Vcc power stays on to keep memory
 * alive and allow a quicker, low latency resume, or D3Cold, where Vcc power is
 * off for better power savings.
 * The Vcc power of the PCI hierarchy can only be controlled at the PCI root port
 * level, while the device driver can be behind multiple bridges/switches and
 * paired with other devices. For this reason, the PCI subsystem cannot perform
 * the transition towards D3Cold. The lowest runtime PM possible from the PCI
 * subsystem is D3hot. Then, if all the paired devices in the same root port
 * are in D3hot, ACPI will assist here and run its own methods (_PR3 and _OFF)
 * to perform the transition from D3hot to D3cold. Xe may disallow this
 * transition by calling pci_d3cold_disable(root_pdev) before going to runtime
 * suspend. This decision is based on runtime conditions such as VRAM usage,
 * for instance when a quick and low latency resume is desired.
 *
 * Runtime PM - This infrastructure provided by the Linux kernel allows the
 * device drivers to indicate when they can be runtime suspended, so the device
 * can be put in D3 (if supported), or allow deeper package sleep states
 * (PC-states), and/or other low level power states. The Xe PM component provides
 * `xe_pm_runtime_suspend` and `xe_pm_runtime_resume` functions that the PCI
 * subsystem will call when transitioning to/from runtime suspend.
 *
 * Also, Xe PM provides get and put functions that the Xe driver will use to
 * indicate activity. In order to avoid locking complications with the memory
 * management, whenever possible, these get and put functions need to be called
 * from the higher/outer levels.
 * The main cases that need to be protected from the outer levels are: IOCTL,
 * sysfs, debugfs, dma-buf sharing, GPU execution.
 *
 * This component is not responsible for GT idleness (RC6) nor GT frequency
 * management (RPS).
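 *
 * As an illustrative sketch only (the real callers live in the IOCTL, sysfs,
 * debugfs and execution paths of the driver, and the helper below is
 * hypothetical), an outer-bound entry point is expected to bracket its work
 * with a get/put pair::
 *
 *	xe_pm_runtime_get(xe);
 *	err = do_protected_hw_access(xe);	// hypothetical inner helper
 *	xe_pm_runtime_put(xe);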
 */

/**
 * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
 * @xe: xe device instance
 *
 * Return: 0 on success
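 *
 * A minimal sketch of how this helper is expected to be wired up, assuming the
 * usual PCI glue (the actual dev_pm_ops hookup lives in xe_pci.c, and
 * pdev_to_xe_device() is assumed to come from xe_device.h)::
 *
 *	static int example_suspend_cb(struct device *dev)
 *	{
 *		struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev));
 *
 *		return xe_pm_suspend(xe);
 *	}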
 */
int xe_pm_suspend(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err;

	drm_dbg(&xe->drm, "Suspending device\n");

	for_each_gt(gt, xe, id)
		xe_gt_suspend_prepare(gt);

	/* FIXME: Super racey... */
	err = xe_bo_evict_all(xe);
	if (err)
		goto err;

	xe_display_pm_suspend(xe);

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err) {
			xe_display_pm_resume(xe);
			goto err;
		}
	}

	xe_irq_suspend(xe);

	xe_display_pm_suspend_late(xe);

	drm_dbg(&xe->drm, "Device suspended\n");
	return 0;
err:
	drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
	return err;
}

/**
 * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_resume(struct xe_device *xe)
{
	struct xe_tile *tile;
	struct xe_gt *gt;
	u8 id;
	int err;

	drm_dbg(&xe->drm, "Resuming device\n");

	for_each_tile(tile, xe, id)
		xe_wa_apply_tile_workarounds(tile);

	err = xe_pcode_ready(xe, true);
	if (err)
		return err;

	xe_display_pm_resume_early(xe);

	/*
	 * This only restores pinned memory which is the memory required for the
	 * GT(s) to resume.
	 */
	err = xe_bo_restore_kernel(xe);
	if (err)
		goto err;

	xe_irq_resume(xe);

	xe_display_pm_resume(xe);

	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	err = xe_bo_restore_user(xe);
	if (err)
		goto err;

	drm_dbg(&xe->drm, "Device resumed\n");
	return 0;
err:
	drm_dbg(&xe->drm, "Device resume failed %d\n", err);
	return err;
}

static bool xe_pm_pci_d3cold_capable(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *root_pdev;

	root_pdev = pcie_find_root_port(pdev);
	if (!root_pdev)
		return false;

	/* D3Cold requires PME capability */
	if (!pci_pme_capable(root_pdev, PCI_D3cold)) {
		drm_dbg(&xe->drm, "d3cold: PME# not supported\n");
		return false;
	}

	/* D3Cold requires _PR3 power resource */
	if (!pci_pr3_present(root_pdev)) {
		drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n");
		return false;
	}

	return true;
}

static void xe_pm_runtime_init(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/*
	 * Disable the system suspend direct complete optimization.
	 * We need to ensure that the regular device suspend/resume functions
	 * are called since our runtime_pm cannot guarantee local memory
	 * eviction for d3cold.
	 * TODO: Check HDA audio dependencies claimed by i915, and then extend
	 *       this option to integrated graphics as well.
	 */
	if (IS_DGFX(xe))
		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);

	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, 1000);
	pm_runtime_set_active(dev);
	pm_runtime_allow(dev);
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put(dev);
}

void xe_pm_init_early(struct xe_device *xe)
{
	INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);
	drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
}

/**
 * xe_pm_init - Initialize Xe Power Management
 * @xe: xe device instance
 *
 * This component is responsible for System and Device sleep states.
 */
void xe_pm_init(struct xe_device *xe)
{
	/* For now suspend/resume is only allowed with GuC */
	if (!xe_device_uc_enabled(xe))
		return;

	drmm_mutex_init(&xe->drm, &xe->d3cold.lock);

	xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe);

	if (xe->d3cold.capable) {
		xe_device_sysfs_init(xe);
		xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
	}

	xe_pm_runtime_init(xe);
}

/**
 * xe_pm_runtime_fini - Finalize Runtime PM
 * @xe: xe device instance
 */
void xe_pm_runtime_fini(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	pm_runtime_get_sync(dev);
	pm_runtime_forbid(dev);
}

static void xe_pm_write_callback_task(struct xe_device *xe,
				      struct task_struct *task)
{
	WRITE_ONCE(xe->pm_callback_task, task);

	/*
	 * Just in case it's somehow possible for our writes to be reordered to
	 * the extent that something else re-uses the task written in
	 * pm_callback_task. For example after returning from the callback, but
	 * before the reordered write that resets pm_callback_task back to NULL.
	 */
	smp_mb(); /* pairs with xe_pm_read_callback_task */
}

struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
{
	smp_mb(); /* pairs with xe_pm_write_callback_task */

	return READ_ONCE(xe->pm_callback_task);
}

/**
 * xe_pm_runtime_suspended - Check if runtime_pm state is suspended
 * @xe: xe device instance
 *
 * This does not provide any guarantee that the device is going to remain
 * suspended as it might be racing with the runtime state transitions.
 * It can be used only as a non-reliable assertion, to ensure that we are not in
 * the sleep state while trying to access some memory for instance.
 *
 * Returns true if PCI device is suspended, false otherwise.
 */
bool xe_pm_runtime_suspended(struct xe_device *xe)
{
	return pm_runtime_suspended(xe->drm.dev);
}

/**
 * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold
 * @xe: xe device instance
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_runtime_suspend(struct xe_device *xe)
{
	struct xe_bo *bo, *on;
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	if (xe->d3cold.allowed && xe_device_mem_access_ongoing(xe))
		return -EBUSY;

	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	/*
	 * The actual xe_device_mem_access_put() is always async underneath, so
	 * exactly where that is called should make no difference to us. However
	 * we still need to be very careful with the locks that this callback
	 * acquires and the locks that are acquired and held by any callers of
	 * xe_device_mem_access_get(). We already have the matching annotation
	 * on that side, but we also need it here. For example lockdep should be
	 * able to tell us if the following scenario is in theory possible:
	 *
	 * CPU0                          | CPU1 (kworker)
	 * lock(A)                       |
	 *                               | xe_pm_runtime_suspend()
	 *                               |      lock(A)
	 * xe_device_mem_access_get()    |
	 *
	 * This will clearly deadlock since rpm core needs to wait for
	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
	 * on CPU0 which prevents CPU1 making forward progress.  With the
	 * annotation here and in xe_device_mem_access_get() lockdep will see
	 * the potential lock inversion and give us a nice splat.
	 */
	lock_map_acquire(&xe_device_mem_access_lockdep_map);

	/*
	 * Take the lock for the entire list operation, as xe_ttm_bo_destroy and
	 * xe_bo_move_notify also check and delete the bo entry from the user
	 * fault list.
	 */
	mutex_lock(&xe->mem_access.vram_userfault.lock);
	list_for_each_entry_safe(bo, on,
				 &xe->mem_access.vram_userfault.list, vram_userfault_link)
		xe_bo_runtime_pm_release_mmap_offset(bo);
	mutex_unlock(&xe->mem_access.vram_userfault.lock);

	if (xe->d3cold.allowed) {
		err = xe_bo_evict_all(xe);
		if (err)
			goto out;
	}

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err)
			goto out;
	}

	xe_irq_suspend(xe);
out:
	lock_map_release(&xe_device_mem_access_lockdep_map);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/**
 * xe_pm_runtime_resume - Waking up from D3hot/D3Cold
 * @xe: xe device instance
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_runtime_resume(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	lock_map_acquire(&xe_device_mem_access_lockdep_map);

	/*
	 * It is possible that xe has allowed d3cold but other pcie devices in
	 * the gfx card SoC have blocked d3cold, so the card has not really
	 * lost power. Detecting the primary GT power is sufficient.
	 */
	gt = xe_device_get_gt(xe, 0);
	xe->d3cold.power_lost = xe_guc_in_reset(&gt->uc.guc);

	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
		err = xe_pcode_ready(xe, true);
		if (err)
			goto out;

		/*
		 * This only restores pinned memory which is the memory
		 * required for the GT(s) to resume.
		 */
		err = xe_bo_restore_kernel(xe);
		if (err)
			goto out;
	}

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
		err = xe_bo_restore_user(xe);
		if (err)
			goto out;
	}
out:
	lock_map_release(&xe_device_mem_access_lockdep_map);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/**
 * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously
 * @xe: xe device instance
 */
void xe_pm_runtime_get(struct xe_device *xe)
{
	pm_runtime_get_noresume(xe->drm.dev);

	if (xe_pm_read_callback_task(xe) == current)
		return;

	pm_runtime_resume(xe->drm.dev);
}

/**
 * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle
 * @xe: xe device instance
 */
void xe_pm_runtime_put(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		pm_runtime_put_noidle(xe->drm.dev);
	} else {
		pm_runtime_mark_last_busy(xe->drm.dev);
		pm_runtime_put(xe->drm.dev);
	}
}

/**
 * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl
 * @xe: xe device instance
 *
 * Returns: Any number greater than or equal to 0 for success, negative error
 * code otherwise.
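 *
 * A usage sketch (illustrative only; the helper below is hypothetical and the
 * real wiring lives in the driver's ioctl dispatch path). Note that the usage
 * counter is bumped even on failure, so the reference is put unconditionally::
 *
 *	ret = xe_pm_runtime_get_ioctl(xe);
 *	if (ret >= 0)
 *		ret = do_ioctl_work(xe, data);	// hypothetical inner work
 *	xe_pm_runtime_put(xe);
 *	return ret;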
 */
int xe_pm_runtime_get_ioctl(struct xe_device *xe)
{
	if (WARN_ON(xe_pm_read_callback_task(xe) == current))
		return -ELOOP;

	return pm_runtime_get_sync(xe->drm.dev);
}

/**
 * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active
 * @xe: xe device instance
 *
 * Returns: 1 if the reference was taken because the device was active, 0 if
 * the device was not active, negative error code otherwise.
 */
int xe_pm_runtime_get_if_active(struct xe_device *xe)
{
	return pm_runtime_get_if_active(xe->drm.dev);
}

/**
 * xe_pm_runtime_get_if_in_use - Get a runtime_pm reference if the device is already in use
 * @xe: xe device instance
 *
 * Returns: True if device is awake and the reference was taken, false otherwise.
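 *
 * A brief sketch of the intended non-blocking pattern (illustrative only;
 * unlike xe_pm_runtime_get(), this never resumes the device, so the caller
 * must cope with a false return)::
 *
 *	if (!xe_pm_runtime_get_if_in_use(xe))
 *		return;		// device is idle, nothing to do
 *
 *	...			// touch the HW
 *	xe_pm_runtime_put(xe);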
 */
bool xe_pm_runtime_get_if_in_use(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		/* The device is awake, grab the ref and move on */
		pm_runtime_get_noresume(xe->drm.dev);
		return true;
	}

	return pm_runtime_get_if_in_use(xe->drm.dev) > 0;
}

/**
 * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming
 * @xe: xe device instance
 *
 * This function should be used in inner places where it is surely already
 * protected by outer-bound callers of `xe_pm_runtime_get`.
 * It will warn if not protected.
 * The reference should always be put back after this function, since the
 * usage counter is bumped in all cases.
 */
void xe_pm_runtime_get_noresume(struct xe_device *xe)
{
	bool ref;

	ref = xe_pm_runtime_get_if_in_use(xe);

	if (drm_WARN(&xe->drm, !ref, "Missing outer runtime PM protection\n"))
		pm_runtime_get_noresume(xe->drm.dev);
}

/**
 * xe_pm_runtime_resume_and_get - Resume, then get a runtime_pm ref if awake.
 * @xe: xe device instance
 *
 * Returns: True if device is awake and the reference was taken, false otherwise.
 */
bool xe_pm_runtime_resume_and_get(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		/* The device is awake, grab the ref and move on */
		pm_runtime_get_noresume(xe->drm.dev);
		return true;
	}

	return pm_runtime_resume_and_get(xe->drm.dev) >= 0;
}

/**
 * xe_pm_assert_unbounded_bridge - Disable PM on unbounded pcie parent bridge
 * @xe: xe device instance
 */
void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *bridge = pci_upstream_bridge(pdev);

	if (!bridge)
		return;

	if (!bridge->driver) {
		drm_warn(&xe->drm, "unbound parent pci bridge, device won't support any PM.\n");
		device_set_pm_not_required(&pdev->dev);
	}
}

/**
 * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold
 * @xe: xe device instance
 * @threshold: VRAM size in MiB for the D3cold threshold
 *
 * Returns 0 for success, negative error code otherwise.
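 *
 * A minimal usage sketch (illustrative only): xe_pm_init() seeds this with
 * DEFAULT_VRAM_THRESHOLD, and the value may be changed later as long as it
 * does not exceed the total VRAM size in MiB::
 *
 *	ret = xe_pm_set_vram_threshold(xe, 300);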
 */
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
	struct ttm_resource_manager *man;
	u32 vram_total_mb = 0;
	int i;

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man)
			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
	}

	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);

	if (threshold > vram_total_mb)
		return -EINVAL;

	mutex_lock(&xe->d3cold.lock);
	xe->d3cold.vram_threshold = threshold;
	mutex_unlock(&xe->d3cold.lock);

	return 0;
}

/**
 * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed
 * @xe: xe device instance
 *
 * To be called during runtime_pm idle callback.
 * Check for all the D3Cold conditions ahead of runtime suspend.
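 *
 * As a hedged sketch of the expected call site (the actual runtime idle
 * callback lives in xe_pci.c; the callback name below is hypothetical and
 * pdev_to_xe_device() is assumed to come from xe_device.h)::
 *
 *	static int example_runtime_idle(struct device *dev)
 *	{
 *		struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev));
 *
 *		xe_pm_d3cold_allowed_toggle(xe);
 *
 *		return 0;
 *	}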
 */
void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
{
	struct ttm_resource_manager *man;
	u32 total_vram_used_mb = 0;
	u64 vram_used;
	int i;

	if (!xe->d3cold.capable) {
		xe->d3cold.allowed = false;
		return;
	}

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man) {
			vram_used = ttm_resource_manager_usage(man);
			total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024);
		}
	}

	mutex_lock(&xe->d3cold.lock);

	if (total_vram_used_mb < xe->d3cold.vram_threshold)
		xe->d3cold.allowed = true;
	else
		xe->d3cold.allowed = false;

	mutex_unlock(&xe->d3cold.lock);

	drm_dbg(&xe->drm,
		"d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed));
}