xref: /linux/drivers/gpu/drm/xe/xe_pm.c (revision 3d4b0bfcd97fbb43d4848bafbf605f6d95afa7c8)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include "xe_pm.h"
7 
8 #include <linux/pm_runtime.h>
9 
10 #include <drm/drm_managed.h>
11 #include <drm/ttm/ttm_placement.h>
12 
13 #include "xe_bo.h"
14 #include "xe_bo_evict.h"
15 #include "xe_device.h"
16 #include "xe_device_sysfs.h"
17 #include "xe_ggtt.h"
18 #include "xe_gt.h"
19 #include "xe_guc.h"
20 #include "xe_irq.h"
21 #include "xe_pcode.h"
22 
23 /**
24  * DOC: Xe Power Management
25  *
26  * Xe PM shall be guided by simplicity.
27  * Use the simplest hook options whenever possible.
28  * Let's not reinvent the runtime_pm references and hooks.
29  * Shall have a clear separation of display and gt underneath this component.
30  *
31  * What's next:
32  *
33  * For now s2idle and s3 only work on integrated devices. The next step
34  * is to iterate through all BOs in VRAM, backing them up into system memory
35  * before allowing the system to suspend.
36  *
37  * Also runtime_pm needs to be here from the beginning.
38  *
39  * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC
40  * and no wait boost. Frequency optimizations should come at a later stage.
41  */
42 
43 /**
44  * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
45  * @xe: xe device instance
46  *
47  * Return: 0 on success
48  */
49 int xe_pm_suspend(struct xe_device *xe)
50 {
51 	struct xe_gt *gt;
52 	u8 id;
53 	int err;
54 
55 	for_each_gt(gt, xe, id)
56 		xe_gt_suspend_prepare(gt);
57 
58 	/* FIXME: Super racey... */
59 	err = xe_bo_evict_all(xe);
60 	if (err)
61 		return err;
62 
63 	for_each_gt(gt, xe, id) {
64 		err = xe_gt_suspend(gt);
65 		if (err)
66 			return err;
67 	}
68 
69 	xe_irq_suspend(xe);
70 
71 	return 0;
72 }
73 
74 /**
75  * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
76  * @xe: xe device instance
77  *
78  * Return: 0 on success
79  */
80 int xe_pm_resume(struct xe_device *xe)
81 {
82 	struct xe_gt *gt;
83 	u8 id;
84 	int err;
85 
86 	for_each_gt(gt, xe, id) {
87 		err = xe_pcode_init(gt);
88 		if (err)
89 			return err;
90 	}
91 
92 	/*
93 	 * This only restores pinned memory which is the memory required for the
94 	 * GT(s) to resume.
95 	 */
96 	err = xe_bo_restore_kernel(xe);
97 	if (err)
98 		return err;
99 
100 	xe_irq_resume(xe);
101 
102 	for_each_gt(gt, xe, id)
103 		xe_gt_resume(gt);
104 
105 	err = xe_bo_restore_user(xe);
106 	if (err)
107 		return err;
108 
109 	return 0;
110 }
111 
112 static bool xe_pm_pci_d3cold_capable(struct pci_dev *pdev)
113 {
114 	struct pci_dev *root_pdev;
115 
116 	root_pdev = pcie_find_root_port(pdev);
117 	if (!root_pdev)
118 		return false;
119 
120 	/* D3Cold requires PME capability and _PR3 power resource */
121 	if (!pci_pme_capable(root_pdev, PCI_D3cold) || !pci_pr3_present(root_pdev))
122 		return false;
123 
124 	return true;
125 }
126 
127 static void xe_pm_runtime_init(struct xe_device *xe)
128 {
129 	struct device *dev = xe->drm.dev;
130 
131 	/*
132 	 * Disable the system suspend direct complete optimization.
133 	 * We need to ensure that the regular device suspend/resume functions
134 	 * are called since our runtime_pm cannot guarantee local memory
135 	 * eviction for d3cold.
136 	 * TODO: Check HDA audio dependencies claimed by i915, and then enforce
137 	 *       this option to integrated graphics as well.
138 	 */
139 	if (IS_DGFX(xe))
140 		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);
141 
142 	pm_runtime_use_autosuspend(dev);
143 	pm_runtime_set_autosuspend_delay(dev, 1000);
144 	pm_runtime_set_active(dev);
145 	pm_runtime_allow(dev);
146 	pm_runtime_mark_last_busy(dev);
147 	pm_runtime_put(dev);
148 }
149 
150 void xe_pm_init(struct xe_device *xe)
151 {
152 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
153 
154 	drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
155 
156 	xe->d3cold.capable = xe_pm_pci_d3cold_capable(pdev);
157 
158 	if (xe->d3cold.capable) {
159 		xe_device_sysfs_init(xe);
160 		xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
161 	}
162 
163 	xe_pm_runtime_init(xe);
164 }
165 
166 void xe_pm_runtime_fini(struct xe_device *xe)
167 {
168 	struct device *dev = xe->drm.dev;
169 
170 	pm_runtime_get_sync(dev);
171 	pm_runtime_forbid(dev);
172 }
173 
174 static void xe_pm_write_callback_task(struct xe_device *xe,
175 				      struct task_struct *task)
176 {
177 	WRITE_ONCE(xe->pm_callback_task, task);
178 
179 	/*
180 	 * Just in case it's somehow possible for our writes to be reordered to
181 	 * the extent that something else re-uses the task written in
182 	 * pm_callback_task. For example after returning from the callback, but
183 	 * before the reordered write that resets pm_callback_task back to NULL.
184 	 */
185 	smp_mb(); /* pairs with xe_pm_read_callback_task */
186 }
187 
188 struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
189 {
190 	smp_mb(); /* pairs with xe_pm_write_callback_task */
191 
192 	return READ_ONCE(xe->pm_callback_task);
193 }
194 
195 int xe_pm_runtime_suspend(struct xe_device *xe)
196 {
197 	struct xe_gt *gt;
198 	u8 id;
199 	int err = 0;
200 
201 	if (xe->d3cold.allowed && xe_device_mem_access_ongoing(xe))
202 		return -EBUSY;
203 
204 	/* Disable access_ongoing asserts and prevent recursive pm calls */
205 	xe_pm_write_callback_task(xe, current);
206 
207 	/*
208 	 * The actual xe_device_mem_access_put() is always async underneath, so
209 	 * exactly where that is called should makes no difference to us. However
210 	 * we still need to be very careful with the locks that this callback
211 	 * acquires and the locks that are acquired and held by any callers of
212 	 * xe_device_mem_access_get(). We already have the matching annotation
213 	 * on that side, but we also need it here. For example lockdep should be
214 	 * able to tell us if the following scenario is in theory possible:
215 	 *
216 	 * CPU0                          | CPU1 (kworker)
217 	 * lock(A)                       |
218 	 *                               | xe_pm_runtime_suspend()
219 	 *                               |      lock(A)
220 	 * xe_device_mem_access_get()    |
221 	 *
222 	 * This will clearly deadlock since rpm core needs to wait for
223 	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
224 	 * on CPU0 which prevents CPU1 making forward progress.  With the
225 	 * annotation here and in xe_device_mem_access_get() lockdep will see
226 	 * the potential lock inversion and give us a nice splat.
227 	 */
228 	lock_map_acquire(&xe_device_mem_access_lockdep_map);
229 
230 	if (xe->d3cold.allowed) {
231 		err = xe_bo_evict_all(xe);
232 		if (err)
233 			goto out;
234 	}
235 
236 	for_each_gt(gt, xe, id) {
237 		err = xe_gt_suspend(gt);
238 		if (err)
239 			goto out;
240 	}
241 
242 	xe_irq_suspend(xe);
243 out:
244 	lock_map_release(&xe_device_mem_access_lockdep_map);
245 	xe_pm_write_callback_task(xe, NULL);
246 	return err;
247 }
248 
249 int xe_pm_runtime_resume(struct xe_device *xe)
250 {
251 	struct xe_gt *gt;
252 	u8 id;
253 	int err = 0;
254 
255 	/* Disable access_ongoing asserts and prevent recursive pm calls */
256 	xe_pm_write_callback_task(xe, current);
257 
258 	lock_map_acquire(&xe_device_mem_access_lockdep_map);
259 
260 	/*
261 	 * It can be possible that xe has allowed d3cold but other pcie devices
262 	 * in gfx card soc would have blocked d3cold, therefore card has not
263 	 * really lost power. Detecting primary Gt power is sufficient.
264 	 */
265 	gt = xe_device_get_gt(xe, 0);
266 	xe->d3cold.power_lost = xe_guc_in_reset(&gt->uc.guc);
267 
268 	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
269 		for_each_gt(gt, xe, id) {
270 			err = xe_pcode_init(gt);
271 			if (err)
272 				goto out;
273 		}
274 
275 		/*
276 		 * This only restores pinned memory which is the memory
277 		 * required for the GT(s) to resume.
278 		 */
279 		err = xe_bo_restore_kernel(xe);
280 		if (err)
281 			goto out;
282 	}
283 
284 	xe_irq_resume(xe);
285 
286 	for_each_gt(gt, xe, id)
287 		xe_gt_resume(gt);
288 
289 	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
290 		err = xe_bo_restore_user(xe);
291 		if (err)
292 			goto out;
293 	}
294 out:
295 	lock_map_release(&xe_device_mem_access_lockdep_map);
296 	xe_pm_write_callback_task(xe, NULL);
297 	return err;
298 }
299 
300 int xe_pm_runtime_get(struct xe_device *xe)
301 {
302 	return pm_runtime_get_sync(xe->drm.dev);
303 }
304 
305 int xe_pm_runtime_put(struct xe_device *xe)
306 {
307 	pm_runtime_mark_last_busy(xe->drm.dev);
308 	return pm_runtime_put(xe->drm.dev);
309 }
310 
311 int xe_pm_runtime_get_if_active(struct xe_device *xe)
312 {
313 	return pm_runtime_get_if_active(xe->drm.dev, true);
314 }
315 
316 void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
317 {
318 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
319 	struct pci_dev *bridge = pci_upstream_bridge(pdev);
320 
321 	if (!bridge)
322 		return;
323 
324 	if (!bridge->driver) {
325 		drm_warn(&xe->drm, "unbounded parent pci bridge, device won't support any PM support.\n");
326 		device_set_pm_not_required(&pdev->dev);
327 	}
328 }
329 
330 int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
331 {
332 	struct ttm_resource_manager *man;
333 	u32 vram_total_mb = 0;
334 	int i;
335 
336 	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
337 		man = ttm_manager_type(&xe->ttm, i);
338 		if (man)
339 			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
340 	}
341 
342 	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);
343 
344 	if (threshold > vram_total_mb)
345 		return -EINVAL;
346 
347 	mutex_lock(&xe->d3cold.lock);
348 	xe->d3cold.vram_threshold = threshold;
349 	mutex_unlock(&xe->d3cold.lock);
350 
351 	return 0;
352 }
353 
354 void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
355 {
356 	struct ttm_resource_manager *man;
357 	u32 total_vram_used_mb = 0;
358 	u64 vram_used;
359 	int i;
360 
361 	if (!xe->d3cold.capable) {
362 		xe->d3cold.allowed = false;
363 		return;
364 	}
365 
366 	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
367 		man = ttm_manager_type(&xe->ttm, i);
368 		if (man) {
369 			vram_used = ttm_resource_manager_usage(man);
370 			total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024);
371 		}
372 	}
373 
374 	mutex_lock(&xe->d3cold.lock);
375 
376 	if (total_vram_used_mb < xe->d3cold.vram_threshold)
377 		xe->d3cold.allowed = true;
378 	else
379 		xe->d3cold.allowed = false;
380 
381 	mutex_unlock(&xe->d3cold.lock);
382 }
383