xref: /linux/drivers/gpu/drm/xe/xe_pm.c (revision 1f20a5769446a1acae67ac9e63d07a594829a789)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_pm.h"

#include <linux/pm_runtime.h>

#include <drm/drm_managed.h>
#include <drm/ttm/ttm_placement.h>

#include "display/xe_display.h"
#include "xe_bo.h"
#include "xe_bo_evict.h"
#include "xe_device.h"
#include "xe_device_sysfs.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_guc.h"
#include "xe_irq.h"
#include "xe_pcode.h"
#include "xe_wa.h"

/**
 * DOC: Xe Power Management
 *
 * Xe PM shall be guided by simplicity.
 * Use the simplest hook options whenever possible.
 * Let's not reinvent the runtime_pm references and hooks.
 * Keep a clear separation of display and gt underneath this component.
 *
 * What's next:
 *
 * For now s2idle and s3 only work on integrated devices. The next step
 * is to iterate through all VRAM BOs, backing them up into system memory
 * before allowing system suspend.
 *
 * Also runtime_pm needs to be here from the beginning.
 *
 * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC
 * and no wait boost. Frequency optimizations should come at a later stage.
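 *
 * As a minimal illustrative sketch (not taken from any particular caller),
 * code paths that need the HW awake are expected to hold a runtime_pm
 * reference through the helpers below, roughly:
 *
 *	xe_pm_runtime_get(xe);
 *	... touch the HW ...
 *	xe_pm_runtime_put(xe);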
 */

/**
 * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_suspend(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err;

	for_each_gt(gt, xe, id)
		xe_gt_suspend_prepare(gt);

	/* FIXME: Super racy... */
	err = xe_bo_evict_all(xe);
	if (err)
		return err;

	xe_display_pm_suspend(xe);

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err) {
			xe_display_pm_resume(xe);
			return err;
		}
	}

	xe_irq_suspend(xe);

	xe_display_pm_suspend_late(xe);

	return 0;
}

/**
 * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_resume(struct xe_device *xe)
{
	struct xe_tile *tile;
	struct xe_gt *gt;
	u8 id;
	int err;

	for_each_tile(tile, xe, id)
		xe_wa_apply_tile_workarounds(tile);

	for_each_gt(gt, xe, id) {
		err = xe_pcode_init(gt);
		if (err)
			return err;
	}

	xe_display_pm_resume_early(xe);

	/*
	 * This only restores pinned memory which is the memory required for the
	 * GT(s) to resume.
	 */
	err = xe_bo_restore_kernel(xe);
	if (err)
		return err;

	xe_irq_resume(xe);

	xe_display_pm_resume(xe);

	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	err = xe_bo_restore_user(xe);
	if (err)
		return err;

	return 0;
}

static bool xe_pm_pci_d3cold_capable(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *root_pdev;

	root_pdev = pcie_find_root_port(pdev);
	if (!root_pdev)
		return false;

	/* D3Cold requires PME capability */
	if (!pci_pme_capable(root_pdev, PCI_D3cold)) {
		drm_dbg(&xe->drm, "d3cold: PME# not supported\n");
		return false;
	}

	/* D3Cold requires _PR3 power resource */
	if (!pci_pr3_present(root_pdev)) {
		drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n");
		return false;
	}

	return true;
}

static void xe_pm_runtime_init(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/*
	 * Disable the system suspend direct complete optimization.
	 * We need to ensure that the regular device suspend/resume functions
	 * are called since our runtime_pm cannot guarantee local memory
	 * eviction for d3cold.
	 * TODO: Check HDA audio dependencies claimed by i915, and then enforce
	 *       this option on integrated graphics as well.
	 */
	if (IS_DGFX(xe))
		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);

	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, 1000);
	pm_runtime_set_active(dev);
	pm_runtime_allow(dev);
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put(dev);
}

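/**
 * xe_pm_init_early - Initialize the early bits of Xe Power Management
 * @xe: xe device instance
 *
 * Set up the VRAM userfault list and the lock protecting it.
 */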
void xe_pm_init_early(struct xe_device *xe)
{
	INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);
	drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
}

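/**
 * xe_pm_init - Initialize Xe Power Management
 * @xe: xe device instance
 *
 * Initialize the d3cold lock, detect D3Cold capability, set up the sysfs
 * entry and the default VRAM threshold when capable, and enable runtime PM.
 * This does nothing when the device is not using GuC, since for now
 * suspend/resume is only allowed with GuC.
 */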
void xe_pm_init(struct xe_device *xe)
{
	/* For now suspend/resume is only allowed with GuC */
	if (!xe_device_uc_enabled(xe))
		return;

	drmm_mutex_init(&xe->drm, &xe->d3cold.lock);

	xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe);

	if (xe->d3cold.capable) {
		xe_device_sysfs_init(xe);
		xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
	}

	xe_pm_runtime_init(xe);
}

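/**
 * xe_pm_runtime_fini - Finalize runtime PM
 * @xe: xe device instance
 *
 * Take a runtime PM reference and forbid further runtime PM, undoing the
 * setup done in xe_pm_runtime_init().
 */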
void xe_pm_runtime_fini(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	pm_runtime_get_sync(dev);
	pm_runtime_forbid(dev);
}

static void xe_pm_write_callback_task(struct xe_device *xe,
				      struct task_struct *task)
{
	WRITE_ONCE(xe->pm_callback_task, task);

	/*
	 * Just in case it's somehow possible for our writes to be reordered to
	 * the extent that something else re-uses the task written in
	 * pm_callback_task. For example after returning from the callback, but
	 * before the reordered write that resets pm_callback_task back to NULL.
	 */
	smp_mb(); /* pairs with xe_pm_read_callback_task */
}

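/**
 * xe_pm_read_callback_task - Retrieve the PM callback task, if any
 * @xe: xe device instance
 *
 * Return: the task currently executing a runtime PM callback for @xe, as
 * recorded by xe_pm_write_callback_task(), or NULL when no callback is
 * running.
 */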
struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
{
	smp_mb(); /* pairs with xe_pm_write_callback_task */

	return READ_ONCE(xe->pm_callback_task);
}

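/**
 * xe_pm_runtime_suspend - Runtime suspend helper
 * @xe: xe device instance
 *
 * Release the mmap offsets of the VRAM userfault BOs, evict all BOs when
 * d3cold is allowed, and suspend the GT(s) and the interrupts.
 *
 * Return: 0 on success, negative error code otherwise.
 */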
int xe_pm_runtime_suspend(struct xe_device *xe)
{
	struct xe_bo *bo, *on;
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	if (xe->d3cold.allowed && xe_device_mem_access_ongoing(xe))
		return -EBUSY;

	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	/*
	 * The actual xe_device_mem_access_put() is always async underneath, so
	 * exactly where that is called should make no difference to us. However
	 * we still need to be very careful with the locks that this callback
	 * acquires and the locks that are acquired and held by any callers of
	 * xe_device_mem_access_get(). We already have the matching annotation
	 * on that side, but we also need it here. For example lockdep should be
	 * able to tell us if the following scenario is in theory possible:
	 *
	 * CPU0                          | CPU1 (kworker)
	 * lock(A)                       |
	 *                               | xe_pm_runtime_suspend()
	 *                               |      lock(A)
	 * xe_device_mem_access_get()    |
	 *
	 * This will clearly deadlock since rpm core needs to wait for
	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
	 * on CPU0 which prevents CPU1 making forward progress.  With the
	 * annotation here and in xe_device_mem_access_get() lockdep will see
	 * the potential lock inversion and give us a nice splat.
	 */
	lock_map_acquire(&xe_device_mem_access_lockdep_map);

	/*
	 * Take the lock for the entire list operation, as xe_ttm_bo_destroy and
	 * xe_bo_move_notify also check and delete the bo entry from the user
	 * fault list.
	 */
	mutex_lock(&xe->mem_access.vram_userfault.lock);
	list_for_each_entry_safe(bo, on,
				 &xe->mem_access.vram_userfault.list, vram_userfault_link)
		xe_bo_runtime_pm_release_mmap_offset(bo);
	mutex_unlock(&xe->mem_access.vram_userfault.lock);

	if (xe->d3cold.allowed) {
		err = xe_bo_evict_all(xe);
		if (err)
			goto out;
	}

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err)
			goto out;
	}

	xe_irq_suspend(xe);
out:
	lock_map_release(&xe_device_mem_access_lockdep_map);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

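/**
 * xe_pm_runtime_resume - Runtime resume helper
 * @xe: xe device instance
 *
 * When power was lost in d3cold, re-initialize pcode and restore the pinned
 * (kernel) BOs, then resume the interrupts and the GT(s), and finally
 * restore the user BOs.
 *
 * Return: 0 on success, negative error code otherwise.
 */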
int xe_pm_runtime_resume(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	lock_map_acquire(&xe_device_mem_access_lockdep_map);

	/*
	 * It is possible that xe has allowed d3cold but other PCIe devices in
	 * the gfx card SoC have blocked d3cold, in which case the card has not
	 * really lost power. Detecting primary GT power loss is sufficient.
	 */
	gt = xe_device_get_gt(xe, 0);
	xe->d3cold.power_lost = xe_guc_in_reset(&gt->uc.guc);

	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
		for_each_gt(gt, xe, id) {
			err = xe_pcode_init(gt);
			if (err)
				goto out;
		}

		/*
		 * This only restores pinned memory which is the memory
		 * required for the GT(s) to resume.
		 */
		err = xe_bo_restore_kernel(xe);
		if (err)
			goto out;
	}

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
		err = xe_bo_restore_user(xe);
		if (err)
			goto out;
	}
out:
	lock_map_release(&xe_device_mem_access_lockdep_map);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

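/**
 * xe_pm_runtime_get - Get a runtime_pm reference, resuming synchronously if needed
 * @xe: xe device instance
 *
 * Return: the pm_runtime_get_sync() return value.
 */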
int xe_pm_runtime_get(struct xe_device *xe)
{
	return pm_runtime_get_sync(xe->drm.dev);
}

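/**
 * xe_pm_runtime_put - Put the runtime_pm reference
 * @xe: xe device instance
 *
 * Mark the device as busy and drop the runtime_pm reference.
 *
 * Return: the pm_runtime_put() return value.
 */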
int xe_pm_runtime_put(struct xe_device *xe)
{
	pm_runtime_mark_last_busy(xe->drm.dev);
	return pm_runtime_put(xe->drm.dev);
}

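/**
 * xe_pm_runtime_get_if_active - Get a runtime_pm reference if the device is active
 * @xe: xe device instance
 *
 * Return: the pm_runtime_get_if_active() return value.
 */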
int xe_pm_runtime_get_if_active(struct xe_device *xe)
{
	return pm_runtime_get_if_active(xe->drm.dev);
}

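/**
 * xe_pm_assert_unbounded_bridge - Warn about PM on an unbound parent bridge
 * @xe: xe device instance
 *
 * If the upstream PCI bridge has no driver bound, warn that the device won't
 * support any PM and mark it as not requiring PM.
 */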
void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *bridge = pci_upstream_bridge(pdev);

	if (!bridge)
		return;

	if (!bridge->driver) {
		drm_warn(&xe->drm, "unbound parent pci bridge, device won't support any PM\n");
		device_set_pm_not_required(&pdev->dev);
	}
}

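/**
 * xe_pm_set_vram_threshold - Set a VRAM usage threshold for allowing D3Cold
 * @xe: xe device instance
 * @threshold: VRAM threshold in MB
 *
 * Store the threshold that xe_pm_d3cold_allowed_toggle() compares against
 * the current VRAM usage.
 *
 * Return: 0 on success, -EINVAL if the threshold exceeds the total VRAM size.
 */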
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
	struct ttm_resource_manager *man;
	u32 vram_total_mb = 0;
	int i;

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man)
			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
	}

	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);

	if (threshold > vram_total_mb)
		return -EINVAL;

	mutex_lock(&xe->d3cold.lock);
	xe->d3cold.vram_threshold = threshold;
	mutex_unlock(&xe->d3cold.lock);

	return 0;
}

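/**
 * xe_pm_d3cold_allowed_toggle - Re-evaluate whether D3Cold is allowed
 * @xe: xe device instance
 *
 * Allow D3Cold only when the device is D3Cold capable and the current VRAM
 * usage is below the configured threshold.
 */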
void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
{
	struct ttm_resource_manager *man;
	u32 total_vram_used_mb = 0;
	u64 vram_used;
	int i;

	if (!xe->d3cold.capable) {
		xe->d3cold.allowed = false;
		return;
	}

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man) {
			vram_used = ttm_resource_manager_usage(man);
			total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024);
		}
	}

	mutex_lock(&xe->d3cold.lock);

	if (total_vram_used_mb < xe->d3cold.vram_threshold)
		xe->d3cold.allowed = true;
	else
		xe->d3cold.allowed = false;

	mutex_unlock(&xe->d3cold.lock);

	drm_dbg(&xe->drm,
		"d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed));
}