xref: /linux/drivers/gpu/drm/xe/xe_pm.c (revision 3c2fe27971c3c9cc27de6e369385f6428db6c0b5)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_pm.h"

#include <linux/fault-inject.h>
#include <linux/pm_runtime.h>
#include <linux/suspend.h>

#include <drm/drm_managed.h>
#include <drm/ttm/ttm_placement.h>

#include "display/xe_display.h"
#include "xe_bo.h"
#include "xe_bo_evict.h"
#include "xe_device.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_guc.h"
#include "xe_irq.h"
#include "xe_pcode.h"
#include "xe_pxp.h"
#include "xe_trace.h"
#include "xe_wa.h"

/**
 * DOC: Xe Power Management
 *
 * Xe PM implements the main routines for both system level suspend states and
 * for the opportunistic runtime suspend states.
 *
 * System Level Suspend (S-States) - In general this is OS initiated suspend
 * driven by ACPI for achieving S0ix (a.k.a. S2idle, freeze), S3 (suspend to
 * RAM), and S4 (suspend to disk). The main functions here are `xe_pm_suspend`
 * and `xe_pm_resume`. They are the entry points for suspending to and resuming
 * from these states.
 *
 * PCI Device Suspend (D-States) - This is the opportunistic PCIe device low power
 * state D3, controlled by the PCI subsystem and ACPI with the help from the
 * runtime_pm infrastructure.
 * PCI D3 is special and can mean D3hot, where Vcc power stays on to keep memory
 * alive and allow a quicker, lower latency resume, or D3cold, where Vcc power is
 * off for better power savings.
 * Vcc for the PCI hierarchy can only be controlled at the PCI root port
 * level, while the device driver can be behind multiple bridges/switches and
 * paired with other devices. For this reason, the PCI subsystem cannot perform
 * the transition towards D3cold. The lowest runtime PM state possible from the
 * PCI subsystem is D3hot. Then, if all the paired devices under the same root
 * port are in D3hot, ACPI will assist and run its own methods (_PR3 and _OFF)
 * to perform the transition from D3hot to D3cold. Xe may disallow this
 * transition by calling pci_d3cold_disable(root_pdev) before going to runtime
 * suspend. The decision is based on runtime conditions such as VRAM usage, for
 * instance to keep resume quick and low latency.
 *
 * Runtime PM - This infrastructure provided by the Linux kernel allows device
 * drivers to indicate when they can be runtime suspended, so the device can be
 * put into D3 (if supported), or allow deeper package sleep states (PC-states)
 * and/or other low level power states. The Xe PM component provides the
 * `xe_pm_runtime_suspend` and `xe_pm_runtime_resume` functions that the PCI
 * subsystem will call on the transition to/from runtime suspend.
 *
 * Also, Xe PM provides get and put functions that the Xe driver uses to
 * indicate activity. In order to avoid locking complications with the memory
 * management, whenever possible these get and put functions need to be called
 * from the higher/outer levels (an illustrative sketch follows this comment).
 * The main cases that need to be protected from the outer levels are: IOCTL,
 * sysfs, debugfs, dma-buf sharing, and GPU execution.
 *
 * This component is not responsible for GT idleness (RC6) nor GT frequency
 * management (RPS).
 */
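
/*
 * Illustrative sketch only, not part of the driver: one way an outer level
 * such as an ioctl handler can wrap its work with the runtime PM helpers
 * defined below. The handler name and its body are hypothetical and the
 * block is kept out of the build.
 */
#if 0
static int example_ioctl(struct xe_device *xe)
{
	int ret;

	/* Take the wakeref at the outermost level, before any driver locks */
	ret = xe_pm_runtime_get_ioctl(xe);
	if (ret >= 0) {
		/* ... do the ioctl work; the device is awake here ... */
		ret = 0;
	}

	/* pm_runtime_get_sync() bumps the usage counter even on failure,
	 * so put the reference back unconditionally.
	 */
	xe_pm_runtime_put(xe);

	return ret;
}
#endif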

#ifdef CONFIG_LOCKDEP
static struct lockdep_map xe_pm_runtime_d3cold_map = {
	.name = "xe_rpm_d3cold_map"
};

static struct lockdep_map xe_pm_runtime_nod3cold_map = {
	.name = "xe_rpm_nod3cold_map"
};
#endif

/**
 * xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context
 * @xe: The xe device.
 *
 * Return: true if it is safe to runtime resume from reclaim context.
 * false otherwise.
 */
bool xe_rpm_reclaim_safe(const struct xe_device *xe)
{
	return !xe->d3cold.capable;
}

static void xe_rpm_lockmap_acquire(const struct xe_device *xe)
{
	lock_map_acquire(xe_rpm_reclaim_safe(xe) ?
			 &xe_pm_runtime_nod3cold_map :
			 &xe_pm_runtime_d3cold_map);
}

static void xe_rpm_lockmap_release(const struct xe_device *xe)
{
	lock_map_release(xe_rpm_reclaim_safe(xe) ?
			 &xe_pm_runtime_nod3cold_map :
			 &xe_pm_runtime_d3cold_map);
}

/**
 * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_suspend(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err;

	drm_dbg(&xe->drm, "Suspending device\n");
	trace_xe_pm_suspend(xe, __builtin_return_address(0));

	err = xe_pxp_pm_suspend(xe->pxp);
	if (err)
		goto err;

	for_each_gt(gt, xe, id)
		xe_gt_suspend_prepare(gt);

	xe_display_pm_suspend(xe);

	/* FIXME: Super racey... */
	err = xe_bo_evict_all(xe);
	if (err)
		goto err_display;

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err)
			goto err_display;
	}

	xe_irq_suspend(xe);

	xe_display_pm_suspend_late(xe);

	drm_dbg(&xe->drm, "Device suspended\n");
	return 0;

err_display:
	xe_display_pm_resume(xe);
	xe_pxp_pm_resume(xe->pxp);
err:
	drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
	return err;
}

/**
 * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_resume(struct xe_device *xe)
{
	struct xe_tile *tile;
	struct xe_gt *gt;
	u8 id;
	int err;

	drm_dbg(&xe->drm, "Resuming device\n");
	trace_xe_pm_resume(xe, __builtin_return_address(0));

	for_each_tile(tile, xe, id)
		xe_wa_apply_tile_workarounds(tile);

	err = xe_pcode_ready(xe, true);
	if (err)
		return err;

	xe_display_pm_resume_early(xe);

	/*
	 * This only restores pinned memory which is the memory required for the
	 * GT(s) to resume.
	 */
	err = xe_bo_restore_early(xe);
	if (err)
		goto err;

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	xe_display_pm_resume(xe);

	err = xe_bo_restore_late(xe);
	if (err)
		goto err;

	xe_pxp_pm_resume(xe->pxp);

	drm_dbg(&xe->drm, "Device resumed\n");
	return 0;
err:
	drm_dbg(&xe->drm, "Device resume failed %d\n", err);
	return err;
}

static bool xe_pm_pci_d3cold_capable(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *root_pdev;

	root_pdev = pcie_find_root_port(pdev);
	if (!root_pdev)
		return false;

	/* D3Cold requires PME capability */
	if (!pci_pme_capable(root_pdev, PCI_D3cold)) {
		drm_dbg(&xe->drm, "d3cold: PME# not supported\n");
		return false;
	}

	/* D3Cold requires _PR3 power resource */
	if (!pci_pr3_present(root_pdev)) {
		drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n");
		return false;
	}

	return true;
}

static void xe_pm_runtime_init(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/*
	 * Disable the system suspend direct complete optimization.
	 * We need to ensure that the regular device suspend/resume functions
	 * are called since our runtime_pm cannot guarantee local memory
	 * eviction for d3cold.
	 * TODO: Check HDA audio dependencies claimed by i915, and then extend
	 *       this option to integrated graphics as well.
	 */
	if (IS_DGFX(xe))
		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);

	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, 1000);
	pm_runtime_set_active(dev);
	pm_runtime_allow(dev);
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put(dev);
}

int xe_pm_init_early(struct xe_device *xe)
{
	int err;

	INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);

	err = drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
	if (err)
		return err;

	err = drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
	if (err)
		return err;

	xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe);
	return 0;
}
ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */

static u32 vram_threshold_value(struct xe_device *xe)
{
	/* FIXME: D3Cold temporarily disabled by default on BMG */
	if (xe->info.platform == XE_BATTLEMAGE)
		return 0;

	return DEFAULT_VRAM_THRESHOLD;
}

static int xe_pm_notifier_callback(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier);
	int err = 0;

	switch (action) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
		xe_pm_runtime_get(xe);
		err = xe_bo_evict_all_user(xe);
		if (err) {
			drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err);
			xe_pm_runtime_put(xe);
			break;
		}

		err = xe_bo_notifier_prepare_all_pinned(xe);
		if (err) {
			drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err);
			xe_pm_runtime_put(xe);
		}
		break;
	case PM_POST_HIBERNATION:
	case PM_POST_SUSPEND:
		xe_bo_notifier_unprepare_all_pinned(xe);
		xe_pm_runtime_put(xe);
		break;
	}

	if (err)
		return NOTIFY_BAD;

	return NOTIFY_DONE;
}

/**
 * xe_pm_init - Initialize Xe Power Management
 * @xe: xe device instance
 *
 * This component is responsible for System and Device sleep states.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_init(struct xe_device *xe)
{
	u32 vram_threshold;
	int err;

	xe->pm_notifier.notifier_call = xe_pm_notifier_callback;
	err = register_pm_notifier(&xe->pm_notifier);
	if (err)
		return err;

	/* For now suspend/resume is only allowed with GuC */
	if (!xe_device_uc_enabled(xe))
		return 0;

	if (xe->d3cold.capable) {
		vram_threshold = vram_threshold_value(xe);
		err = xe_pm_set_vram_threshold(xe, vram_threshold);
		if (err)
			goto err_unregister;
	}

	xe_pm_runtime_init(xe);
	return 0;

err_unregister:
	unregister_pm_notifier(&xe->pm_notifier);
	return err;
}

static void xe_pm_runtime_fini(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	pm_runtime_get_sync(dev);
	pm_runtime_forbid(dev);
}

/**
 * xe_pm_fini - Finalize PM
 * @xe: xe device instance
 */
void xe_pm_fini(struct xe_device *xe)
{
	if (xe_device_uc_enabled(xe))
		xe_pm_runtime_fini(xe);

	unregister_pm_notifier(&xe->pm_notifier);
}

static void xe_pm_write_callback_task(struct xe_device *xe,
				      struct task_struct *task)
{
	WRITE_ONCE(xe->pm_callback_task, task);

	/*
	 * Just in case it's somehow possible for our writes to be reordered to
	 * the extent that something else re-uses the task written in
	 * pm_callback_task. For example after returning from the callback, but
	 * before the reordered write that resets pm_callback_task back to NULL.
	 */
	smp_mb(); /* pairs with xe_pm_read_callback_task */
}

struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
{
	smp_mb(); /* pairs with xe_pm_write_callback_task */

	return READ_ONCE(xe->pm_callback_task);
}

/**
 * xe_pm_runtime_suspended - Check if runtime_pm state is suspended
 * @xe: xe device instance
 *
 * This does not provide any guarantee that the device is going to remain
 * suspended as it might be racing with the runtime state transitions.
 * It can be used only as a non-reliable assertion, to ensure that we are not in
 * the sleep state while trying to access some memory for instance.
 *
 * Returns true if PCI device is suspended, false otherwise.
 */
bool xe_pm_runtime_suspended(struct xe_device *xe)
{
	return pm_runtime_suspended(xe->drm.dev);
}

/**
 * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold
 * @xe: xe device instance
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_runtime_suspend(struct xe_device *xe)
{
	struct xe_bo *bo, *on;
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	trace_xe_pm_runtime_suspend(xe, __builtin_return_address(0));
	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	/*
	 * The actual xe_pm_runtime_put() is always async underneath, so
	 * exactly where that is called should make no difference to us. However,
	 * we still need to be very careful with the locks that this callback
	 * acquires and the locks that are acquired and held by any callers of
	 * xe_runtime_pm_get(). We already have the matching annotation
	 * on that side, but we also need it here. For example lockdep should be
	 * able to tell us if the following scenario is in theory possible:
	 *
	 * CPU0                          | CPU1 (kworker)
	 * lock(A)                       |
	 *                               | xe_pm_runtime_suspend()
	 *                               |      lock(A)
	 * xe_pm_runtime_get()           |
	 *
	 * This will clearly deadlock since rpm core needs to wait for
	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
	 * on CPU0 which prevents CPU1 making forward progress.  With the
	 * annotation here and in xe_pm_runtime_get() lockdep will see
	 * the potential lock inversion and give us a nice splat.
	 */
	xe_rpm_lockmap_acquire(xe);

	err = xe_pxp_pm_suspend(xe->pxp);
	if (err)
		goto out;

	/*
	 * Hold the lock for the entire list op, as xe_ttm_bo_destroy and
	 * xe_bo_move_notify also check and delete the bo entry from the user
	 * fault list.
	 */
	mutex_lock(&xe->mem_access.vram_userfault.lock);
	list_for_each_entry_safe(bo, on,
				 &xe->mem_access.vram_userfault.list, vram_userfault_link)
		xe_bo_runtime_pm_release_mmap_offset(bo);
	mutex_unlock(&xe->mem_access.vram_userfault.lock);

	xe_display_pm_runtime_suspend(xe);

	if (xe->d3cold.allowed) {
		err = xe_bo_evict_all(xe);
		if (err)
			goto out_resume;
	}

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err)
			goto out_resume;
	}

	xe_irq_suspend(xe);

	xe_display_pm_runtime_suspend_late(xe);

	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return 0;

out_resume:
	xe_display_pm_runtime_resume(xe);
	xe_pxp_pm_resume(xe->pxp);
out:
	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/**
 * xe_pm_runtime_resume - Waking up from D3hot/D3Cold
 * @xe: xe device instance
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_runtime_resume(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	trace_xe_pm_runtime_resume(xe, __builtin_return_address(0));
	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	xe_rpm_lockmap_acquire(xe);

	if (xe->d3cold.allowed) {
		err = xe_pcode_ready(xe, true);
		if (err)
			goto out;

		xe_display_pm_resume_early(xe);

		/*
		 * This only restores pinned memory which is the memory
		 * required for the GT(s) to resume.
		 */
		err = xe_bo_restore_early(xe);
		if (err)
			goto out;
	}

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	xe_display_pm_runtime_resume(xe);

	if (xe->d3cold.allowed) {
		err = xe_bo_restore_late(xe);
		if (err)
			goto out;
	}

	xe_pxp_pm_resume(xe->pxp);

out:
	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/*
 * For places where resume is synchronous it can be quite easy to deadlock
 * if we are not careful. Also in practice it might be quite timing
 * sensitive to ever see the 0 -> 1 transition with the caller's locks
 * held, so deadlocks might exist but are hard for lockdep to ever see.
 * With this in mind, help lockdep learn about the potentially scary
 * stuff that can happen inside the runtime_resume callback by acquiring
 * a dummy lock (it doesn't protect anything and gets compiled out on
 * non-debug builds).  Lockdep then only needs to see the
 * xe_pm_runtime_xxx_map -> runtime_resume callback once, and then can
 * hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map.
 * For example if the (callers_locks) are ever grabbed in the
 * runtime_resume callback, lockdep should give us a nice splat.
 */
static void xe_rpm_might_enter_cb(const struct xe_device *xe)
{
	xe_rpm_lockmap_acquire(xe);
	xe_rpm_lockmap_release(xe);
}

/*
 * Prime the lockdep maps for known locking orders that need to
 * be supported but that may not always occur on all systems.
 */
static void xe_pm_runtime_lockdep_prime(void)
{
	struct dma_resv lockdep_resv;

	dma_resv_init(&lockdep_resv);
	lock_map_acquire(&xe_pm_runtime_d3cold_map);
	/* D3Cold takes the dma_resv locks to evict bos */
	dma_resv_lock(&lockdep_resv, NULL);
	dma_resv_unlock(&lockdep_resv);
	lock_map_release(&xe_pm_runtime_d3cold_map);

	/* Shrinkers might like to wake up the device under reclaim. */
	fs_reclaim_acquire(GFP_KERNEL);
	lock_map_acquire(&xe_pm_runtime_nod3cold_map);
	lock_map_release(&xe_pm_runtime_nod3cold_map);
	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously
 * @xe: xe device instance
 */
void xe_pm_runtime_get(struct xe_device *xe)
{
	trace_xe_pm_runtime_get(xe, __builtin_return_address(0));
	pm_runtime_get_noresume(xe->drm.dev);

	if (xe_pm_read_callback_task(xe) == current)
		return;

	xe_rpm_might_enter_cb(xe);
	pm_runtime_resume(xe->drm.dev);
}

/**
 * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle
 * @xe: xe device instance
 */
void xe_pm_runtime_put(struct xe_device *xe)
{
	trace_xe_pm_runtime_put(xe, __builtin_return_address(0));
	if (xe_pm_read_callback_task(xe) == current) {
		pm_runtime_put_noidle(xe->drm.dev);
	} else {
		pm_runtime_mark_last_busy(xe->drm.dev);
		pm_runtime_put(xe->drm.dev);
	}
}

/**
 * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl
 * @xe: xe device instance
 *
 * Returns: Any number greater than or equal to 0 for success, negative error
 * code otherwise.
 */
int xe_pm_runtime_get_ioctl(struct xe_device *xe)
{
	trace_xe_pm_runtime_get_ioctl(xe, __builtin_return_address(0));
	if (WARN_ON(xe_pm_read_callback_task(xe) == current))
		return -ELOOP;

	xe_rpm_might_enter_cb(xe);
	return pm_runtime_get_sync(xe->drm.dev);
}

/**
 * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active
 * @xe: xe device instance
 *
 * Return: True if device is awake (regardless of the previous number of
 * references) and a new reference was taken, false otherwise.
 */
bool xe_pm_runtime_get_if_active(struct xe_device *xe)
{
	return pm_runtime_get_if_active(xe->drm.dev) > 0;
}
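
/*
 * Illustrative sketch only, not part of the driver: the conditional-get
 * helpers above and below let optional work be skipped instead of waking
 * the device. The helper name and body are hypothetical and the block is
 * kept out of the build.
 */
#if 0
static void example_flush_if_awake(struct xe_device *xe)
{
	/* Bail out rather than waking the device just for optional work */
	if (!xe_pm_runtime_get_if_active(xe))
		return;

	/* ... the device is awake and a reference is held here ... */

	xe_pm_runtime_put(xe);
}
#endif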

/**
 * xe_pm_runtime_get_if_in_use - Get a new reference if device is active with previous ref taken
 * @xe: xe device instance
 *
 * Return: True if device is awake, a previous reference had been already taken,
 * and a new reference was now taken, false otherwise.
 */
bool xe_pm_runtime_get_if_in_use(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		/* The device is awake, grab the ref and move on */
		pm_runtime_get_noresume(xe->drm.dev);
		return true;
	}

	return pm_runtime_get_if_in_use(xe->drm.dev) > 0;
}

/*
 * Very unreliable! Should only be used to suppress the false positive case
 * in the missing outer rpm protection warning.
 */
static bool xe_pm_suspending_or_resuming(struct xe_device *xe)
{
#ifdef CONFIG_PM
	struct device *dev = xe->drm.dev;

	return dev->power.runtime_status == RPM_SUSPENDING ||
		dev->power.runtime_status == RPM_RESUMING ||
		pm_suspend_in_progress();
#else
	return false;
#endif
}

/**
 * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming
 * @xe: xe device instance
 *
 * This function should be used in inner places where it is surely already
 * protected by outer-bound callers of `xe_pm_runtime_get`.
 * It will warn if not protected.
 * The reference should be put back after this function regardless, since it
 * will always bump the usage counter.
 */
void xe_pm_runtime_get_noresume(struct xe_device *xe)
{
	bool ref;

	ref = xe_pm_runtime_get_if_in_use(xe);

	if (!ref) {
		pm_runtime_get_noresume(xe->drm.dev);
		drm_WARN(&xe->drm, !xe_pm_suspending_or_resuming(xe),
			 "Missing outer runtime PM protection\n");
	}
}

/**
 * xe_pm_runtime_resume_and_get - Resume, then get a runtime_pm ref if awake.
 * @xe: xe device instance
 *
 * Returns: True if device is awake and the reference was taken, false otherwise.
 */
bool xe_pm_runtime_resume_and_get(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		/* The device is awake, grab the ref and move on */
		pm_runtime_get_noresume(xe->drm.dev);
		return true;
	}

	xe_rpm_might_enter_cb(xe);
	return pm_runtime_resume_and_get(xe->drm.dev) >= 0;
}

/**
 * xe_pm_assert_unbounded_bridge - Disable PM on unbounded pcie parent bridge
 * @xe: xe device instance
 */
void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *bridge = pci_upstream_bridge(pdev);

	if (!bridge)
		return;

	if (!bridge->driver) {
		drm_warn(&xe->drm, "unbounded parent pci bridge, device won't support any PM support.\n");
		device_set_pm_not_required(&pdev->dev);
	}
}

/**
 * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold
 * @xe: xe device instance
 * @threshold: VRAM size in MiB for the D3cold threshold
 *
 * Return:
 * * 0		- success
 * * -EINVAL	- invalid argument
 */
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
	struct ttm_resource_manager *man;
	u32 vram_total_mb = 0;
	int i;

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man)
			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
	}

	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);

	if (threshold > vram_total_mb)
		return -EINVAL;

	mutex_lock(&xe->d3cold.lock);
	xe->d3cold.vram_threshold = threshold;
	mutex_unlock(&xe->d3cold.lock);

	return 0;
}

/**
 * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed
 * @xe: xe device instance
 *
 * To be called during runtime_pm idle callback.
 * Check for all the D3Cold conditions ahead of runtime suspend.
 */
void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
{
	struct ttm_resource_manager *man;
	u32 total_vram_used_mb = 0;
	u64 vram_used;
	int i;

	if (!xe->d3cold.capable) {
		xe->d3cold.allowed = false;
		return;
	}

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man) {
			vram_used = ttm_resource_manager_usage(man);
			total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024);
		}
	}

	mutex_lock(&xe->d3cold.lock);

	if (total_vram_used_mb < xe->d3cold.vram_threshold)
		xe->d3cold.allowed = true;
	else
		xe->d3cold.allowed = false;

	mutex_unlock(&xe->d3cold.lock);
}

/**
 * xe_pm_module_init() - Perform xe_pm specific module initialization.
 *
 * Return: 0 on success. Currently doesn't fail.
 */
int __init xe_pm_module_init(void)
{
	xe_pm_runtime_lockdep_prime();
	return 0;
}