// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_pm.h"

#include <linux/fault-inject.h>
#include <linux/pm_runtime.h>
#include <linux/suspend.h>

#include <drm/drm_managed.h>
#include <drm/ttm/ttm_placement.h>

#include "display/xe_display.h"
#include "xe_bo.h"
#include "xe_bo_evict.h"
#include "xe_device.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_gt_idle.h"
#include "xe_i2c.h"
#include "xe_irq.h"
#include "xe_late_bind_fw.h"
#include "xe_pcode.h"
#include "xe_pxp.h"
#include "xe_sriov_vf_ccs.h"
#include "xe_trace.h"
#include "xe_vm.h"
#include "xe_wa.h"

/**
 * DOC: Xe Power Management
 *
 * Xe PM implements the main routines for both system level suspend states and
 * for the opportunistic runtime suspend states.
 *
 * System Level Suspend (S-States) - In general this is OS initiated suspend
 * driven by ACPI for achieving S0ix (a.k.a. S2idle, freeze), S3 (suspend to
 * RAM) and S4 (suspend to disk). The main functions here are `xe_pm_suspend`
 * and `xe_pm_resume`. They are the entry points for suspending to and
 * resuming from these states.
 *
 * PCI Device Suspend (D-States) - This is the opportunistic PCIe device low
 * power state D3, controlled by the PCI subsystem and ACPI with help from the
 * runtime_pm infrastructure.
 * PCI D3 is special and can mean D3hot, where Vcc power remains on to keep
 * memory alive and allow a quicker, low latency resume, or D3cold, where Vcc
 * power is off for better power savings.
 * The Vcc of the PCI hierarchy can only be controlled at the PCI root port
 * level, while the device driver can be behind multiple bridges/switches and
 * paired with other devices. For this reason, the PCI subsystem cannot perform
 * the transition towards D3Cold. The lowest runtime PM possible from the PCI
 * subsystem is D3hot. Then, if all the paired devices behind the same root
 * port are in D3hot, ACPI will assist here and run its own methods (_PR3 and
 * _OFF) to perform the transition from D3hot to D3cold. Xe may disallow this
 * transition by calling pci_d3cold_disable(root_pdev) before going to runtime
 * suspend, based on runtime conditions such as VRAM usage, for instance to
 * guarantee a quick, low latency resume.
 *
 * Runtime PM - This infrastructure provided by the Linux kernel allows device
 * drivers to indicate when they can be runtime suspended, so the device could
 * be put in D3 (if supported), or allow deeper package sleep states
 * (PC-states), and/or other low level power states. The Xe PM component
 * provides `xe_pm_runtime_suspend` and `xe_pm_runtime_resume` functions that
 * the PCI subsystem will call before transitioning to/from runtime suspend.
 *
 * Also, Xe PM provides get and put functions that the Xe driver will use to
 * indicate activity. In order to avoid locking complications with the memory
 * management, whenever possible, these get and put functions need to be
 * called from the higher/outer levels.
 * The main cases that need to be protected from the outer levels are: IOCTL,
 * sysfs, debugfs, dma-buf sharing, GPU execution.
 *
 * This component is not responsible for GT idleness (RC6) nor GT frequency
 * management (RPS).
 */
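
/*
 * As a rough usage sketch of the outer-level protection described above (this
 * is illustrative only; the real call sites live elsewhere in the driver and
 * may differ in detail), a caller about to touch the hardware is expected to
 * wrap the work in a get/put pair:
 *
 *	xe_pm_runtime_get(xe);
 *	... access registers / submit work ...
 *	xe_pm_runtime_put(xe);
 *
 * Variants whose return value must be checked, such as
 * xe_pm_runtime_get_ioctl() and xe_pm_runtime_resume_and_get(), are provided
 * below; see the usage sketch next to xe_pm_runtime_get_ioctl().
 */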

#ifdef CONFIG_LOCKDEP
static struct lockdep_map xe_pm_runtime_d3cold_map = {
	.name = "xe_rpm_d3cold_map"
};

static struct lockdep_map xe_pm_runtime_nod3cold_map = {
	.name = "xe_rpm_nod3cold_map"
};
#endif

/**
 * xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context
 * @xe: The xe device.
 *
 * Return: true if it is safe to runtime resume from reclaim context.
 * false otherwise.
 */
bool xe_rpm_reclaim_safe(const struct xe_device *xe)
{
	return !xe->d3cold.capable;
}

static void xe_rpm_lockmap_acquire(const struct xe_device *xe)
{
	lock_map_acquire(xe_rpm_reclaim_safe(xe) ?
			 &xe_pm_runtime_nod3cold_map :
			 &xe_pm_runtime_d3cold_map);
}

static void xe_rpm_lockmap_release(const struct xe_device *xe)
{
	lock_map_release(xe_rpm_reclaim_safe(xe) ?
			 &xe_pm_runtime_nod3cold_map :
			 &xe_pm_runtime_d3cold_map);
}

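/*
 * The two dummy lockdep maps above stand in for the two flavours of runtime
 * resume (with and without D3cold, i.e. with and without bo eviction). They
 * are primed in xe_pm_runtime_lockdep_prime() and wrapped around the runtime
 * suspend/resume callbacks; see the comment above xe_rpm_might_enter_cb()
 * further below for the rationale.
 */
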
/**
 * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_suspend(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err;

	drm_dbg(&xe->drm, "Suspending device\n");
	trace_xe_pm_suspend(xe, __builtin_return_address(0));

	err = xe_pxp_pm_suspend(xe->pxp);
	if (err)
		goto err;

	xe_late_bind_wait_for_worker_completion(&xe->late_bind);

	for_each_gt(gt, xe, id)
		xe_gt_suspend_prepare(gt);

	xe_display_pm_suspend(xe);

	/* FIXME: Super racy... */
	err = xe_bo_evict_all(xe);
	if (err)
		goto err_display;

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err)
			goto err_display;
	}

	xe_irq_suspend(xe);

	xe_display_pm_suspend_late(xe);

	xe_i2c_pm_suspend(xe);

	drm_dbg(&xe->drm, "Device suspended\n");
	return 0;

err_display:
	xe_display_pm_resume(xe);
	xe_pxp_pm_resume(xe->pxp);
err:
	drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
	return err;
}

/**
 * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_resume(struct xe_device *xe)
{
	struct xe_tile *tile;
	struct xe_gt *gt;
	u8 id;
	int err;

	drm_dbg(&xe->drm, "Resuming device\n");
	trace_xe_pm_resume(xe, __builtin_return_address(0));

	for_each_gt(gt, xe, id)
		xe_gt_idle_disable_c6(gt);

	for_each_tile(tile, xe, id)
		xe_wa_apply_tile_workarounds(tile);

	err = xe_pcode_ready(xe, true);
	if (err)
		return err;

	xe_display_pm_resume_early(xe);

	/*
	 * This only restores pinned memory which is the memory required for the
	 * GT(s) to resume.
	 */
	err = xe_bo_restore_early(xe);
	if (err)
		goto err;

	xe_i2c_pm_resume(xe, true);

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	xe_display_pm_resume(xe);

	err = xe_bo_restore_late(xe);
	if (err)
		goto err;

	xe_pxp_pm_resume(xe->pxp);

	if (IS_VF_CCS_READY(xe))
		xe_sriov_vf_ccs_register_context(xe);

	xe_late_bind_fw_load(&xe->late_bind);

	drm_dbg(&xe->drm, "Device resumed\n");
	return 0;
err:
	drm_dbg(&xe->drm, "Device resume failed %d\n", err);
	return err;
}

static bool xe_pm_pci_d3cold_capable(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *root_pdev;

	root_pdev = pcie_find_root_port(pdev);
	if (!root_pdev)
		return false;

	/* D3Cold requires PME capability */
	if (!pci_pme_capable(root_pdev, PCI_D3cold)) {
		drm_dbg(&xe->drm, "d3cold: PME# not supported\n");
		return false;
	}

	/* D3Cold requires _PR3 power resource */
	if (!pci_pr3_present(root_pdev)) {
		drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n");
		return false;
	}

	return true;
}

static void xe_pm_runtime_init(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/* Our current VFs do not support RPM, so disable it */
	if (IS_SRIOV_VF(xe))
		return;

	/*
	 * Disable the system suspend direct complete optimization.
	 * We need to ensure that the regular device suspend/resume functions
	 * are called since our runtime_pm cannot guarantee local memory
	 * eviction for d3cold.
	 * TODO: Check HDA audio dependencies claimed by i915, and then enforce
	 *       this option on integrated graphics as well.
	 */
	if (IS_DGFX(xe))
		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);

	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, 1000);
	pm_runtime_set_active(dev);
	pm_runtime_allow(dev);
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put(dev);
}

int xe_pm_init_early(struct xe_device *xe)
{
	int err;

	INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);

	err = drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
	if (err)
		return err;

	err = drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
	if (err)
		return err;

	xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe);
	return 0;
}
ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */

static u32 vram_threshold_value(struct xe_device *xe)
{
	/* FIXME: D3Cold temporarily disabled by default on BMG */
	if (xe->info.platform == XE_BATTLEMAGE)
		return 0;

	return DEFAULT_VRAM_THRESHOLD;
}

static void xe_pm_wake_rebind_workers(struct xe_device *xe)
{
	struct xe_vm *vm, *next;

	mutex_lock(&xe->rebind_resume_lock);
	list_for_each_entry_safe(vm, next, &xe->rebind_resume_list,
				 preempt.pm_activate_link) {
		list_del_init(&vm->preempt.pm_activate_link);
		xe_vm_resume_rebind_worker(vm);
	}
	mutex_unlock(&xe->rebind_resume_lock);
}

static int xe_pm_notifier_callback(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct xe_device *xe = container_of(nb, struct xe_device, pm_notifier);
	int err = 0;

	switch (action) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
		reinit_completion(&xe->pm_block);
		xe_pm_runtime_get(xe);
		err = xe_bo_evict_all_user(xe);
		if (err)
			drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err);

		err = xe_bo_notifier_prepare_all_pinned(xe);
		if (err)
			drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err);
		/*
		 * Keep the runtime pm reference until post hibernation / post suspend to
		 * avoid a runtime suspend interfering with evicted objects or backup
		 * allocations.
		 */
		break;
	case PM_POST_HIBERNATION:
	case PM_POST_SUSPEND:
		complete_all(&xe->pm_block);
		xe_pm_wake_rebind_workers(xe);
		xe_bo_notifier_unprepare_all_pinned(xe);
		xe_pm_runtime_put(xe);
		break;
	}

	return NOTIFY_DONE;
}

/**
 * xe_pm_init - Initialize Xe Power Management
 * @xe: xe device instance
 *
 * This component is responsible for System and Device sleep states.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_init(struct xe_device *xe)
{
	u32 vram_threshold;
	int err;

	xe->pm_notifier.notifier_call = xe_pm_notifier_callback;
	err = register_pm_notifier(&xe->pm_notifier);
	if (err)
		return err;

	err = drmm_mutex_init(&xe->drm, &xe->rebind_resume_lock);
	if (err)
		goto err_unregister;

	init_completion(&xe->pm_block);
	complete_all(&xe->pm_block);
	INIT_LIST_HEAD(&xe->rebind_resume_list);

	/* For now suspend/resume is only allowed with GuC */
	if (!xe_device_uc_enabled(xe))
		return 0;

	if (xe->d3cold.capable) {
		vram_threshold = vram_threshold_value(xe);
		err = xe_pm_set_vram_threshold(xe, vram_threshold);
		if (err)
			goto err_unregister;
	}

	xe_pm_runtime_init(xe);
	return 0;

err_unregister:
	unregister_pm_notifier(&xe->pm_notifier);
	return err;
}

static void xe_pm_runtime_fini(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/* Our current VFs do not support RPM, so disable it */
	if (IS_SRIOV_VF(xe))
		return;

	pm_runtime_get_sync(dev);
	pm_runtime_forbid(dev);
}

/**
 * xe_pm_fini - Finalize PM
 * @xe: xe device instance
 */
void xe_pm_fini(struct xe_device *xe)
{
	if (xe_device_uc_enabled(xe))
		xe_pm_runtime_fini(xe);

	unregister_pm_notifier(&xe->pm_notifier);
}

static void xe_pm_write_callback_task(struct xe_device *xe,
				      struct task_struct *task)
{
	WRITE_ONCE(xe->pm_callback_task, task);

	/*
	 * Just in case it's somehow possible for our writes to be reordered to
	 * the extent that something else re-uses the task written in
	 * pm_callback_task. For example after returning from the callback, but
	 * before the reordered write that resets pm_callback_task back to NULL.
	 */
	smp_mb(); /* pairs with xe_pm_read_callback_task */
}

struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
{
	smp_mb(); /* pairs with xe_pm_write_callback_task */

	return READ_ONCE(xe->pm_callback_task);
}

/**
 * xe_pm_runtime_suspended - Check if runtime_pm state is suspended
 * @xe: xe device instance
 *
 * This does not provide any guarantee that the device is going to remain
 * suspended as it might be racing with the runtime state transitions.
 * It can be used only as a non-reliable assertion, to ensure that we are not in
 * the sleep state while trying to access some memory for instance.
 *
 * Returns true if PCI device is suspended, false otherwise.
 */
bool xe_pm_runtime_suspended(struct xe_device *xe)
{
	return pm_runtime_suspended(xe->drm.dev);
}

/**
 * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold
 * @xe: xe device instance
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_runtime_suspend(struct xe_device *xe)
{
	struct xe_bo *bo, *on;
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	trace_xe_pm_runtime_suspend(xe, __builtin_return_address(0));
	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	/*
	 * The actual xe_pm_runtime_put() is always async underneath, so
	 * exactly where that is called should make no difference to us.
	 * However, we still need to be very careful with the locks that this
	 * callback acquires and the locks that are acquired and held by any
	 * callers of xe_pm_runtime_get(). We already have the matching
	 * annotation on that side, but we also need it here. For example
	 * lockdep should be able to tell us if the following scenario is in
	 * theory possible:
	 *
	 * CPU0                          | CPU1 (kworker)
	 * lock(A)                       |
	 *                               | xe_pm_runtime_suspend()
	 *                               |      lock(A)
	 * xe_pm_runtime_get()           |
	 *
	 * This will clearly deadlock since rpm core needs to wait for
	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
	 * on CPU0 which prevents CPU1 from making forward progress.  With the
	 * annotation here and in xe_pm_runtime_get() lockdep will see
	 * the potential lock inversion and give us a nice splat.
	 */
	xe_rpm_lockmap_acquire(xe);

	err = xe_pxp_pm_suspend(xe->pxp);
	if (err)
		goto out;

	/*
	 * Hold the lock for the entire list operation, as xe_ttm_bo_destroy()
	 * and xe_bo_move_notify() also check and delete the bo entry from the
	 * user fault list.
	 */
	mutex_lock(&xe->mem_access.vram_userfault.lock);
	list_for_each_entry_safe(bo, on,
				 &xe->mem_access.vram_userfault.list, vram_userfault_link)
		xe_bo_runtime_pm_release_mmap_offset(bo);
	mutex_unlock(&xe->mem_access.vram_userfault.lock);

	xe_display_pm_runtime_suspend(xe);

	if (xe->d3cold.allowed) {
		err = xe_bo_evict_all(xe);
		if (err)
			goto out_resume;
	}

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err)
			goto out_resume;
	}

	xe_irq_suspend(xe);

	xe_display_pm_runtime_suspend_late(xe);

	xe_i2c_pm_suspend(xe);

	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return 0;

out_resume:
	xe_display_pm_runtime_resume(xe);
	xe_pxp_pm_resume(xe->pxp);
out:
	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/**
 * xe_pm_runtime_resume - Waking up from D3hot/D3Cold
 * @xe: xe device instance
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_pm_runtime_resume(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	trace_xe_pm_runtime_resume(xe, __builtin_return_address(0));
	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	xe_rpm_lockmap_acquire(xe);

	for_each_gt(gt, xe, id)
		xe_gt_idle_disable_c6(gt);

	if (xe->d3cold.allowed) {
		err = xe_pcode_ready(xe, true);
		if (err)
			goto out;

		xe_display_pm_resume_early(xe);

		/*
		 * This only restores pinned memory which is the memory
		 * required for the GT(s) to resume.
		 */
		err = xe_bo_restore_early(xe);
		if (err)
			goto out;
	}

	xe_i2c_pm_resume(xe, xe->d3cold.allowed);

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	xe_display_pm_runtime_resume(xe);

	if (xe->d3cold.allowed) {
		err = xe_bo_restore_late(xe);
		if (err)
			goto out;
	}

	xe_pxp_pm_resume(xe->pxp);

	if (IS_VF_CCS_READY(xe))
		xe_sriov_vf_ccs_register_context(xe);

	if (xe->d3cold.allowed)
		xe_late_bind_fw_load(&xe->late_bind);

out:
	xe_rpm_lockmap_release(xe);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/*
 * For places where resume is synchronous it can be quite easy to deadlock
 * if we are not careful. Also in practice it might be quite timing
 * sensitive to ever see the 0 -> 1 transition with the caller's locks
 * held, so deadlocks might exist but are hard for lockdep to ever see.
 * With this in mind, help lockdep learn about the potentially scary
 * stuff that can happen inside the runtime_resume callback by acquiring
 * a dummy lock (it doesn't protect anything and gets compiled out on
 * non-debug builds).  Lockdep then only needs to see the
 * xe_pm_runtime_xxx_map -> runtime_resume callback ordering once, and can
 * then hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map
 * orderings. For example if the (callers_locks) are ever grabbed in the
 * runtime_resume callback, lockdep should give us a nice splat.
 */
static void xe_rpm_might_enter_cb(const struct xe_device *xe)
{
	xe_rpm_lockmap_acquire(xe);
	xe_rpm_lockmap_release(xe);
}

/*
 * Prime the lockdep maps for known locking orders that need to
 * be supported but that may not always occur on all systems.
 */
static void xe_pm_runtime_lockdep_prime(void)
{
	struct dma_resv lockdep_resv;

	dma_resv_init(&lockdep_resv);
	lock_map_acquire(&xe_pm_runtime_d3cold_map);
	/* D3Cold takes the dma_resv locks to evict bos */
	dma_resv_lock(&lockdep_resv, NULL);
	dma_resv_unlock(&lockdep_resv);
	lock_map_release(&xe_pm_runtime_d3cold_map);

	/* Shrinkers might like to wake up the device under reclaim. */
	fs_reclaim_acquire(GFP_KERNEL);
	lock_map_acquire(&xe_pm_runtime_nod3cold_map);
	lock_map_release(&xe_pm_runtime_nod3cold_map);
	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously
 * @xe: xe device instance
 */
void xe_pm_runtime_get(struct xe_device *xe)
{
	trace_xe_pm_runtime_get(xe, __builtin_return_address(0));
	pm_runtime_get_noresume(xe->drm.dev);

	if (xe_pm_read_callback_task(xe) == current)
		return;

	xe_rpm_might_enter_cb(xe);
	pm_runtime_resume(xe->drm.dev);
}

/**
 * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle
 * @xe: xe device instance
 */
void xe_pm_runtime_put(struct xe_device *xe)
{
	trace_xe_pm_runtime_put(xe, __builtin_return_address(0));
	if (xe_pm_read_callback_task(xe) == current) {
		pm_runtime_put_noidle(xe->drm.dev);
	} else {
		pm_runtime_mark_last_busy(xe->drm.dev);
		pm_runtime_put(xe->drm.dev);
	}
}

/**
 * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl
 * @xe: xe device instance
 *
 * Returns: Any number greater than or equal to 0 for success, negative error
 * code otherwise.
 */
int xe_pm_runtime_get_ioctl(struct xe_device *xe)
{
	trace_xe_pm_runtime_get_ioctl(xe, __builtin_return_address(0));
	if (WARN_ON(xe_pm_read_callback_task(xe) == current))
		return -ELOOP;

	xe_rpm_might_enter_cb(xe);
	return pm_runtime_get_sync(xe->drm.dev);
}

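/*
 * Usage sketch (illustrative only; the real ioctl plumbing lives outside this
 * file and may differ, and do_the_ioctl_work() is a placeholder): since
 * pm_runtime_get_sync() bumps the usage counter even on failure, callers are
 * expected to pair this call with xe_pm_runtime_put() on both paths:
 *
 *	ret = xe_pm_runtime_get_ioctl(xe);
 *	if (ret >= 0)
 *		ret = do_the_ioctl_work(...);
 *	xe_pm_runtime_put(xe);
 *	return ret;
 */
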
/**
 * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active
 * @xe: xe device instance
 *
 * Return: True if device is awake (regardless of the previous number of
 * references) and a new reference was taken, false otherwise.
 */
bool xe_pm_runtime_get_if_active(struct xe_device *xe)
{
	return pm_runtime_get_if_active(xe->drm.dev) > 0;
}

/**
 * xe_pm_runtime_get_if_in_use - Get a new reference if device is active with previous ref taken
 * @xe: xe device instance
 *
 * Return: True if the device is awake, a previous reference had already been
 * taken, and a new reference was taken now; false otherwise.
 */
bool xe_pm_runtime_get_if_in_use(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		/* The device is awake, grab the ref and move on */
		pm_runtime_get_noresume(xe->drm.dev);
		return true;
	}

	return pm_runtime_get_if_in_use(xe->drm.dev) > 0;
}

/*
 * Very unreliable! Should only be used to suppress the false positive case
 * in the missing outer rpm protection warning.
 */
static bool xe_pm_suspending_or_resuming(struct xe_device *xe)
{
#ifdef CONFIG_PM
	struct device *dev = xe->drm.dev;

	return dev->power.runtime_status == RPM_SUSPENDING ||
		dev->power.runtime_status == RPM_RESUMING ||
		pm_suspend_in_progress();
#else
	return false;
#endif
}

/**
 * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming
 * @xe: xe device instance
 *
 * This function should be used in inner places where it is surely already
 * protected by outer-bound callers of `xe_pm_runtime_get`.
 * It will warn if not protected.
 * The reference should be put back after this function regardless, since it
 * will always bump the usage counter.
 */
void xe_pm_runtime_get_noresume(struct xe_device *xe)
{
	bool ref;

	ref = xe_pm_runtime_get_if_in_use(xe);

	if (!ref) {
		pm_runtime_get_noresume(xe->drm.dev);
		drm_WARN(&xe->drm, !xe_pm_suspending_or_resuming(xe),
			 "Missing outer runtime PM protection\n");
	}
}

/**
 * xe_pm_runtime_resume_and_get - Resume, then get a runtime_pm ref if awake.
 * @xe: xe device instance
 *
 * Returns: True if device is awake and the reference was taken, false otherwise.
 */
bool xe_pm_runtime_resume_and_get(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) == current) {
		/* The device is awake, grab the ref and move on */
		pm_runtime_get_noresume(xe->drm.dev);
		return true;
	}

	xe_rpm_might_enter_cb(xe);
	return pm_runtime_resume_and_get(xe->drm.dev) >= 0;
}

/**
 * xe_pm_assert_unbounded_bridge - Disable PM when the parent PCIe bridge has no driver bound
 * @xe: xe device instance
 */
void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *bridge = pci_upstream_bridge(pdev);

	if (!bridge)
		return;

	if (!bridge->driver) {
		drm_warn(&xe->drm, "unbound parent pci bridge, device won't support any PM.\n");
		device_set_pm_not_required(&pdev->dev);
	}
}

/**
 * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold
 * @xe: xe device instance
 * @threshold: VRAM size in MiB for the D3cold threshold
 *
 * Return:
 * * 0		- success
 * * -EINVAL	- invalid argument
 */
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
	struct ttm_resource_manager *man;
	u32 vram_total_mb = 0;
	int i;

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man)
			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
	}

	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);

	if (threshold > vram_total_mb)
		return -EINVAL;

	mutex_lock(&xe->d3cold.lock);
	xe->d3cold.vram_threshold = threshold;
	mutex_unlock(&xe->d3cold.lock);

	return 0;
}

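/*
 * A minimal sketch of how a sysfs knob could drive this helper. This is
 * illustrative only: the driver's real attribute plumbing lives elsewhere and
 * the names below (vram_d3cold_threshold_store, to_xe_device_from_kdev) are
 * placeholders:
 *
 *	static ssize_t vram_d3cold_threshold_store(struct device *dev,
 *						   struct device_attribute *attr,
 *						   const char *buf, size_t count)
 *	{
 *		struct xe_device *xe = to_xe_device_from_kdev(dev);
 *		u32 threshold;
 *		int err;
 *
 *		err = kstrtou32(buf, 0, &threshold);
 *		if (err)
 *			return err;
 *
 *		xe_pm_runtime_get(xe);
 *		err = xe_pm_set_vram_threshold(xe, threshold);
 *		xe_pm_runtime_put(xe);
 *
 *		return err ?: count;
 *	}
 */
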
/**
 * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed
 * @xe: xe device instance
 *
 * To be called during runtime_pm idle callback.
 * Check for all the D3Cold conditions ahead of runtime suspend.
 */
void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
{
	struct ttm_resource_manager *man;
	u32 total_vram_used_mb = 0;
	u64 vram_used;
	int i;

	if (!xe->d3cold.capable) {
		xe->d3cold.allowed = false;
		return;
	}

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man) {
			vram_used = ttm_resource_manager_usage(man);
			total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024);
		}
	}

	mutex_lock(&xe->d3cold.lock);

	if (total_vram_used_mb < xe->d3cold.vram_threshold)
		xe->d3cold.allowed = true;
	else
		xe->d3cold.allowed = false;

	mutex_unlock(&xe->d3cold.lock);
}

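/*
 * Illustrative sketch of the expected caller (the actual PCI runtime_idle
 * callback lives in the PCI glue code and may differ in detail; the xe_device
 * lookup below is schematic): the idle callback re-evaluates d3cold.allowed
 * right before the device is allowed to runtime suspend, and the PCI layer
 * can then be told whether D3cold is acceptable:
 *
 *	static int xe_pci_runtime_idle(struct device *dev)
 *	{
 *		struct xe_device *xe = to_xe_device(dev_get_drvdata(dev));
 *
 *		xe_pm_d3cold_allowed_toggle(xe);
 *
 *		return 0;
 *	}
 *
 * with pci_d3cold_enable()/pci_d3cold_disable() on the root port driven by
 * the resulting xe->d3cold.allowed value.
 */
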
/**
 * xe_pm_module_init() - Perform xe_pm specific module initialization.
 *
 * Return: 0 on success. Currently doesn't fail.
 */
int __init xe_pm_module_init(void)
{
	xe_pm_runtime_lockdep_prime();
	return 0;
}