/* xref: /linux/drivers/accel/ivpu/ivpu_pm.c (revision 3e0bc2855b573bcffa2a52955a878f537f5ac0cd) */
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020-2023 Intel Corporation
 */

#include <linux/highmem.h>
#include <linux/moduleparam.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/reboot.h>

#include "vpu_boot_api.h"
#include "ivpu_drv.h"
#include "ivpu_hw.h"
#include "ivpu_fw.h"
#include "ivpu_ipc.h"
#include "ivpu_job.h"
#include "ivpu_jsm_msg.h"
#include "ivpu_mmu.h"
#include "ivpu_pm.h"

static bool ivpu_disable_recovery;
module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644);
MODULE_PARM_DESC(disable_recovery, "Disables recovery when VPU hang is detected");

static unsigned long ivpu_tdr_timeout_ms;
module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");

#define PM_RESCHEDULE_LIMIT     5

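/*
 * Prepare the next boot to start from scratch: reset all command queue
 * contexts and the IPC channel, reload the firmware image and select the
 * cold boot entry point.
 */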
static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
{
	struct ivpu_fw_info *fw = vdev->fw;

	ivpu_cmdq_reset_all_contexts(vdev);
	ivpu_ipc_reset(vdev);
	ivpu_fw_load(vdev);
	fw->entry_point = fw->cold_boot_entry_point;
}

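/*
 * Prepare a warm boot: resume the firmware from the save/restore return
 * address it published in the boot parameters. Fall back to a cold boot
 * when no such address has been saved.
 */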
static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev)
{
	struct ivpu_fw_info *fw = vdev->fw;
	struct vpu_boot_params *bp = ivpu_bo_vaddr(fw->mem);

	if (!bp->save_restore_ret_address) {
		ivpu_pm_prepare_cold_boot(vdev);
		return;
	}

	ivpu_dbg(vdev, FW_BOOT, "Save/restore entry point %llx\n", bp->save_restore_ret_address);
	fw->entry_point = bp->save_restore_ret_address;
}

static int ivpu_suspend(struct ivpu_device *vdev)
{
	int ret;

	ret = ivpu_shutdown(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret);

	return ret;
}

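/*
 * Bring the VPU back up: power on the hardware, re-enable the MMU and boot
 * the firmware. If a warm boot fails, fall back to a cold boot and retry.
 */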
static int ivpu_resume(struct ivpu_device *vdev)
{
	int ret;

retry:
	ret = ivpu_hw_power_up(vdev);
	if (ret) {
		ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
		goto err_power_down;
	}

	ret = ivpu_mmu_enable(vdev);
	if (ret) {
		ivpu_err(vdev, "Failed to resume MMU: %d\n", ret);
		goto err_power_down;
	}

	ret = ivpu_boot(vdev);
	if (ret)
		goto err_mmu_disable;

	return 0;

err_mmu_disable:
	ivpu_mmu_disable(vdev);
err_power_down:
	ivpu_hw_power_down(vdev);

	if (!ivpu_fw_is_cold_boot(vdev)) {
		ivpu_pm_prepare_cold_boot(vdev);
		goto retry;
	} else {
		ivpu_err(vdev, "Failed to resume the FW: %d\n", ret);
	}

	return ret;
}

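/*
 * Recovery worker: reset the VPU with a PCI function-level reset, retrying
 * while the reset is temporarily unavailable (-EAGAIN), then notify user
 * space about the recovery with a uevent.
 */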
static void ivpu_pm_recovery_work(struct work_struct *work)
{
	struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work);
	struct ivpu_device *vdev = pm->vdev;
	char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
	int ret;

retry:
	ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
	if (ret == -EAGAIN && !drm_dev_is_unplugged(&vdev->drm)) {
		cond_resched();
		goto retry;
	}

	if (ret && ret != -EAGAIN)
		ivpu_err(vdev, "Failed to reset VPU: %d\n", ret);

	kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
}

void ivpu_pm_schedule_recovery(struct ivpu_device *vdev)
{
	struct ivpu_pm_info *pm = vdev->pm;

	if (ivpu_disable_recovery) {
		ivpu_err(vdev, "Recovery not available when disable_recovery param is set\n");
		return;
	}

	if (ivpu_is_fpga(vdev)) {
		ivpu_err(vdev, "Recovery not available on FPGA\n");
		return;
	}

	/* Schedule recovery if it's not in progress */
	if (atomic_cmpxchg(&pm->in_reset, 0, 1) == 0) {
		ivpu_hw_irq_disable(vdev);
		queue_work(system_long_wq, &pm->recovery_work);
	}
}

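/*
 * Fired when a submitted job has not completed within the TDR timeout:
 * log hardware diagnostics and schedule device recovery.
 */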
static void ivpu_job_timeout_work(struct work_struct *work)
{
	struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
	struct ivpu_device *vdev = pm->vdev;
	unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;

	ivpu_err(vdev, "TDR detected, timeout %lu ms\n", timeout_ms);
	ivpu_hw_diagnose_failure(vdev);

	ivpu_pm_schedule_recovery(vdev);
}

void ivpu_start_job_timeout_detection(struct ivpu_device *vdev)
{
	unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;

	/* No-op if already queued */
	queue_delayed_work(system_wq, &vdev->pm->job_timeout_work, msecs_to_jiffies(timeout_ms));
}

void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev)
{
	cancel_delayed_work_sync(&vdev->pm->job_timeout_work);
}

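/*
 * System suspend callback: wait up to the TDR timeout for the VPU to become
 * idle, request D0i3 entry from the firmware, shut the device down, prepare
 * a warm boot for resume and save the PCI state.
 */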
int ivpu_pm_suspend_cb(struct device *dev)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct ivpu_device *vdev = to_ivpu_device(drm);
	unsigned long timeout;

	ivpu_dbg(vdev, PM, "Suspend..\n");

	timeout = jiffies + msecs_to_jiffies(vdev->timeout.tdr);
	while (!ivpu_hw_is_idle(vdev)) {
		cond_resched();
		if (time_after_eq(jiffies, timeout)) {
			ivpu_err(vdev, "Failed to enter idle on system suspend\n");
			return -EBUSY;
		}
	}

	ivpu_jsm_pwr_d0i3_enter(vdev);

	ivpu_suspend(vdev);
	ivpu_pm_prepare_warm_boot(vdev);

	pci_save_state(to_pci_dev(dev));
	pci_set_power_state(to_pci_dev(dev), PCI_D3hot);

	ivpu_dbg(vdev, PM, "Suspend done.\n");

	return 0;
}

int ivpu_pm_resume_cb(struct device *dev)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct ivpu_device *vdev = to_ivpu_device(drm);
	int ret;

	ivpu_dbg(vdev, PM, "Resume..\n");

	pci_set_power_state(to_pci_dev(dev), PCI_D0);
	pci_restore_state(to_pci_dev(dev));

	ret = ivpu_resume(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to resume: %d\n", ret);

	ivpu_dbg(vdev, PM, "Resume done.\n");

	return ret;
}

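/*
 * Runtime suspend callback: while the VPU is busy, reschedule the suspend up
 * to PM_RESCHEDULE_LIMIT times. Once the retries are exhausted, or if D0i3
 * entry fails, suspend anyway and prepare a cold boot for the next resume;
 * otherwise prepare a warm boot.
 */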
int ivpu_pm_runtime_suspend_cb(struct device *dev)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct ivpu_device *vdev = to_ivpu_device(drm);
	bool hw_is_idle = true;
	int ret;

	ivpu_dbg(vdev, PM, "Runtime suspend..\n");

	if (!ivpu_hw_is_idle(vdev) && vdev->pm->suspend_reschedule_counter) {
		ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n",
			 vdev->pm->suspend_reschedule_counter);
		pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend);
		vdev->pm->suspend_reschedule_counter--;
		return -EAGAIN;
	}

	if (!vdev->pm->suspend_reschedule_counter)
		hw_is_idle = false;
	else if (ivpu_jsm_pwr_d0i3_enter(vdev))
		hw_is_idle = false;

	ret = ivpu_suspend(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to suspend VPU: %d\n", ret);

	if (!hw_is_idle) {
		ivpu_warn(vdev, "VPU failed to enter idle, force suspended.\n");
		ivpu_pm_prepare_cold_boot(vdev);
	} else {
		ivpu_pm_prepare_warm_boot(vdev);
	}

	vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;

	ivpu_dbg(vdev, PM, "Runtime suspend done.\n");

	return 0;
}

int ivpu_pm_runtime_resume_cb(struct device *dev)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct ivpu_device *vdev = to_ivpu_device(drm);
	int ret;

	ivpu_dbg(vdev, PM, "Runtime resume..\n");

	ret = ivpu_resume(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);

	ivpu_dbg(vdev, PM, "Runtime resume done.\n");

	return ret;
}

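/*
 * The system sleep and runtime PM callbacks above are wired into the
 * driver's struct dev_pm_ops outside of this file. A minimal sketch of such
 * wiring (illustrative only, not the actual definition used by the driver):
 *
 *	static const struct dev_pm_ops ivpu_example_pm_ops = {
 *		SYSTEM_SLEEP_PM_OPS(ivpu_pm_suspend_cb, ivpu_pm_resume_cb)
 *		RUNTIME_PM_OPS(ivpu_pm_runtime_suspend_cb, ivpu_pm_runtime_resume_cb, NULL)
 *	};
 */

/*
 * Runtime PM reference helpers used around VPU accesses. A minimal usage
 * sketch (hypothetical caller, not taken from this file):
 *
 *	ret = ivpu_rpm_get(vdev);
 *	if (ret < 0)
 *		return ret;
 *	... talk to the VPU ...
 *	ivpu_rpm_put(vdev);
 */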
int ivpu_rpm_get(struct ivpu_device *vdev)
{
	int ret;

	ret = pm_runtime_resume_and_get(vdev->drm.dev);
	if (!drm_WARN_ON(&vdev->drm, ret < 0))
		vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;

	return ret;
}

int ivpu_rpm_get_if_active(struct ivpu_device *vdev)
{
	int ret;

	ret = pm_runtime_get_if_active(vdev->drm.dev, false);
	drm_WARN_ON(&vdev->drm, ret < 0);

	return ret;
}

void ivpu_rpm_put(struct ivpu_device *vdev)
{
	pm_runtime_mark_last_busy(vdev->drm.dev);
	pm_runtime_put_autosuspend(vdev->drm.dev);
}

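/*
 * PCI function-level reset callbacks: quiesce the device and abort all jobs
 * before the reset, then power it back up and boot the firmware afterwards.
 */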
void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev)
{
	struct ivpu_device *vdev = pci_get_drvdata(pdev);

	pm_runtime_get_sync(vdev->drm.dev);

	ivpu_dbg(vdev, PM, "Pre-reset..\n");
	atomic_inc(&vdev->pm->reset_counter);
	atomic_set(&vdev->pm->in_reset, 1);
	ivpu_prepare_for_reset(vdev);
	ivpu_hw_reset(vdev);
	ivpu_pm_prepare_cold_boot(vdev);
	ivpu_jobs_abort_all(vdev);
	ivpu_dbg(vdev, PM, "Pre-reset done.\n");
}

void ivpu_pm_reset_done_cb(struct pci_dev *pdev)
{
	struct ivpu_device *vdev = pci_get_drvdata(pdev);
	int ret;

	ivpu_dbg(vdev, PM, "Post-reset..\n");
	ret = ivpu_resume(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
	atomic_set(&vdev->pm->in_reset, 0);
	ivpu_dbg(vdev, PM, "Post-reset done.\n");

	pm_runtime_put_autosuspend(vdev->drm.dev);
}

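/*
 * One-time PM state initialization: set up the recovery and job timeout work
 * items and configure runtime autosuspend. A negative delay (used when
 * recovery is disabled) keeps the device from being runtime suspended.
 */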
void ivpu_pm_init(struct ivpu_device *vdev)
{
	struct device *dev = vdev->drm.dev;
	struct ivpu_pm_info *pm = vdev->pm;
	int delay;

	pm->vdev = vdev;
	pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;

	atomic_set(&pm->in_reset, 0);
	INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work);
	INIT_DELAYED_WORK(&pm->job_timeout_work, ivpu_job_timeout_work);

	if (ivpu_disable_recovery)
		delay = -1;
	else
		delay = vdev->timeout.autosuspend;

	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, delay);

	ivpu_dbg(vdev, PM, "Autosuspend delay = %d\n", delay);
}

void ivpu_pm_cancel_recovery(struct ivpu_device *vdev)
{
	drm_WARN_ON(&vdev->drm, delayed_work_pending(&vdev->pm->job_timeout_work));
	cancel_work_sync(&vdev->pm->recovery_work);
}

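/*
 * ivpu_pm_enable()/ivpu_pm_disable() bracket the period during which runtime
 * PM is allowed: enable marks the device active, allows runtime PM and drops
 * the initial usage reference; disable takes a no-resume reference and
 * forbids runtime PM again.
 */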
void ivpu_pm_enable(struct ivpu_device *vdev)
{
	struct device *dev = vdev->drm.dev;

	pm_runtime_set_active(dev);
	pm_runtime_allow(dev);
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);
}

void ivpu_pm_disable(struct ivpu_device *vdev)
{
	pm_runtime_get_noresume(vdev->drm.dev);
	pm_runtime_forbid(vdev->drm.dev);
}
383