// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020-2023 Intel Corporation
 */

#include <linux/highmem.h>
#include <linux/moduleparam.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/reboot.h>

#include "vpu_boot_api.h"
#include "ivpu_drv.h"
#include "ivpu_hw.h"
#include "ivpu_fw.h"
#include "ivpu_ipc.h"
#include "ivpu_job.h"
#include "ivpu_jsm_msg.h"
#include "ivpu_mmu.h"
#include "ivpu_pm.h"

static bool ivpu_disable_recovery;
module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644);
MODULE_PARM_DESC(disable_recovery, "Disables recovery when VPU hang is detected");

static unsigned long ivpu_tdr_timeout_ms;
module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");

#define PM_RESCHEDULE_LIMIT 5

static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
{
	struct ivpu_fw_info *fw = vdev->fw;

	ivpu_cmdq_reset_all_contexts(vdev);
	ivpu_ipc_reset(vdev);
	ivpu_fw_load(vdev);
	fw->entry_point = fw->cold_boot_entry_point;
}

static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev)
{
	struct ivpu_fw_info *fw = vdev->fw;
	struct vpu_boot_params *bp = ivpu_bo_vaddr(fw->mem);

	if (!bp->save_restore_ret_address) {
		ivpu_pm_prepare_cold_boot(vdev);
		return;
	}

	ivpu_dbg(vdev, FW_BOOT, "Save/restore entry point %llx", bp->save_restore_ret_address);
	fw->entry_point = bp->save_restore_ret_address;
}

static int ivpu_suspend(struct ivpu_device *vdev)
{
	int ret;

	ret = ivpu_shutdown(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret);

	return ret;
}

static int ivpu_resume(struct ivpu_device *vdev)
{
	int ret;

retry:
	ret = ivpu_hw_power_up(vdev);
	if (ret) {
		ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
		goto err_power_down;
	}

	ret = ivpu_mmu_enable(vdev);
	if (ret) {
		ivpu_err(vdev, "Failed to resume MMU: %d\n", ret);
		goto err_power_down;
	}

	ret = ivpu_boot(vdev);
	if (ret)
		goto err_mmu_disable;

	return 0;

err_mmu_disable:
	ivpu_mmu_disable(vdev);
err_power_down:
	ivpu_hw_power_down(vdev);

	if (!ivpu_fw_is_cold_boot(vdev)) {
		ivpu_pm_prepare_cold_boot(vdev);
		goto retry;
	} else {
		ivpu_err(vdev, "Failed to resume the FW: %d\n", ret);
	}

	return ret;
}
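/*
 * Recovery is implemented as a PCI function reset: pci_try_reset_function()
 * re-runs the reset_prepare()/reset_done() callbacks defined below and
 * returns -EAGAIN while the device lock is contended, so the worker keeps
 * retrying (with cond_resched()) until the reset goes through or the device
 * is unplugged.
 */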
static void ivpu_pm_recovery_work(struct work_struct *work)
{
	struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work);
	struct ivpu_device *vdev = pm->vdev;
	char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
	int ret;

retry:
	ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
	if (ret == -EAGAIN && !drm_dev_is_unplugged(&vdev->drm)) {
		cond_resched();
		goto retry;
	}

	if (ret && ret != -EAGAIN)
		ivpu_err(vdev, "Failed to reset VPU: %d\n", ret);

	kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
}

void ivpu_pm_schedule_recovery(struct ivpu_device *vdev)
{
	struct ivpu_pm_info *pm = vdev->pm;

	if (ivpu_disable_recovery) {
		ivpu_err(vdev, "Recovery not available when disable_recovery param is set\n");
		return;
	}

	if (ivpu_is_fpga(vdev)) {
		ivpu_err(vdev, "Recovery not available on FPGA\n");
		return;
	}

	/* Schedule recovery if it's not already in progress */
	if (atomic_cmpxchg(&pm->in_reset, 0, 1) == 0) {
		ivpu_hw_irq_disable(vdev);
		queue_work(system_long_wq, &pm->recovery_work);
	}
}

static void ivpu_job_timeout_work(struct work_struct *work)
{
	struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
	struct ivpu_device *vdev = pm->vdev;
	unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;

	ivpu_err(vdev, "TDR detected, timeout %lu ms\n", timeout_ms);
	ivpu_hw_diagnose_failure(vdev);

	ivpu_pm_schedule_recovery(vdev);
}

void ivpu_start_job_timeout_detection(struct ivpu_device *vdev)
{
	unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;

	/* No-op if already queued */
	queue_delayed_work(system_wq, &vdev->pm->job_timeout_work, msecs_to_jiffies(timeout_ms));
}

void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev)
{
	cancel_delayed_work_sync(&vdev->pm->job_timeout_work);
}

int ivpu_pm_suspend_cb(struct device *dev)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct ivpu_device *vdev = to_ivpu_device(drm);
	unsigned long timeout;

	ivpu_dbg(vdev, PM, "Suspend..\n");

	timeout = jiffies + msecs_to_jiffies(vdev->timeout.tdr);
	while (!ivpu_hw_is_idle(vdev)) {
		cond_resched();
		if (time_after_eq(jiffies, timeout)) {
			ivpu_err(vdev, "Failed to enter idle on system suspend\n");
			return -EBUSY;
		}
	}

	ivpu_jsm_pwr_d0i3_enter(vdev);

	ivpu_suspend(vdev);
	ivpu_pm_prepare_warm_boot(vdev);

	pci_save_state(to_pci_dev(dev));
	pci_set_power_state(to_pci_dev(dev), PCI_D3hot);

	ivpu_dbg(vdev, PM, "Suspend done.\n");

	return 0;
}

int ivpu_pm_resume_cb(struct device *dev)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct ivpu_device *vdev = to_ivpu_device(drm);
	int ret;

	ivpu_dbg(vdev, PM, "Resume..\n");

	pci_set_power_state(to_pci_dev(dev), PCI_D0);
	pci_restore_state(to_pci_dev(dev));

	ret = ivpu_resume(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to resume: %d\n", ret);

	ivpu_dbg(vdev, PM, "Resume done.\n");

	return ret;
}

int ivpu_pm_runtime_suspend_cb(struct device *dev)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct ivpu_device *vdev = to_ivpu_device(drm);
	bool hw_is_idle = true;
	int ret;

	ivpu_dbg(vdev, PM, "Runtime suspend..\n");

	if (!ivpu_hw_is_idle(vdev) && vdev->pm->suspend_reschedule_counter) {
		ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n",
			 vdev->pm->suspend_reschedule_counter);
		pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend);
		vdev->pm->suspend_reschedule_counter--;
		return -EAGAIN;
	}

	if (!vdev->pm->suspend_reschedule_counter)
		hw_is_idle = false;
	else if (ivpu_jsm_pwr_d0i3_enter(vdev))
		hw_is_idle = false;

	ret = ivpu_suspend(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to suspend VPU: %d\n", ret);

	if (!hw_is_idle) {
		ivpu_warn(vdev, "VPU failed to enter idle, force suspended.\n");
		ivpu_pm_prepare_cold_boot(vdev);
	} else {
		ivpu_pm_prepare_warm_boot(vdev);
	}

	vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;

	ivpu_dbg(vdev, PM, "Runtime suspend done.\n");

	return 0;
}
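/*
 * When D0i3 entry succeeds, the FW is expected to publish a save/restore
 * entry point, and ivpu_pm_prepare_warm_boot() will use it on the next
 * power-up; when idle or D0i3 entry fails, the state saved by the FW cannot
 * be trusted, so a cold boot is prepared instead.
 */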
int ivpu_pm_runtime_resume_cb(struct device *dev)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct ivpu_device *vdev = to_ivpu_device(drm);
	int ret;

	ivpu_dbg(vdev, PM, "Runtime resume..\n");

	ret = ivpu_resume(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to resume VPU: %d\n", ret);

	ivpu_dbg(vdev, PM, "Runtime resume done.\n");

	return ret;
}

int ivpu_rpm_get(struct ivpu_device *vdev)
{
	int ret;

	ret = pm_runtime_resume_and_get(vdev->drm.dev);
	if (!drm_WARN_ON(&vdev->drm, ret < 0))
		vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;

	return ret;
}

int ivpu_rpm_get_if_active(struct ivpu_device *vdev)
{
	int ret;

	ret = pm_runtime_get_if_active(vdev->drm.dev, false);
	drm_WARN_ON(&vdev->drm, ret < 0);

	return ret;
}

void ivpu_rpm_put(struct ivpu_device *vdev)
{
	pm_runtime_mark_last_busy(vdev->drm.dev);
	pm_runtime_put_autosuspend(vdev->drm.dev);
}

void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev)
{
	struct ivpu_device *vdev = pci_get_drvdata(pdev);

	pm_runtime_get_sync(vdev->drm.dev);

	ivpu_dbg(vdev, PM, "Pre-reset..\n");
	atomic_inc(&vdev->pm->reset_counter);
	atomic_set(&vdev->pm->in_reset, 1);
	ivpu_prepare_for_reset(vdev);
	ivpu_hw_reset(vdev);
	ivpu_pm_prepare_cold_boot(vdev);
	ivpu_jobs_abort_all(vdev);
	ivpu_dbg(vdev, PM, "Pre-reset done.\n");
}

void ivpu_pm_reset_done_cb(struct pci_dev *pdev)
{
	struct ivpu_device *vdev = pci_get_drvdata(pdev);
	int ret;

	ivpu_dbg(vdev, PM, "Post-reset..\n");
	ret = ivpu_resume(vdev);
	if (ret)
		ivpu_err(vdev, "Failed to resume VPU: %d\n", ret);
	atomic_set(&vdev->pm->in_reset, 0);
	ivpu_dbg(vdev, PM, "Post-reset done.\n");

	pm_runtime_put_autosuspend(vdev->drm.dev);
}

void ivpu_pm_init(struct ivpu_device *vdev)
{
	struct device *dev = vdev->drm.dev;
	struct ivpu_pm_info *pm = vdev->pm;
	int delay;

	pm->vdev = vdev;
	pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;

	atomic_set(&pm->in_reset, 0);
	INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work);
	INIT_DELAYED_WORK(&pm->job_timeout_work, ivpu_job_timeout_work);

	if (ivpu_disable_recovery)
		delay = -1;
	else
		delay = vdev->timeout.autosuspend;

	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, delay);

	ivpu_dbg(vdev, PM, "Autosuspend delay = %d\n", delay);
}

void ivpu_pm_cancel_recovery(struct ivpu_device *vdev)
{
	drm_WARN_ON(&vdev->drm, delayed_work_pending(&vdev->pm->job_timeout_work));
	cancel_work_sync(&vdev->pm->recovery_work);
}

void ivpu_pm_enable(struct ivpu_device *vdev)
{
	struct device *dev = vdev->drm.dev;

	pm_runtime_set_active(dev);
	pm_runtime_allow(dev);
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);
}

void ivpu_pm_disable(struct ivpu_device *vdev)
{
	pm_runtime_get_noresume(vdev->drm.dev);
	pm_runtime_forbid(vdev->drm.dev);
}
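/*
 * Usage sketch (not part of this file): callers that touch the HW are
 * expected to bracket the access with the runtime PM helpers above, e.g.:
 *
 *	ret = ivpu_rpm_get(vdev);
 *	if (ret < 0)
 *		return ret;
 *	... access the VPU ...
 *	ivpu_rpm_put(vdev);
 *
 * ivpu_rpm_put() only marks the device busy and drops the reference; the
 * actual suspend happens later, after the autosuspend delay configured in
 * ivpu_pm_init() expires.
 */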