// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_pm.h"

#include <linux/pm_runtime.h>

#include <drm/drm_managed.h>
#include <drm/ttm/ttm_placement.h>

#include "display/xe_display.h"
#include "xe_bo.h"
#include "xe_bo_evict.h"
#include "xe_device.h"
#include "xe_device_sysfs.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_guc.h"
#include "xe_irq.h"
#include "xe_pcode.h"
#include "xe_wa.h"

/**
 * DOC: Xe Power Management
 *
 * Xe PM shall be guided by simplicity.
 * Use the simplest hook options whenever possible.
 * Let's not reinvent the runtime_pm references and hooks.
 * Shall have a clear separation of display and gt underneath this component.
 *
 * What's next:
 *
 * For now s2idle and s3 are only working on integrated devices. The next step
 * is to iterate through all the VRAM BOs, backing them up into system memory
 * before allowing the system suspend.
 *
 * Also runtime_pm needs to be here from the beginning.
 *
 * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC
 * and no wait boost. Frequency optimizations should come at a later stage.
 */

/**
 * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_suspend(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err;

	for_each_gt(gt, xe, id)
		xe_gt_suspend_prepare(gt);

	/* FIXME: Super racy... */
	err = xe_bo_evict_all(xe);
	if (err)
		return err;

	xe_display_pm_suspend(xe);

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err) {
			xe_display_pm_resume(xe);
			return err;
		}
	}

	xe_irq_suspend(xe);

	xe_display_pm_suspend_late(xe);

	return 0;
}

/**
 * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_resume(struct xe_device *xe)
{
	struct xe_tile *tile;
	struct xe_gt *gt;
	u8 id;
	int err;

	for_each_tile(tile, xe, id)
		xe_wa_apply_tile_workarounds(tile);

	for_each_gt(gt, xe, id) {
		err = xe_pcode_init(gt);
		if (err)
			return err;
	}

	xe_display_pm_resume_early(xe);

	/*
	 * This only restores pinned memory, which is the memory required for
	 * the GT(s) to resume.
	 */
	err = xe_bo_restore_kernel(xe);
	if (err)
		return err;

	xe_irq_resume(xe);

	xe_display_pm_resume(xe);

	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	err = xe_bo_restore_user(xe);
	if (err)
		return err;

	return 0;
}

static bool xe_pm_pci_d3cold_capable(struct pci_dev *pdev)
{
	struct pci_dev *root_pdev;

	root_pdev = pcie_find_root_port(pdev);
	if (!root_pdev)
		return false;

	/* D3Cold requires PME capability and _PR3 power resource */
	if (!pci_pme_capable(root_pdev, PCI_D3cold) || !pci_pr3_present(root_pdev))
		return false;

	return true;
}

static void xe_pm_runtime_init(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/*
	 * Disable the system suspend direct complete optimization.
	 * We need to ensure that the regular device suspend/resume functions
	 * are called since our runtime_pm cannot guarantee local memory
	 * eviction for d3cold.
	 * TODO: Check the HDA audio dependencies claimed by i915, and then
	 * enforce this option for integrated graphics as well.
	 */
	if (IS_DGFX(xe))
		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);

	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, 1000);
	pm_runtime_set_active(dev);
	pm_runtime_allow(dev);
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put(dev);
}

/**
 * xe_pm_init_early - Set up the VRAM userfault tracking used by runtime PM
 * @xe: xe device instance
 */
void xe_pm_init_early(struct xe_device *xe)
{
	INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);
	drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
}

/**
 * xe_pm_init - Initialize Xe Power Management
 * @xe: xe device instance
 *
 * Determines the D3Cold capability and enables runtime PM for the device.
 */
void xe_pm_init(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);

	/* For now suspend/resume is only allowed with GuC */
	if (!xe_device_uc_enabled(xe))
		return;

	drmm_mutex_init(&xe->drm, &xe->d3cold.lock);

	xe->d3cold.capable = xe_pm_pci_d3cold_capable(pdev);

	if (xe->d3cold.capable) {
		xe_device_sysfs_init(xe);
		xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
	}

	xe_pm_runtime_init(xe);
}

/**
 * xe_pm_runtime_fini - Finalize Runtime PM
 * @xe: xe device instance
 */
void xe_pm_runtime_fini(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	pm_runtime_get_sync(dev);
	pm_runtime_forbid(dev);
}

static void xe_pm_write_callback_task(struct xe_device *xe,
				      struct task_struct *task)
{
	WRITE_ONCE(xe->pm_callback_task, task);

	/*
	 * Just in case it's somehow possible for our writes to be reordered to
	 * the extent that something else re-uses the task written in
	 * pm_callback_task. For example after returning from the callback, but
	 * before the reordered write that resets pm_callback_task back to NULL.
	 */
	smp_mb(); /* pairs with xe_pm_read_callback_task */
}

struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
{
	smp_mb(); /* pairs with xe_pm_write_callback_task */

	return READ_ONCE(xe->pm_callback_task);
}

/**
 * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold
 * @xe: xe device instance
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_pm_runtime_suspend(struct xe_device *xe)
{
	struct xe_bo *bo, *on;
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	if (xe->d3cold.allowed && xe_device_mem_access_ongoing(xe))
		return -EBUSY;

	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	/*
	 * The actual xe_device_mem_access_put() is always async underneath, so
	 * exactly where that is called should make no difference to us. However
	 * we still need to be very careful with the locks that this callback
	 * acquires and the locks that are acquired and held by any callers of
	 * xe_device_mem_access_get(). We already have the matching annotation
	 * on that side, but we also need it here. For example lockdep should be
	 * able to tell us if the following scenario is in theory possible:
	 *
	 * CPU0                          | CPU1 (kworker)
	 * lock(A)                       |
	 *                               | xe_pm_runtime_suspend()
	 *                               |      lock(A)
	 * xe_device_mem_access_get()    |
	 *
	 * This will clearly deadlock since rpm core needs to wait for
	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
	 * on CPU0 which prevents CPU1 making forward progress. With the
	 * annotation here and in xe_device_mem_access_get() lockdep will see
	 * the potential lock inversion and give us a nice splat.
	 */
	lock_map_acquire(&xe_device_mem_access_lockdep_map);

	/*
	 * Take the lock for the entire list operation, as xe_ttm_bo_destroy()
	 * and xe_bo_move_notify() also check and delete bo entries from the
	 * user fault list.
	 */
	mutex_lock(&xe->mem_access.vram_userfault.lock);
	list_for_each_entry_safe(bo, on,
				 &xe->mem_access.vram_userfault.list, vram_userfault_link)
		xe_bo_runtime_pm_release_mmap_offset(bo);
	mutex_unlock(&xe->mem_access.vram_userfault.lock);

	if (xe->d3cold.allowed) {
		err = xe_bo_evict_all(xe);
		if (err)
			goto out;
	}

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err)
			goto out;
	}

	xe_irq_suspend(xe);
out:
	lock_map_release(&xe_device_mem_access_lockdep_map);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/**
 * xe_pm_runtime_resume - Waking up from D3hot/D3Cold
 * @xe: xe device instance
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_pm_runtime_resume(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	lock_map_acquire(&xe_device_mem_access_lockdep_map);

	/*
	 * It is possible that xe has allowed d3cold but other PCIe devices in
	 * the gfx card SoC blocked d3cold, so the card has not really lost
	 * power. Detecting the primary GT's power state is sufficient.
	 */
	gt = xe_device_get_gt(xe, 0);
	xe->d3cold.power_lost = xe_guc_in_reset(&gt->uc.guc);

	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
		for_each_gt(gt, xe, id) {
			err = xe_pcode_init(gt);
			if (err)
				goto out;
		}

		/*
		 * This only restores pinned memory, which is the memory
		 * required for the GT(s) to resume.
		 */
		err = xe_bo_restore_kernel(xe);
		if (err)
			goto out;
	}

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
		err = xe_bo_restore_user(xe);
		if (err)
			goto out;
	}
out:
	lock_map_release(&xe_device_mem_access_lockdep_map);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

/**
 * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously
 * @xe: xe device instance
 *
 * Return: the pm_runtime_get_sync() return code.
 */
int xe_pm_runtime_get(struct xe_device *xe)
{
	return pm_runtime_get_sync(xe->drm.dev);
}

/**
 * xe_pm_runtime_put - Put the runtime_pm reference back and mark last busy
 * @xe: xe device instance
 *
 * Return: the pm_runtime_put() return code.
 */
int xe_pm_runtime_put(struct xe_device *xe)
{
	pm_runtime_mark_last_busy(xe->drm.dev);
	return pm_runtime_put(xe->drm.dev);
}

/**
 * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device is active
 * @xe: xe device instance
 *
 * Return: the pm_runtime_get_if_active() return code.
 */
int xe_pm_runtime_get_if_active(struct xe_device *xe)
{
	return pm_runtime_get_if_active(xe->drm.dev, true);
}

void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *bridge = pci_upstream_bridge(pdev);

	if (!bridge)
		return;

	if (!bridge->driver) {
		drm_warn(&xe->drm, "unbounded parent pci bridge, device won't support any PM\n");
		device_set_pm_not_required(&pdev->dev);
	}
}

/**
 * xe_pm_set_vram_threshold - Set the VRAM usage threshold for allowing D3Cold
 * @xe: xe device instance
 * @threshold: VRAM usage in MiB below which D3Cold is allowed
 *
 * Return: 0 on success, -EINVAL if the threshold exceeds the total VRAM size.
 */
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
	struct ttm_resource_manager *man;
	u32 vram_total_mb = 0;
	int i;

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man)
			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
	}

	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);

	if (threshold > vram_total_mb)
		return -EINVAL;

	mutex_lock(&xe->d3cold.lock);
	xe->d3cold.vram_threshold = threshold;
	mutex_unlock(&xe->d3cold.lock);

	return 0;
}

/**
 * xe_pm_d3cold_allowed_toggle - Check conditions and toggle d3cold.allowed
 * @xe: xe device instance
 *
 * Allow D3Cold only when the device is capable and the current VRAM usage is
 * below the configured threshold; otherwise keep the device limited to D3hot.
 */
void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
{
	struct ttm_resource_manager *man;
	u32 total_vram_used_mb = 0;
	u64 vram_used;
	int i;

	if (!xe->d3cold.capable) {
		xe->d3cold.allowed = false;
		return;
	}

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man) {
			vram_used = ttm_resource_manager_usage(man);
			total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024);
		}
	}

	mutex_lock(&xe->d3cold.lock);

	if (total_vram_used_mb < xe->d3cold.vram_threshold)
		xe->d3cold.allowed = true;
	else
		xe->d3cold.allowed = false;

	mutex_unlock(&xe->d3cold.lock);

	drm_dbg(&xe->drm,
		"d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed));
}