// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_pm.h"

#include <linux/pm_runtime.h>

#include <drm/drm_managed.h>
#include <drm/ttm/ttm_placement.h>

#include "display/xe_display.h"
#include "xe_bo.h"
#include "xe_bo_evict.h"
#include "xe_device.h"
#include "xe_device_sysfs.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_guc.h"
#include "xe_irq.h"
#include "xe_pcode.h"
#include "xe_wa.h"

/**
 * DOC: Xe Power Management
 *
 * Xe PM shall be guided by simplicity.
 * Use the simplest hook options whenever possible.
 * Let's not reinvent the runtime_pm references and hooks.
 * There shall be a clear separation of display and GT underneath this component.
 *
 * What's next:
 *
 * For now, s2idle and s3 only work on integrated devices. The next step
 * is to iterate over all VRAM BOs, backing them up into system memory
 * before allowing the system suspend.
 *
 * Also, runtime_pm needs to be here from the beginning.
 *
 * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC
 * and no wait boost. Frequency optimizations should come in a later stage.
 */

/**
 * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_suspend(struct xe_device *xe)
{
        struct xe_gt *gt;
        u8 id;
        int err;

        for_each_gt(gt, xe, id)
                xe_gt_suspend_prepare(gt);

        /* FIXME: Super racy... */
        err = xe_bo_evict_all(xe);
        if (err)
                return err;

        xe_display_pm_suspend(xe);

        for_each_gt(gt, xe, id) {
                err = xe_gt_suspend(gt);
                if (err) {
                        xe_display_pm_resume(xe);
                        return err;
                }
        }

        xe_irq_suspend(xe);

        xe_display_pm_suspend_late(xe);

        return 0;
}

/**
 * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_resume(struct xe_device *xe)
{
        struct xe_tile *tile;
        struct xe_gt *gt;
        u8 id;
        int err;

        for_each_tile(tile, xe, id)
                xe_wa_apply_tile_workarounds(tile);

        for_each_gt(gt, xe, id) {
                err = xe_pcode_init(gt);
                if (err)
                        return err;
        }

        xe_display_pm_resume_early(xe);

        /*
         * This only restores pinned memory, which is the memory required for
         * the GT(s) to resume.
         */
        err = xe_bo_restore_kernel(xe);
        if (err)
                return err;

        xe_irq_resume(xe);

        xe_display_pm_resume(xe);

        for_each_gt(gt, xe, id)
                xe_gt_resume(gt);

        err = xe_bo_restore_user(xe);
        if (err)
                return err;

        return 0;
}
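/*
 * For illustration only (not part of this file): a minimal sketch of how the
 * PCI glue layer might wire the system suspend helper above into its own
 * callback. The function name and the pdev handling details below are
 * assumptions about that glue code, not the driver's actual PCI layer:
 *
 *      static int xe_pci_suspend_example(struct device *dev)
 *      {
 *              struct pci_dev *pdev = to_pci_dev(dev);
 *              struct xe_device *xe = pdev_to_xe_device(pdev);
 *              int err;
 *
 *              err = xe_pm_suspend(xe);
 *              if (err)
 *                      return err;
 *
 *              pci_save_state(pdev);
 *              pci_disable_device(pdev);
 *
 *              return 0;
 *      }
 */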
static bool xe_pm_pci_d3cold_capable(struct xe_device *xe)
{
        struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
        struct pci_dev *root_pdev;

        root_pdev = pcie_find_root_port(pdev);
        if (!root_pdev)
                return false;

        /* D3Cold requires PME capability */
        if (!pci_pme_capable(root_pdev, PCI_D3cold)) {
                drm_dbg(&xe->drm, "d3cold: PME# not supported\n");
                return false;
        }

        /* D3Cold requires _PR3 power resource */
        if (!pci_pr3_present(root_pdev)) {
                drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n");
                return false;
        }

        return true;
}

static void xe_pm_runtime_init(struct xe_device *xe)
{
        struct device *dev = xe->drm.dev;

        /*
         * Disable the system suspend direct complete optimization.
         * We need to ensure that the regular device suspend/resume functions
         * are called since our runtime_pm cannot guarantee local memory
         * eviction for d3cold.
         * TODO: Check HDA audio dependencies claimed by i915, and then extend
         *       this option to integrated graphics as well.
         */
        if (IS_DGFX(xe))
                dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);

        pm_runtime_use_autosuspend(dev);
        pm_runtime_set_autosuspend_delay(dev, 1000);
        pm_runtime_set_active(dev);
        pm_runtime_allow(dev);
        pm_runtime_mark_last_busy(dev);
        pm_runtime_put(dev);
}

/**
 * xe_pm_init_early - Early initialization of PM-related state
 * @xe: xe device instance
 *
 * Initializes the VRAM userfault list and its lock, which runtime suspend
 * walks to release the mmap offsets of VRAM BOs.
 */
void xe_pm_init_early(struct xe_device *xe)
{
        INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);
        drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
}

/**
 * xe_pm_init - Initialize Xe Power Management
 * @xe: xe device instance
 *
 * Sets up the D3Cold state and enables runtime PM. For now this is only done
 * when the GuC is enabled, since suspend/resume depends on it.
 */
void xe_pm_init(struct xe_device *xe)
{
        /* For now suspend/resume is only allowed with GuC */
        if (!xe_device_uc_enabled(xe))
                return;

        drmm_mutex_init(&xe->drm, &xe->d3cold.lock);

        xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe);

        if (xe->d3cold.capable) {
                xe_device_sysfs_init(xe);
                xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
        }

        xe_pm_runtime_init(xe);
}

/**
 * xe_pm_runtime_fini - Finalize runtime PM on device removal
 * @xe: xe device instance
 *
 * Takes a runtime PM reference and forbids further runtime suspend, undoing
 * what xe_pm_runtime_init() enabled.
 */
void xe_pm_runtime_fini(struct xe_device *xe)
{
        struct device *dev = xe->drm.dev;

        pm_runtime_get_sync(dev);
        pm_runtime_forbid(dev);
}

static void xe_pm_write_callback_task(struct xe_device *xe,
                                      struct task_struct *task)
{
        WRITE_ONCE(xe->pm_callback_task, task);

        /*
         * Just in case it's somehow possible for our writes to be reordered to
         * the extent that something else re-uses the task written in
         * pm_callback_task. For example after returning from the callback, but
         * before the reordered write that resets pm_callback_task back to NULL.
         */
        smp_mb(); /* pairs with xe_pm_read_callback_task */
}

/**
 * xe_pm_read_callback_task - Retrieve the task running a PM callback, if any
 * @xe: xe device instance
 *
 * Return: the task_struct last written by xe_pm_write_callback_task(), or
 * NULL if no runtime PM callback is currently in progress.
 */
struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
{
        smp_mb(); /* pairs with xe_pm_write_callback_task */

        return READ_ONCE(xe->pm_callback_task);
}
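/*
 * For illustration only: how a helper such as xe_device_mem_access_get()
 * might use xe_pm_read_callback_task() to detect that it is being called
 * from inside one of the runtime PM callbacks below, and therefore must not
 * take another runtime PM reference. This is a sketch of the intended usage,
 * not the actual implementation in xe_device.c:
 *
 *      if (xe_pm_read_callback_task(xe) == current) {
 *              // Already inside xe_pm_runtime_suspend()/resume(); taking a
 *              // runtime PM reference here would deadlock against the rpm
 *              // core, so only account the access and return.
 *              return;
 *      }
 */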
/**
 * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold
 * @xe: xe device instance
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_pm_runtime_suspend(struct xe_device *xe)
{
        struct xe_bo *bo, *on;
        struct xe_gt *gt;
        u8 id;
        int err = 0;

        if (xe->d3cold.allowed && xe_device_mem_access_ongoing(xe))
                return -EBUSY;

        /* Disable access_ongoing asserts and prevent recursive pm calls */
        xe_pm_write_callback_task(xe, current);

        /*
         * The actual xe_device_mem_access_put() is always async underneath, so
         * exactly where that is called should make no difference to us. However
         * we still need to be very careful with the locks that this callback
         * acquires and the locks that are acquired and held by any callers of
         * xe_device_mem_access_get(). We already have the matching annotation
         * on that side, but we also need it here. For example lockdep should be
         * able to tell us if the following scenario is in theory possible:
         *
         * CPU0                          | CPU1 (kworker)
         * lock(A)                       |
         *                               | xe_pm_runtime_suspend()
         *                               |      lock(A)
         * xe_device_mem_access_get()    |
         *
         * This will clearly deadlock since the rpm core needs to wait for
         * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
         * on CPU0, which prevents CPU1 making forward progress. With the
         * annotation here and in xe_device_mem_access_get() lockdep will see
         * the potential lock inversion and give us a nice splat.
         */
        lock_map_acquire(&xe_device_mem_access_lockdep_map);

        /*
         * Hold the lock over the entire list walk, since xe_ttm_bo_destroy()
         * and xe_bo_move_notify() also check and delete BO entries from the
         * userfault list.
         */
        mutex_lock(&xe->mem_access.vram_userfault.lock);
        list_for_each_entry_safe(bo, on,
                                 &xe->mem_access.vram_userfault.list, vram_userfault_link)
                xe_bo_runtime_pm_release_mmap_offset(bo);
        mutex_unlock(&xe->mem_access.vram_userfault.lock);

        if (xe->d3cold.allowed) {
                err = xe_bo_evict_all(xe);
                if (err)
                        goto out;
        }

        for_each_gt(gt, xe, id) {
                err = xe_gt_suspend(gt);
                if (err)
                        goto out;
        }

        xe_irq_suspend(xe);
out:
        lock_map_release(&xe_device_mem_access_lockdep_map);
        xe_pm_write_callback_task(xe, NULL);
        return err;
}

/**
 * xe_pm_runtime_resume - Waking up from D3hot/D3Cold
 * @xe: xe device instance
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_pm_runtime_resume(struct xe_device *xe)
{
        struct xe_gt *gt;
        u8 id;
        int err = 0;

        /* Disable access_ongoing asserts and prevent recursive pm calls */
        xe_pm_write_callback_task(xe, current);

        lock_map_acquire(&xe_device_mem_access_lockdep_map);

        /*
         * It is possible that xe has allowed d3cold but other PCIe devices on
         * the graphics card SoC have blocked d3cold, in which case the card
         * has not really lost power. Checking whether the primary GT lost
         * power is sufficient to detect this.
         */
        gt = xe_device_get_gt(xe, 0);
        xe->d3cold.power_lost = xe_guc_in_reset(&gt->uc.guc);

        if (xe->d3cold.allowed && xe->d3cold.power_lost) {
                for_each_gt(gt, xe, id) {
                        err = xe_pcode_init(gt);
                        if (err)
                                goto out;
                }

                /*
                 * This only restores pinned memory, which is the memory
                 * required for the GT(s) to resume.
                 */
                err = xe_bo_restore_kernel(xe);
                if (err)
                        goto out;
        }

        xe_irq_resume(xe);

        for_each_gt(gt, xe, id)
                xe_gt_resume(gt);

        if (xe->d3cold.allowed && xe->d3cold.power_lost) {
                err = xe_bo_restore_user(xe);
                if (err)
                        goto out;
        }
out:
        lock_map_release(&xe_device_mem_access_lockdep_map);
        xe_pm_write_callback_task(xe, NULL);
        return err;
}

/**
 * xe_pm_runtime_get - Take a runtime PM reference, resuming the device if needed
 * @xe: xe device instance
 *
 * Return: the underlying pm_runtime_get_sync() result.
 */
int xe_pm_runtime_get(struct xe_device *xe)
{
        return pm_runtime_get_sync(xe->drm.dev);
}

/**
 * xe_pm_runtime_put - Drop a runtime PM reference
 * @xe: xe device instance
 *
 * Return: the underlying pm_runtime_put() result.
 */
int xe_pm_runtime_put(struct xe_device *xe)
{
        pm_runtime_mark_last_busy(xe->drm.dev);
        return pm_runtime_put(xe->drm.dev);
}

/**
 * xe_pm_runtime_get_if_active - Take a runtime PM reference if the device is active
 * @xe: xe device instance
 *
 * Return: the underlying pm_runtime_get_if_active() result.
 */
int xe_pm_runtime_get_if_active(struct xe_device *xe)
{
        return pm_runtime_get_if_active(xe->drm.dev);
}

/**
 * xe_pm_assert_unbounded_bridge - Disable PM if the upstream bridge has no driver
 * @xe: xe device instance
 */
void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
{
        struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
        struct pci_dev *bridge = pci_upstream_bridge(pdev);

        if (!bridge)
                return;

        if (!bridge->driver) {
                drm_warn(&xe->drm, "unbounded parent pci bridge, device won't support any PM features\n");
                device_set_pm_not_required(&pdev->dev);
        }
}

/**
 * xe_pm_set_vram_threshold - Set the VRAM usage threshold for allowing D3Cold
 * @xe: xe device instance
 * @threshold: VRAM usage threshold, in MiB
 *
 * Return: 0 on success, -EINVAL if @threshold exceeds the total VRAM size.
 */
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
        struct ttm_resource_manager *man;
        u32 vram_total_mb = 0;
        int i;

        for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
                man = ttm_manager_type(&xe->ttm, i);
                if (man)
                        vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
        }

        drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);

        if (threshold > vram_total_mb)
                return -EINVAL;

        mutex_lock(&xe->d3cold.lock);
        xe->d3cold.vram_threshold = threshold;
        mutex_unlock(&xe->d3cold.lock);

        return 0;
}
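/*
 * For illustration only: the threshold above is expected to be exposed to
 * userspace via the sysfs file created by xe_device_sysfs_init(). A store
 * callback for such an attribute could funnel into xe_pm_set_vram_threshold()
 * roughly like this (attribute and helper names here are assumptions, not the
 * actual xe_device_sysfs.c implementation):
 *
 *      static ssize_t vram_d3cold_threshold_store(struct device *dev,
 *                                                 struct device_attribute *attr,
 *                                                 const char *buf, size_t count)
 *      {
 *              struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev));
 *              u32 threshold;
 *              int err;
 *
 *              err = kstrtou32(buf, 0, &threshold);
 *              if (err)
 *                      return err;
 *
 *              err = xe_pm_set_vram_threshold(xe, threshold);
 *              return err ?: count;
 *      }
 */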
/**
 * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed
 * @xe: xe device instance
 *
 * D3Cold is allowed only when the device is D3Cold-capable and the current
 * VRAM usage is below the configured threshold.
 */
void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
{
        struct ttm_resource_manager *man;
        u32 total_vram_used_mb = 0;
        u64 vram_used;
        int i;

        if (!xe->d3cold.capable) {
                xe->d3cold.allowed = false;
                return;
        }

        for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
                man = ttm_manager_type(&xe->ttm, i);
                if (man) {
                        vram_used = ttm_resource_manager_usage(man);
                        total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024);
                }
        }

        mutex_lock(&xe->d3cold.lock);

        if (total_vram_used_mb < xe->d3cold.vram_threshold)
                xe->d3cold.allowed = true;
        else
                xe->d3cold.allowed = false;

        mutex_unlock(&xe->d3cold.lock);

        drm_dbg(&xe->drm,
                "d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed));
}
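/*
 * For illustration only: a plausible call site for the toggle above is the
 * PCI runtime-idle path, re-evaluating whether D3Cold should be allowed just
 * before the device goes idle. The callback name below is an assumption about
 * the PCI glue, not code from this file:
 *
 *      static int xe_pci_runtime_idle_example(struct device *dev)
 *      {
 *              struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev));
 *
 *              xe_pm_d3cold_allowed_toggle(xe);
 *
 *              return 0;
 *      }
 */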