// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_pm.h"

#include <linux/pm_runtime.h>

#include <drm/drm_managed.h>
#include <drm/ttm/ttm_placement.h>

#include "xe_bo.h"
#include "xe_bo_evict.h"
#include "xe_device.h"
#include "xe_device_sysfs.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_guc.h"
#include "xe_irq.h"
#include "xe_pcode.h"

/**
 * DOC: Xe Power Management
 *
 * Xe PM shall be guided by simplicity.
 * Use the simplest hook options whenever possible.
 * Let's not reinvent the runtime_pm references and hooks.
 * There shall be a clear separation of display and GT underneath this component.
 *
 * What's next:
 *
 * For now s2idle and s3 are only working on integrated devices. The next step
 * is to iterate through all VRAM BOs, backing them up into system memory
 * before allowing system suspend.
 *
 * Also runtime_pm needs to be here from the beginning.
 *
 * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC
 * and no wait boost. Frequency optimizations should come at a later stage.
 */

/**
 * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_suspend(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err;

	for_each_gt(gt, xe, id)
		xe_gt_suspend_prepare(gt);

	/* FIXME: Super racy... */
	err = xe_bo_evict_all(xe);
	if (err)
		return err;

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err)
			return err;
	}

	xe_irq_suspend(xe);

	return 0;
}

/**
 * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
 * @xe: xe device instance
 *
 * Return: 0 on success
 */
int xe_pm_resume(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err;

	for_each_gt(gt, xe, id) {
		err = xe_pcode_init(gt);
		if (err)
			return err;
	}

	/*
	 * This only restores pinned memory, which is the memory required for
	 * the GT(s) to resume.
	 */
	err = xe_bo_restore_kernel(xe);
	if (err)
		return err;

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	err = xe_bo_restore_user(xe);
	if (err)
		return err;

	return 0;
}
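/*
 * Illustrative sketch only: the two helpers above are meant to be called from
 * the PCI glue's dev_pm_ops callbacks (xe_pci.c in this tree), roughly as
 * below. The function names and the pdev_to_xe_device() accessor are
 * assumptions made for the example; the real wiring lives outside this file.
 */
static int __maybe_unused xe_pm_example_pci_suspend(struct device *dev)
{
	struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev));

	return xe_pm_suspend(xe);
}

static int __maybe_unused xe_pm_example_pci_resume(struct device *dev)
{
	struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev));

	return xe_pm_resume(xe);
}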
static bool xe_pm_pci_d3cold_capable(struct pci_dev *pdev)
{
	struct pci_dev *root_pdev;

	root_pdev = pcie_find_root_port(pdev);
	if (!root_pdev)
		return false;

	/* D3Cold requires PME capability and _PR3 power resource */
	if (!pci_pme_capable(root_pdev, PCI_D3cold) || !pci_pr3_present(root_pdev))
		return false;

	return true;
}

static void xe_pm_runtime_init(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	/*
	 * Disable the system suspend direct complete optimization.
	 * We need to ensure that the regular device suspend/resume functions
	 * are called since our runtime_pm cannot guarantee local memory
	 * eviction for d3cold.
	 * TODO: Check the HDA audio dependencies claimed by i915, and then
	 * enforce this option for integrated graphics as well.
	 */
	if (IS_DGFX(xe))
		dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);

	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, 1000);
	pm_runtime_set_active(dev);
	pm_runtime_allow(dev);
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put(dev);
}

void xe_pm_init(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);

	drmm_mutex_init(&xe->drm, &xe->d3cold.lock);

	xe->d3cold.capable = xe_pm_pci_d3cold_capable(pdev);

	if (xe->d3cold.capable) {
		xe_device_sysfs_init(xe);
		xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
	}

	xe_pm_runtime_init(xe);
}

void xe_pm_runtime_fini(struct xe_device *xe)
{
	struct device *dev = xe->drm.dev;

	pm_runtime_get_sync(dev);
	pm_runtime_forbid(dev);
}

static void xe_pm_write_callback_task(struct xe_device *xe,
				      struct task_struct *task)
{
	WRITE_ONCE(xe->pm_callback_task, task);

	/*
	 * Just in case it's somehow possible for our writes to be reordered to
	 * the extent that something else re-uses the task written in
	 * pm_callback_task. For example after returning from the callback, but
	 * before the reordered write that resets pm_callback_task back to NULL.
	 */
	smp_mb(); /* pairs with xe_pm_read_callback_task */
}

struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
{
	smp_mb(); /* pairs with xe_pm_write_callback_task */

	return READ_ONCE(xe->pm_callback_task);
}
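/*
 * Illustrative sketch only: the point of the pair above is that code running
 * inside our own runtime PM callbacks (where pm_callback_task == current)
 * must not take another runtime PM reference, or it would recurse into the
 * rpm core. The caller-side check is assumed to live with
 * xe_device_mem_access_get() / _put() and look roughly like below; the
 * function name here is hypothetical.
 */
static bool __maybe_unused xe_pm_example_in_pm_callback(struct xe_device *xe)
{
	/* True when called from within our own suspend/resume callback */
	return xe_pm_read_callback_task(xe) == current;
}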
int xe_pm_runtime_suspend(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	if (xe->d3cold.allowed && xe_device_mem_access_ongoing(xe))
		return -EBUSY;

	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	/*
	 * The actual xe_device_mem_access_put() is always async underneath, so
	 * exactly where that is called should make no difference to us. However
	 * we still need to be very careful with the locks that this callback
	 * acquires and the locks that are acquired and held by any callers of
	 * xe_device_mem_access_get(). We already have the matching annotation
	 * on that side, but we also need it here. For example lockdep should be
	 * able to tell us if the following scenario is in theory possible:
	 *
	 *    CPU0                          | CPU1 (kworker)
	 *    lock(A)                       |
	 *                                  | xe_pm_runtime_suspend()
	 *                                  |      lock(A)
	 *    xe_device_mem_access_get()    |
	 *
	 * This will clearly deadlock since rpm core needs to wait for
	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
	 * on CPU0 which prevents CPU1 making forward progress. With the
	 * annotation here and in xe_device_mem_access_get() lockdep will see
	 * the potential lock inversion and give us a nice splat.
	 */
	lock_map_acquire(&xe_device_mem_access_lockdep_map);

	if (xe->d3cold.allowed) {
		err = xe_bo_evict_all(xe);
		if (err)
			goto out;
	}

	for_each_gt(gt, xe, id) {
		err = xe_gt_suspend(gt);
		if (err)
			goto out;
	}

	xe_irq_suspend(xe);
out:
	lock_map_release(&xe_device_mem_access_lockdep_map);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}

int xe_pm_runtime_resume(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err = 0;

	/* Disable access_ongoing asserts and prevent recursive pm calls */
	xe_pm_write_callback_task(xe, current);

	lock_map_acquire(&xe_device_mem_access_lockdep_map);

	/*
	 * It is possible that xe has allowed d3cold but other PCIe devices in
	 * the graphics card's SoC have blocked d3cold, so the card has not
	 * really lost power. Detecting the primary GT's power state is
	 * sufficient.
	 */
	gt = xe_device_get_gt(xe, 0);
	xe->d3cold.power_lost = xe_guc_in_reset(&gt->uc.guc);

	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
		for_each_gt(gt, xe, id) {
			err = xe_pcode_init(gt);
			if (err)
				goto out;
		}

		/*
		 * This only restores pinned memory, which is the memory
		 * required for the GT(s) to resume.
		 */
		err = xe_bo_restore_kernel(xe);
		if (err)
			goto out;
	}

	xe_irq_resume(xe);

	for_each_gt(gt, xe, id)
		xe_gt_resume(gt);

	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
		err = xe_bo_restore_user(xe);
		if (err)
			goto out;
	}
out:
	lock_map_release(&xe_device_mem_access_lockdep_map);
	xe_pm_write_callback_task(xe, NULL);
	return err;
}
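/*
 * Illustrative sketch only: the "matching annotation" mentioned in the
 * comment inside xe_pm_runtime_suspend() is assumed to be a dummy
 * acquire/release of the same lockdep map on the xe_device_mem_access_get()
 * side, taken before any runtime PM reference, roughly as below. The function
 * name is hypothetical; the real annotation lives alongside
 * xe_device_mem_access_get() outside this file.
 */
static void __maybe_unused xe_pm_example_mem_access_annotate(struct xe_device *xe)
{
	/*
	 * Tell lockdep that any lock held by the caller here may also be
	 * needed while the runtime suspend/resume callbacks run, so that
	 * potential inversions are reported.
	 */
	lock_map_acquire(&xe_device_mem_access_lockdep_map);
	lock_map_release(&xe_device_mem_access_lockdep_map);
}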
int xe_pm_runtime_get(struct xe_device *xe)
{
	return pm_runtime_get_sync(xe->drm.dev);
}

int xe_pm_runtime_put(struct xe_device *xe)
{
	pm_runtime_mark_last_busy(xe->drm.dev);
	return pm_runtime_put(xe->drm.dev);
}

int xe_pm_runtime_get_if_active(struct xe_device *xe)
{
	return pm_runtime_get_if_active(xe->drm.dev, true);
}

void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_dev *bridge = pci_upstream_bridge(pdev);

	if (!bridge)
		return;

	if (!bridge->driver) {
		drm_warn(&xe->drm, "unbounded parent pci bridge, device won't support any PM\n");
		device_set_pm_not_required(&pdev->dev);
	}
}

int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
	struct ttm_resource_manager *man;
	u32 vram_total_mb = 0;
	int i;

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man)
			vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024);
	}

	drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb);

	if (threshold > vram_total_mb)
		return -EINVAL;

	mutex_lock(&xe->d3cold.lock);
	xe->d3cold.vram_threshold = threshold;
	mutex_unlock(&xe->d3cold.lock);

	return 0;
}

void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
{
	struct ttm_resource_manager *man;
	u32 total_vram_used_mb = 0;
	u64 vram_used;
	int i;

	if (!xe->d3cold.capable) {
		xe->d3cold.allowed = false;
		return;
	}

	for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
		man = ttm_manager_type(&xe->ttm, i);
		if (man) {
			vram_used = ttm_resource_manager_usage(man);
			total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used,
							       1024 * 1024);
		}
	}

	mutex_lock(&xe->d3cold.lock);

	if (total_vram_used_mb < xe->d3cold.vram_threshold)
		xe->d3cold.allowed = true;
	else
		xe->d3cold.allowed = false;

	mutex_unlock(&xe->d3cold.lock);
}
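/*
 * Illustrative sketch only: before the device is allowed to enter runtime
 * suspend, the PCI glue (xe_pci.c in this tree) is expected to re-evaluate
 * whether d3cold may be used, based on the threshold configured above and the
 * current VRAM usage. A minimal sketch of that idle-time decision, using a
 * hypothetical function name, could look like below.
 */
static int __maybe_unused xe_pm_example_runtime_idle(struct xe_device *xe)
{
	/*
	 * Recompute d3cold.allowed: d3cold is only allowed while VRAM usage
	 * stays below d3cold.vram_threshold, since entering d3cold means
	 * evicting all of VRAM to system memory in xe_pm_runtime_suspend().
	 */
	xe_pm_d3cold_allowed_toggle(xe);

	return 0;
}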