// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023-2024 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "xe_assert.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_sriov_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc_ct.h"
#include "xe_guc_submit.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_pm.h"
#include "xe_sriov.h"
#include "xe_sriov_printk.h"
#include "xe_sriov_vf.h"
#include "xe_tile_sriov_vf.h"

/**
 * DOC: VF restore procedure in PF KMD and VF KMD
 *
 * Restoring a previously saved state of a VF is one of the core features
 * of SR-IOV. All major VM management applications allow saving and
 * restoring the VM state, and doing that for a VM which uses an SR-IOV VF
 * as one of its accessible devices requires support from KMD on both the
 * PF and the VF side. The VMM initiates all required operations through
 * the VFIO module, which then translates them into PF KMD calls. This
 * description focuses on these calls, leaving out the module which
 * initiates these steps (VFIO).
 *
 * In order to start the restore procedure, GuC needs to keep the VF in a
 * proper state. The PF driver can ensure GuC sets it to the VF_READY
 * state by provisioning the VF, which in turn can be done after a
 * Function Level Reset of said VF (or after it was freshly created - in
 * that case FLR is not needed). The FLR procedure ends with GuC sending
 * the `GUC_PF_NOTIFY_VF_FLR_DONE` message, after which the provisioning
 * data is sent to GuC. Once the provisioning is completed, the VF needs
 * to be paused, and at that point the actual restore can begin.
 *
 * During VF restore, the state of several resources is restored. These
 * may include local memory content (system memory is restored by the VMM
 * itself), values of MMIO registers, stateless compression metadata and
 * others. The final resource which also needs restoring is the state of
 * the VF submission maintained within GuC. For that, the
 * `GUC_PF_OPCODE_VF_RESTORE` message is used, with a reference to the
 * state blob to be consumed by GuC.
 *
 * Next, when VFIO is asked to set the VM into running state, the PF
 * driver sends `GUC_PF_TRIGGER_VF_RESUME` to GuC. When sent after a
 * restore, this changes the VF state within GuC to `VF_RESFIX_BLOCKED`
 * rather than the usual `VF_RUNNING`. At this point GuC triggers an
 * interrupt to inform the VF KMD within the VM that it was migrated.
 *
 * As soon as the Virtual GPU of the VM starts, the VF driver within it
 * receives the MIGRATED interrupt and schedules the post-migration
 * recovery worker. That worker queries GuC for the new provisioning
 * (using MMIO communication), and applies fixups to any non-virtualized
 * resources used by the VF.
 *
 * When the VF driver is ready to continue operation on the newly connected
 * hardware, it sends `VF2GUC_NOTIFY_RESFIX_DONE` which causes it to
 * enter the long awaited `VF_RUNNING` state, and therefore start handling
 * CTB messages and scheduling workloads from the VF::
 *
 *      PF                              GuC                             VF
 *      [ ]                              |                               |
 *      [ ] PF2GUC_VF_CONTROL(pause)     |                               |
 *      [ ]---------------------------> [ ]                              |
 *      [ ]                             [ ] GuC sets new VF state to     |
 *      [ ]                             [ ]------- VF_READY_PAUSED       |
 *      [ ]                             [ ]      |                       |
 *      [ ]                             [ ] <-----                       |
 *      [ ]                     success [ ]                              |
 *      [ ] <---------------------------[ ]                              |
 *      [ ]                              |                               |
 *      [ ] PF loads resources from the  |                               |
 *      [ ]------- saved image supplied  |                               |
 *      [ ]      |                       |                               |
 *      [ ] <-----                       |                               |
 *      [ ]                              |                               |
 *      [ ] GUC_PF_OPCODE_VF_RESTORE     |                               |
 *      [ ]---------------------------> [ ]                              |
 *      [ ]                             [ ] GuC loads contexts and CTB   |
 *      [ ]                             [ ]------- state from image      |
 *      [ ]                             [ ]      |                       |
 *      [ ]                             [ ] <-----                       |
 *      [ ]                             [ ]                              |
 *      [ ]                             [ ] GuC sets new VF state to     |
 *      [ ]                             [ ]------- VF_RESFIX_PAUSED      |
 *      [ ]                             [ ]      |                       |
 *      [ ]                     success [ ] <-----                       |
 *      [ ] <---------------------------[ ]                              |
 *      [ ]                              |                               |
 *      [ ] GUC_PF_TRIGGER_VF_RESUME     |                               |
 *      [ ]---------------------------> [ ]                              |
 *      [ ]                             [ ] GuC sets new VF state to     |
 *      [ ]                             [ ]------- VF_RESFIX_BLOCKED     |
 *      [ ]                             [ ]      |                       |
 *      [ ]                             [ ] <-----                       |
 *      [ ]                             [ ]                              |
 *      [ ]                             [ ] GUC_INTR_SW_INT_0            |
 *      [ ]                     success [ ]---------------------------> [ ]
 *      [ ] <---------------------------[ ]                             [ ]
 *       |                               |  VF2GUC_QUERY_SINGLE_KLV      [ ]
 *       |                              [ ] <---------------------------[ ]
 *       |                              [ ]                             [ ]
 *       |                              [ ] new VF provisioning         [ ]
 *       |                              [ ]---------------------------> [ ]
 *       |                               |                              [ ]
 *       |                               |  VF driver applies post      [ ]
 *       |                               |  migration fixups     -------[ ]
 *       |                               |                      |       [ ]
 *       |                               |                       -----> [ ]
 *       |                               |                              [ ]
 *       |                               |  VF2GUC_NOTIFY_RESFIX_DONE   [ ]
 *       |                              [ ] <---------------------------[ ]
 *       |                              [ ]                             [ ]
 *       |                              [ ] GuC sets new VF state to    [ ]
 *       |                              [ ]------- VF_RUNNING           [ ]
 *       |                              [ ]      |                      [ ]
 *       |                              [ ] <-----                      [ ]
 *       |                              [ ]                     success [ ]
 *       |                              [ ]---------------------------> [ ]
 *       |                               |                               |
 *       |                               |                               |
 */

static bool vf_migration_supported(struct xe_device *xe)
{
	/*
	 * TODO: Add conditions to allow specific platforms, when they're
	 * supported at production quality.
	 */
	return IS_ENABLED(CONFIG_DRM_XE_DEBUG);
}

static void migration_worker_func(struct work_struct *w);

/**
 * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data.
 * @xe: the &xe_device to initialize
 */
void xe_sriov_vf_init_early(struct xe_device *xe)
{
	INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);

	if (!vf_migration_supported(xe))
		xe_sriov_info(xe, "migration not supported by this module version\n");
}
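
/*
 * Illustrative sketch only: per the DOC section above, the VF driver
 * schedules the post-migration recovery worker when the MIGRATED
 * interrupt arrives. A GT-level handler is expected to mark the affected
 * GT in &xe_device.sriov.vf.migration.gt_flags and then kick the worker
 * set up in xe_sriov_vf_init_early(), roughly as below. The handler name
 * is hypothetical; only the flag bits and
 * xe_sriov_vf_start_migration_recovery() come from this file:
 *
 *	static void example_gt_migrated_handler(struct xe_gt *gt)
 *	{
 *		struct xe_device *xe = gt_to_xe(gt);
 *
 *		set_bit(gt->info.id, &xe->sriov.vf.migration.gt_flags);
 *		xe_sriov_vf_start_migration_recovery(xe);
 *	}
 */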

/**
 * vf_post_migration_shutdown - Stop the driver activities after VF migration.
 * @xe: the &xe_device struct instance
 *
 * After this VM is migrated and assigned to a new VF, it is running on new
 * hardware, and therefore many hardware-dependent states and related
 * structures require fixups. Without the fixups, the hardware cannot do any
 * work, and all GPU pipelines are stalled.
 * Stop some of the kernel activities to make the fixup process faster.
 */
static void vf_post_migration_shutdown(struct xe_device *xe)
{
	struct xe_gt *gt;
	unsigned int id;
	int ret = 0;

	for_each_gt(gt, xe, id) {
		xe_guc_submit_pause(&gt->uc.guc);
		ret |= xe_guc_submit_reset_block(&gt->uc.guc);
	}

	if (ret)
		drm_info(&xe->drm, "migration recovery encountered ongoing reset\n");
}

/**
 * vf_post_migration_kickstart - Restart the driver activities under new hardware.
 * @xe: the &xe_device struct instance
 *
 * After we have finished with all post-migration fixups, restart the driver
 * activities to continue feeding the GPU with workloads.
 */
static void vf_post_migration_kickstart(struct xe_device *xe)
{
	struct xe_gt *gt;
	unsigned int id;

	/*
	 * Make sure interrupts on the new HW are properly set. The GuC IRQ
	 * must be working at this point, since the recovery has already
	 * started, but the rest was not enabled using the procedure from
	 * the spec.
	 */
	xe_irq_resume(xe);

	for_each_gt(gt, xe, id) {
		xe_guc_submit_reset_unblock(&gt->uc.guc);
		xe_guc_submit_unpause(&gt->uc.guc);
	}
}

static bool gt_vf_post_migration_needed(struct xe_gt *gt)
{
	return test_bit(gt->info.id, &gt_to_xe(gt)->sriov.vf.migration.gt_flags);
}

/*
 * Notify the GuCs marked in flags that applying the resource fixups has finished.
 * @xe: the &xe_device struct instance
 * @gt_flags: flags marking to which GTs the notification shall be sent
 */
static int vf_post_migration_notify_resfix_done(struct xe_device *xe, unsigned long gt_flags)
{
	struct xe_gt *gt;
	unsigned int id;
	int err = 0;

	for_each_gt(gt, xe, id) {
		if (!test_bit(id, &gt_flags))
			continue;
		/* skip asking GuC for RESFIX exit if new recovery request arrived */
		if (gt_vf_post_migration_needed(gt))
			continue;
		err = xe_gt_sriov_vf_notify_resfix_done(gt);
		if (err)
			break;
		clear_bit(id, &gt_flags);
	}

	if (gt_flags && !err)
		drm_dbg(&xe->drm, "another recovery imminent, skipped some notifications\n");
	return err;
}

static int vf_get_next_migrated_gt_id(struct xe_device *xe)
{
	struct xe_gt *gt;
	unsigned int id;

	for_each_gt(gt, xe, id) {
		if (test_and_clear_bit(id, &xe->sriov.vf.migration.gt_flags))
			return id;
	}
	return -1;
}

static size_t post_migration_scratch_size(struct xe_device *xe)
{
	return max(xe_lrc_reg_size(xe), LRC_WA_BB_SIZE);
}
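
/*
 * A minimal sketch of the GGTT rebasing assumed by the fixups below: when
 * the VF's GGTT range starts at a different base after migration,
 * xe_gt_sriov_vf_ggtt_shift() reports the signed difference against the
 * pre-migration base, and every stored GGTT reference (GGTT nodes, HWSP
 * and ring state, GGTT addresses in pending CTB messages) is corrected by
 * adding that shift. The names below are illustrative only:
 *
 *	s64 shift = new_ggtt_base - old_ggtt_base;	(may be negative)
 *	fixed_ref = old_ref + shift;
 */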

/**
 * gt_vf_post_migration_fixups - Perform post-migration fixups on a single GT.
 * @gt: the &xe_gt struct instance
 *
 * After migration, GuC needs to be re-queried for the VF configuration to
 * check whether it matches the previous provisioning. Most of the VF
 * provisioning shall be the same, except the GGTT range, since GGTT is not
 * virtualized per-VF. If the GGTT range has changed, we have to perform
 * fixups - shift all GGTT references used anywhere within the driver.
 * After the fixups in this function succeed, it is allowed to ask the GuC
 * bound to this GT to continue normal operation.
 *
 * Returns: 0 if the operation completed successfully, or a negative error
 * code otherwise.
 */
static int gt_vf_post_migration_fixups(struct xe_gt *gt)
{
	s64 shift;
	void *buf;
	int err;

	buf = kmalloc(post_migration_scratch_size(gt_to_xe(gt)), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	err = xe_gt_sriov_vf_query_config(gt);
	if (err)
		goto out;

	shift = xe_gt_sriov_vf_ggtt_shift(gt);
	if (shift) {
		xe_tile_sriov_vf_fixup_ggtt_nodes(gt_to_tile(gt), shift);
		xe_gt_sriov_vf_default_lrcs_hwsp_rebase(gt);
		err = xe_guc_contexts_hwsp_rebase(&gt->uc.guc, buf);
		if (err)
			goto out;
		xe_guc_jobs_ring_rebase(&gt->uc.guc);
		xe_guc_ct_fixup_messages_with_ggtt(&gt->uc.guc.ct, shift);
	}

out:
	kfree(buf);
	return err;
}

static void vf_post_migration_recovery(struct xe_device *xe)
{
	unsigned long fixed_gts = 0;
	int id, err;

	drm_dbg(&xe->drm, "migration recovery in progress\n");
	xe_pm_runtime_get(xe);
	vf_post_migration_shutdown(xe);

	if (!vf_migration_supported(xe)) {
		xe_sriov_err(xe, "migration not supported by this module version\n");
		err = -ENOTRECOVERABLE;
		goto fail;
	}

	/* process every GT that was flagged for post-migration recovery */
	while (id = vf_get_next_migrated_gt_id(xe), id >= 0) {
		struct xe_gt *gt = xe_device_get_gt(xe, id);

		err = gt_vf_post_migration_fixups(gt);
		if (err)
			goto fail;

		set_bit(id, &fixed_gts);
	}

	vf_post_migration_kickstart(xe);
	err = vf_post_migration_notify_resfix_done(xe, fixed_gts);
	if (err)
		goto fail;

	xe_pm_runtime_put(xe);
	drm_notice(&xe->drm, "migration recovery ended\n");
	return;
fail:
	xe_pm_runtime_put(xe);
	drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err));
	xe_device_declare_wedged(xe);
}

static void migration_worker_func(struct work_struct *w)
{
	struct xe_device *xe = container_of(w, struct xe_device,
					    sriov.vf.migration.worker);

	vf_post_migration_recovery(xe);
}

/*
 * Check if post-restore recovery is coming on any of the GTs.
 * @xe: the &xe_device struct instance
 *
 * Return: True if the migration recovery worker will soon be running. Any
 * worker currently executing does not affect the result.
 */
static bool vf_ready_to_recovery_on_any_gts(struct xe_device *xe)
{
	struct xe_gt *gt;
	unsigned int id;

	for_each_gt(gt, xe, id) {
		if (test_bit(id, &xe->sriov.vf.migration.gt_flags))
			return true;
	}
	return false;
}

/**
 * xe_sriov_vf_start_migration_recovery - Start VF migration recovery.
 * @xe: the &xe_device to start recovery on
 *
 * This function shall be called only by the VF driver.
 */
void xe_sriov_vf_start_migration_recovery(struct xe_device *xe)
{
	bool started;

	xe_assert(xe, IS_SRIOV_VF(xe));

	if (!vf_ready_to_recovery_on_any_gts(xe))
		return;

	started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker);
	drm_info(&xe->drm, "VF migration recovery %s\n", started ?
		 "scheduled" : "already in progress");
}