// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023-2024 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "xe_assert.h"
#include "xe_device.h"
#include "xe_gt_sriov_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_pm.h"
#include "xe_sriov.h"
#include "xe_sriov_printk.h"
#include "xe_sriov_vf.h"

/**
 * DOC: VF restore procedure in PF KMD and VF KMD
 *
 * Restoring the previously saved state of a VF is one of the core
 * features of SR-IOV. All major VM management applications allow saving
 * and restoring the VM state, and doing that for a VM which uses an
 * SR-IOV VF as one of its accessible devices requires support from KMD
 * on both the PF and the VF side. The VMM initiates all required
 * operations through the VFIO module, which then translates them into
 * PF KMD calls. This description focuses on those calls, leaving out
 * the module which initiates these steps (VFIO).
 *
 * In order to start the restore procedure, GuC needs to keep the VF in
 * the proper state. The PF driver can ensure GuC sets it to the
 * VF_READY state by provisioning the VF, which in turn can be done
 * after a Function Level Reset of said VF (or after it was freshly
 * created - in that case FLR is not needed). The FLR procedure ends
 * with GuC sending the `GUC_PF_NOTIFY_VF_FLR_DONE` message, and then
 * the provisioning data is sent to GuC. After the provisioning is
 * completed, the VF needs to be paused, and at that point the actual
 * restore can begin.
 *
 * During VF restore, the state of several resources is restored. These
 * may include local memory content (system memory is restored by the
 * VMM itself), values of MMIO registers, stateless compression metadata
 * and others. The final resource which also needs restoring is the
 * state of VF submission maintained within GuC. For that, the
 * `GUC_PF_OPCODE_VF_RESTORE` message is used, with a reference to the
 * state blob to be consumed by GuC.
 *
 * Next, when VFIO is asked to set the VM into the running state, the PF
 * driver sends `GUC_PF_TRIGGER_VF_RESUME` to GuC. When sent after a
 * restore, this changes the VF state within GuC to `VF_RESFIX_BLOCKED`
 * rather than the usual `VF_RUNNING`. At this point GuC triggers an
 * interrupt to inform the VF KMD within the VM that it was migrated.
 *
 * As soon as the virtual GPU of the VM starts, the VF driver within it
 * receives the MIGRATED interrupt and schedules the post-migration
 * recovery worker.
 * That worker queries GuC for new provisioning (using MMIO
 * communication) and applies fixups to any non-virtualized resources
 * used by the VF.
 *
 * When the VF driver is ready to continue operation on the newly
 * connected hardware, it sends `VF2GUC_NOTIFY_RESFIX_DONE`, which causes
 * the VF to enter the long-awaited `VF_RUNNING` state, upon which GuC
 * starts handling CTB messages and scheduling workloads from the VF::
 *
 *    PF                              GuC                             VF
 *    [ ]                              |                               |
 *    [ ] PF2GUC_VF_CONTROL(pause)     |                               |
 *    [ ]---------------------------> [ ]                              |
 *    [ ]                             [ ] GuC sets new VF state to     |
 *    [ ]                             [ ]------- VF_READY_PAUSED       |
 *    [ ]                             [ ]      |                       |
 *    [ ]                             [ ] <-----                       |
 *    [ ]                   success   [ ]                              |
 *    [ ] <---------------------------[ ]                              |
 *    [ ]                              |                               |
 *    [ ] PF loads resources from the  |                               |
 *    [ ]------- saved image supplied  |                               |
 *    [ ]      |                       |                               |
 *    [ ] <-----                       |                               |
 *    [ ]                              |                               |
 *    [ ] GUC_PF_OPCODE_VF_RESTORE     |                               |
 *    [ ]---------------------------> [ ]                              |
 *    [ ]                             [ ] GuC loads contexts and CTB   |
 *    [ ]                             [ ]------- state from image      |
 *    [ ]                             [ ]      |                       |
 *    [ ]                             [ ] <-----                       |
 *    [ ]                             [ ]                              |
 *    [ ]                             [ ] GuC sets new VF state to     |
 *    [ ]                             [ ]------- VF_RESFIX_PAUSED      |
 *    [ ]                             [ ]      |                       |
 *    [ ]                   success   [ ] <-----                       |
 *    [ ] <---------------------------[ ]                              |
 *    [ ]                              |                               |
 *    [ ] GUC_PF_TRIGGER_VF_RESUME     |                               |
 *    [ ]---------------------------> [ ]                              |
 *    [ ]                             [ ] GuC sets new VF state to     |
 *    [ ]                             [ ]------- VF_RESFIX_BLOCKED     |
 *    [ ]                             [ ]      |                       |
 *    [ ]                             [ ] <-----                       |
 *    [ ]                             [ ]                              |
 *    [ ]                             [ ] GUC_INTR_SW_INT_0            |
 *    [ ]                   success   [ ]---------------------------> [ ]
 *    [ ] <---------------------------[ ]                             [ ]
 *     |                               |   VF2GUC_QUERY_SINGLE_KLV    [ ]
 *     |                              [ ] <---------------------------[ ]
 *     |                              [ ]                             [ ]
 *     |                              [ ] new VF provisioning         [ ]
 *     |                              [ ]---------------------------> [ ]
 *     |                               |                              [ ]
 *     |                               |  VF driver applies post      [ ]
 *     |                               |  migration fixups     -------[ ]
 *     |                               |                       |      [ ]
 *     |                               |                        ----->[ ]
 *     |                               |                              [ ]
 *     |                               |  VF2GUC_NOTIFY_RESFIX_DONE   [ ]
 *     |                              [ ] <---------------------------[ ]
 *     |                              [ ]                             [ ]
 *     |                              [ ] GuC sets new VF state to    [ ]
 *     |                              [ ]------- VF_RUNNING           [ ]
 *     |                              [ ]      |                      [ ]
 *     |                              [ ] <-----                      [ ]
 *     |                              [ ]                   success   [ ]
 *     |                              [ ]---------------------------> [ ]
 *     |                               |                               |
 *     |                               |                               |
 */
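
/*
 * Example (illustrative sketch only, not part of this file's code): the
 * per-GT MIGRATED interrupt handler on the VF side is expected to flag
 * its GT as ready and then call the recovery entry point defined at the
 * end of this file. The handler name and shape below are assumptions;
 * only set_bit() on the migration.gt_flags mask and the call to
 * xe_sriov_vf_start_migration_recovery() reflect the mechanism this
 * file actually relies on:
 *
 *	static void vf_migrated_event_handler(struct xe_gt *gt)
 *	{
 *		struct xe_device *xe = gt_to_xe(gt);
 *
 *		// Mark this GT as ready; pairs with the test_bit() loop
 *		// in vf_ready_to_recovery_on_all_gts().
 *		set_bit(gt->info.id, &xe->sriov.vf.migration.gt_flags);
 *
 *		// Queues the recovery worker once all GTs are flagged.
 *		xe_sriov_vf_start_migration_recovery(xe);
 *	}
 */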

static void migration_worker_func(struct work_struct *w);

/**
 * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data.
 * @xe: the &xe_device to initialize
 */
void xe_sriov_vf_init_early(struct xe_device *xe)
{
	INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
}

/**
 * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning.
 * @xe: the &xe_device struct instance
 *
 * After migration, we need to re-query all VF configuration to make sure
 * it matches the previous provisioning. Note that most of the VF
 * provisioning is expected to stay the same, except for the GGTT range,
 * since GGTT is not virtualized per-VF.
 *
 * Return: 0 if the operation completed successfully, or a negative error
 * code otherwise.
 */
static int vf_post_migration_requery_guc(struct xe_device *xe)
{
	struct xe_gt *gt;
	unsigned int id;
	int err, ret = 0;

	for_each_gt(gt, xe, id) {
		err = xe_gt_sriov_vf_query_config(gt);
		ret = ret ?: err;
	}

	return ret;
}

/*
 * vf_post_migration_imminent - Check if a post-restore recovery is coming.
 * @xe: the &xe_device struct instance
 *
 * Return: True if the migration recovery worker will soon be running. Any
 * worker currently executing does not affect the result.
 */
static bool vf_post_migration_imminent(struct xe_device *xe)
{
	return xe->sriov.vf.migration.gt_flags != 0 ||
	       work_pending(&xe->sriov.vf.migration.worker);
}

/*
 * Notify all GuCs that the resource fixups have been applied.
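 *
 * If another migration recovery became imminent in the meantime (see
 * vf_post_migration_imminent()), the notifications are skipped; the
 * upcoming recovery pass will send them after re-applying the fixups.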
 */
static void vf_post_migration_notify_resfix_done(struct xe_device *xe)
{
	struct xe_gt *gt;
	unsigned int id;

	for_each_gt(gt, xe, id) {
		if (vf_post_migration_imminent(xe))
			goto skip;
		xe_gt_sriov_vf_notify_resfix_done(gt);
	}
	return;

skip:
	drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n");
}

static void vf_post_migration_recovery(struct xe_device *xe)
{
	int err;

	drm_dbg(&xe->drm, "migration recovery in progress\n");
	xe_pm_runtime_get(xe);
	err = vf_post_migration_requery_guc(xe);
	if (vf_post_migration_imminent(xe))
		goto defer;
	if (unlikely(err))
		goto fail;

	/* FIXME: add the recovery steps */
	vf_post_migration_notify_resfix_done(xe);
	xe_pm_runtime_put(xe);
	drm_notice(&xe->drm, "migration recovery ended\n");
	return;
defer:
	xe_pm_runtime_put(xe);
	drm_dbg(&xe->drm, "migration recovery deferred\n");
	return;
fail:
	xe_pm_runtime_put(xe);
	drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err));
	xe_device_declare_wedged(xe);
}

static void migration_worker_func(struct work_struct *w)
{
	struct xe_device *xe = container_of(w, struct xe_device,
					    sriov.vf.migration.worker);

	vf_post_migration_recovery(xe);
}

static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe)
{
	struct xe_gt *gt;
	unsigned int id;

	for_each_gt(gt, xe, id) {
		if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) {
			xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n");
			return false;
		}
	}
	return true;
}

/**
 * xe_sriov_vf_start_migration_recovery - Start VF migration recovery.
 * @xe: the &xe_device to start recovery on
 *
 * This function shall be called only by the VF.
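 *
 * The recovery worker is queued only once every GT has its readiness
 * bit set in the migration.gt_flags mask; until then, each call simply
 * returns without scheduling anything.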
 */
void xe_sriov_vf_start_migration_recovery(struct xe_device *xe)
{
	bool started;

	xe_assert(xe, IS_SRIOV_VF(xe));

	if (!vf_ready_to_recovery_on_all_gts(xe))
		return;

	WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0);
	/* Ensure other threads see that no flags are set now. */
	smp_mb();

	started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker);
	drm_info(&xe->drm, "VF migration recovery %s\n", started ?
		 "scheduled" : "already in progress");
}