16e6d7b41STomasz Lis // SPDX-License-Identifier: MIT 26e6d7b41STomasz Lis /* 36e6d7b41STomasz Lis * Copyright © 2023-2024 Intel Corporation 46e6d7b41STomasz Lis */ 56e6d7b41STomasz Lis 66e6d7b41STomasz Lis #include <drm/drm_managed.h> 76e6d7b41STomasz Lis 86e6d7b41STomasz Lis #include "xe_assert.h" 96e6d7b41STomasz Lis #include "xe_device.h" 106e6d7b41STomasz Lis #include "xe_gt_sriov_printk.h" 11*1255954dSTomasz Lis #include "xe_gt_sriov_vf.h" 12*1255954dSTomasz Lis #include "xe_pm.h" 136e6d7b41STomasz Lis #include "xe_sriov.h" 146e6d7b41STomasz Lis #include "xe_sriov_printk.h" 156e6d7b41STomasz Lis #include "xe_sriov_vf.h" 166e6d7b41STomasz Lis 17360a1f3eSTomasz Lis /** 18360a1f3eSTomasz Lis * DOC: VF restore procedure in PF KMD and VF KMD 19360a1f3eSTomasz Lis * 20360a1f3eSTomasz Lis * Restoring previously saved state of a VF is one of core features of 21360a1f3eSTomasz Lis * SR-IOV. All major VM Management applications allow saving and restoring 22360a1f3eSTomasz Lis * the VM state, and doing that to a VM which uses SRIOV VF as one of 23360a1f3eSTomasz Lis * the accessible devices requires support from KMD on both PF and VF side. 24360a1f3eSTomasz Lis * VMM initiates all required operations through VFIO module, which then 25360a1f3eSTomasz Lis * translates them into PF KMD calls. This description will focus on these 26360a1f3eSTomasz Lis * calls, leaving out the module which initiates these steps (VFIO). 27360a1f3eSTomasz Lis * 28360a1f3eSTomasz Lis * In order to start the restore procedure, GuC needs to keep the VF in 29360a1f3eSTomasz Lis * proper state. The PF driver can ensure GuC set it to VF_READY state 30360a1f3eSTomasz Lis * by provisioning the VF, which in turn can be done after Function Level 31360a1f3eSTomasz Lis * Reset of said VF (or after it was freshly created - in that case FLR 32360a1f3eSTomasz Lis * is not needed). The FLR procedure ends with GuC sending message 33360a1f3eSTomasz Lis * `GUC_PF_NOTIFY_VF_FLR_DONE`, and then provisioning data is sent to GuC. 34360a1f3eSTomasz Lis * After the provisioning is completed, the VF needs to be paused, and 35360a1f3eSTomasz Lis * at that point the actual restore can begin. 36360a1f3eSTomasz Lis * 37360a1f3eSTomasz Lis * During VF Restore, state of several resources is restored. These may 38360a1f3eSTomasz Lis * include local memory content (system memory is restored by VMM itself), 39360a1f3eSTomasz Lis * values of MMIO registers, stateless compression metadata and others. 40360a1f3eSTomasz Lis * The final resource which also needs restoring is state of the VF 41360a1f3eSTomasz Lis * submission maintained within GuC. For that, `GUC_PF_OPCODE_VF_RESTORE` 42360a1f3eSTomasz Lis * message is used, with reference to the state blob to be consumed by 43360a1f3eSTomasz Lis * GuC. 44360a1f3eSTomasz Lis * 45360a1f3eSTomasz Lis * Next, when VFIO is asked to set the VM into running state, the PF driver 46360a1f3eSTomasz Lis * sends `GUC_PF_TRIGGER_VF_RESUME` to GuC. When sent after restore, this 47360a1f3eSTomasz Lis * changes VF state within GuC to `VF_RESFIX_BLOCKED` rather than the 48360a1f3eSTomasz Lis * usual `VF_RUNNING`. At this point GuC triggers an interrupt to inform 49360a1f3eSTomasz Lis * the VF KMD within the VM that it was migrated. 50360a1f3eSTomasz Lis * 51360a1f3eSTomasz Lis * As soon as Virtual GPU of the VM starts, the VF driver within receives 52360a1f3eSTomasz Lis * the MIGRATED interrupt and schedules post-migration recovery worker. 53360a1f3eSTomasz Lis * That worker queries GuC for new provisioning (using MMIO communication), 54360a1f3eSTomasz Lis * and applies fixups to any non-virtualized resources used by the VF. 55360a1f3eSTomasz Lis * 56360a1f3eSTomasz Lis * When the VF driver is ready to continue operation on the newly connected 57360a1f3eSTomasz Lis * hardware, it sends `VF2GUC_NOTIFY_RESFIX_DONE` which causes it to 58360a1f3eSTomasz Lis * enter the long awaited `VF_RUNNING` state, and therefore start handling 59360a1f3eSTomasz Lis * CTB messages and scheduling workloads from the VF:: 60360a1f3eSTomasz Lis * 61360a1f3eSTomasz Lis * PF GuC VF 62360a1f3eSTomasz Lis * [ ] | | 63360a1f3eSTomasz Lis * [ ] PF2GUC_VF_CONTROL(pause) | | 64360a1f3eSTomasz Lis * [ ]---------------------------> [ ] | 65360a1f3eSTomasz Lis * [ ] [ ] GuC sets new VF state to | 66360a1f3eSTomasz Lis * [ ] [ ]------- VF_READY_PAUSED | 67360a1f3eSTomasz Lis * [ ] [ ] | | 68360a1f3eSTomasz Lis * [ ] [ ] <----- | 69360a1f3eSTomasz Lis * [ ] success [ ] | 70360a1f3eSTomasz Lis * [ ] <---------------------------[ ] | 71360a1f3eSTomasz Lis * [ ] | | 72360a1f3eSTomasz Lis * [ ] PF loads resources from the | | 73360a1f3eSTomasz Lis * [ ]------- saved image supplied | | 74360a1f3eSTomasz Lis * [ ] | | | 75360a1f3eSTomasz Lis * [ ] <----- | | 76360a1f3eSTomasz Lis * [ ] | | 77360a1f3eSTomasz Lis * [ ] GUC_PF_OPCODE_VF_RESTORE | | 78360a1f3eSTomasz Lis * [ ]---------------------------> [ ] | 79360a1f3eSTomasz Lis * [ ] [ ] GuC loads contexts and CTB | 80360a1f3eSTomasz Lis * [ ] [ ]------- state from image | 81360a1f3eSTomasz Lis * [ ] [ ] | | 82360a1f3eSTomasz Lis * [ ] [ ] <----- | 83360a1f3eSTomasz Lis * [ ] [ ] | 84360a1f3eSTomasz Lis * [ ] [ ] GuC sets new VF state to | 85360a1f3eSTomasz Lis * [ ] [ ]------- VF_RESFIX_PAUSED | 86360a1f3eSTomasz Lis * [ ] [ ] | | 87360a1f3eSTomasz Lis * [ ] success [ ] <----- | 88360a1f3eSTomasz Lis * [ ] <---------------------------[ ] | 89360a1f3eSTomasz Lis * [ ] | | 90360a1f3eSTomasz Lis * [ ] GUC_PF_TRIGGER_VF_RESUME | | 91360a1f3eSTomasz Lis * [ ]---------------------------> [ ] | 92360a1f3eSTomasz Lis * [ ] [ ] GuC sets new VF state to | 93360a1f3eSTomasz Lis * [ ] [ ]------- VF_RESFIX_BLOCKED | 94360a1f3eSTomasz Lis * [ ] [ ] | | 95360a1f3eSTomasz Lis * [ ] [ ] <----- | 96360a1f3eSTomasz Lis * [ ] [ ] | 97360a1f3eSTomasz Lis * [ ] [ ] GUC_INTR_SW_INT_0 | 98360a1f3eSTomasz Lis * [ ] success [ ]---------------------------> [ ] 99360a1f3eSTomasz Lis * [ ] <---------------------------[ ] [ ] 100360a1f3eSTomasz Lis * | | VF2GUC_QUERY_SINGLE_KLV [ ] 101360a1f3eSTomasz Lis * | [ ] <---------------------------[ ] 102360a1f3eSTomasz Lis * | [ ] [ ] 103360a1f3eSTomasz Lis * | [ ] new VF provisioning [ ] 104360a1f3eSTomasz Lis * | [ ]---------------------------> [ ] 105360a1f3eSTomasz Lis * | | [ ] 106360a1f3eSTomasz Lis * | | VF driver applies post [ ] 107360a1f3eSTomasz Lis * | | migration fixups -------[ ] 108360a1f3eSTomasz Lis * | | | [ ] 109360a1f3eSTomasz Lis * | | -----> [ ] 110360a1f3eSTomasz Lis * | | [ ] 111360a1f3eSTomasz Lis * | | VF2GUC_NOTIFY_RESFIX_DONE [ ] 112360a1f3eSTomasz Lis * | [ ] <---------------------------[ ] 113360a1f3eSTomasz Lis * | [ ] [ ] 114360a1f3eSTomasz Lis * | [ ] GuC sets new VF state to [ ] 115360a1f3eSTomasz Lis * | [ ]------- VF_RUNNING [ ] 116360a1f3eSTomasz Lis * | [ ] | [ ] 117360a1f3eSTomasz Lis * | [ ] <----- [ ] 118360a1f3eSTomasz Lis * | [ ] success [ ] 119360a1f3eSTomasz Lis * | [ ]---------------------------> [ ] 120360a1f3eSTomasz Lis * | | | 121360a1f3eSTomasz Lis * | | | 122360a1f3eSTomasz Lis */ 123360a1f3eSTomasz Lis 1246e6d7b41STomasz Lis static void migration_worker_func(struct work_struct *w); 1256e6d7b41STomasz Lis 1266e6d7b41STomasz Lis /** 1276e6d7b41STomasz Lis * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data. 1286e6d7b41STomasz Lis * @xe: the &xe_device to initialize 1296e6d7b41STomasz Lis */ 1306e6d7b41STomasz Lis void xe_sriov_vf_init_early(struct xe_device *xe) 1316e6d7b41STomasz Lis { 1326e6d7b41STomasz Lis INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); 1336e6d7b41STomasz Lis } 1346e6d7b41STomasz Lis 135*1255954dSTomasz Lis /* 136*1255954dSTomasz Lis * Notify all GuCs about resource fixups apply finished. 137*1255954dSTomasz Lis */ 138*1255954dSTomasz Lis static void vf_post_migration_notify_resfix_done(struct xe_device *xe) 139*1255954dSTomasz Lis { 140*1255954dSTomasz Lis struct xe_gt *gt; 141*1255954dSTomasz Lis unsigned int id; 142*1255954dSTomasz Lis 143*1255954dSTomasz Lis for_each_gt(gt, xe, id) { 144*1255954dSTomasz Lis xe_gt_sriov_vf_notify_resfix_done(gt); 145*1255954dSTomasz Lis } 146*1255954dSTomasz Lis } 147*1255954dSTomasz Lis 1486e6d7b41STomasz Lis static void vf_post_migration_recovery(struct xe_device *xe) 1496e6d7b41STomasz Lis { 1506e6d7b41STomasz Lis drm_dbg(&xe->drm, "migration recovery in progress\n"); 151*1255954dSTomasz Lis xe_pm_runtime_get(xe); 1526e6d7b41STomasz Lis /* FIXME: add the recovery steps */ 153*1255954dSTomasz Lis vf_post_migration_notify_resfix_done(xe); 154*1255954dSTomasz Lis xe_pm_runtime_put(xe); 1556e6d7b41STomasz Lis drm_notice(&xe->drm, "migration recovery ended\n"); 1566e6d7b41STomasz Lis } 1576e6d7b41STomasz Lis 1586e6d7b41STomasz Lis static void migration_worker_func(struct work_struct *w) 1596e6d7b41STomasz Lis { 1606e6d7b41STomasz Lis struct xe_device *xe = container_of(w, struct xe_device, 1616e6d7b41STomasz Lis sriov.vf.migration.worker); 1626e6d7b41STomasz Lis 1636e6d7b41STomasz Lis vf_post_migration_recovery(xe); 1646e6d7b41STomasz Lis } 1656e6d7b41STomasz Lis 1666e6d7b41STomasz Lis static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe) 1676e6d7b41STomasz Lis { 1686e6d7b41STomasz Lis struct xe_gt *gt; 1696e6d7b41STomasz Lis unsigned int id; 1706e6d7b41STomasz Lis 1716e6d7b41STomasz Lis for_each_gt(gt, xe, id) { 1726e6d7b41STomasz Lis if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) { 1736e6d7b41STomasz Lis xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n"); 1746e6d7b41STomasz Lis return false; 1756e6d7b41STomasz Lis } 1766e6d7b41STomasz Lis } 1776e6d7b41STomasz Lis return true; 1786e6d7b41STomasz Lis } 1796e6d7b41STomasz Lis 1806e6d7b41STomasz Lis /** 1816e6d7b41STomasz Lis * xe_sriov_vf_start_migration_recovery - Start VF migration recovery. 1826e6d7b41STomasz Lis * @xe: the &xe_device to start recovery on 1836e6d7b41STomasz Lis * 1846e6d7b41STomasz Lis * This function shall be called only by VF. 1856e6d7b41STomasz Lis */ 1866e6d7b41STomasz Lis void xe_sriov_vf_start_migration_recovery(struct xe_device *xe) 1876e6d7b41STomasz Lis { 1886e6d7b41STomasz Lis bool started; 1896e6d7b41STomasz Lis 1906e6d7b41STomasz Lis xe_assert(xe, IS_SRIOV_VF(xe)); 1916e6d7b41STomasz Lis 1926e6d7b41STomasz Lis if (!vf_ready_to_recovery_on_all_gts(xe)) 1936e6d7b41STomasz Lis return; 1946e6d7b41STomasz Lis 1956e6d7b41STomasz Lis WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0); 1966e6d7b41STomasz Lis /* Ensure other threads see that no flags are set now. */ 1976e6d7b41STomasz Lis smp_mb(); 1986e6d7b41STomasz Lis 1996e6d7b41STomasz Lis started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker); 2006e6d7b41STomasz Lis drm_info(&xe->drm, "VF migration recovery %s\n", started ? 2016e6d7b41STomasz Lis "scheduled" : "already in progress"); 2026e6d7b41STomasz Lis } 203