1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2023-2024 Intel Corporation 4 */ 5 6 #include <drm/drm_debugfs.h> 7 #include <drm/drm_managed.h> 8 9 #include "xe_gt.h" 10 #include "xe_gt_sriov_vf.h" 11 #include "xe_guc.h" 12 #include "xe_sriov_printk.h" 13 #include "xe_sriov_vf.h" 14 #include "xe_sriov_vf_ccs.h" 15 16 /** 17 * DOC: VF restore procedure in PF KMD and VF KMD 18 * 19 * Restoring previously saved state of a VF is one of core features of 20 * SR-IOV. All major VM Management applications allow saving and restoring 21 * the VM state, and doing that to a VM which uses SRIOV VF as one of 22 * the accessible devices requires support from KMD on both PF and VF side. 23 * VMM initiates all required operations through VFIO module, which then 24 * translates them into PF KMD calls. This description will focus on these 25 * calls, leaving out the module which initiates these steps (VFIO). 26 * 27 * In order to start the restore procedure, GuC needs to keep the VF in 28 * proper state. The PF driver can ensure GuC set it to VF_READY state 29 * by provisioning the VF, which in turn can be done after Function Level 30 * Reset of said VF (or after it was freshly created - in that case FLR 31 * is not needed). The FLR procedure ends with GuC sending message 32 * `GUC_PF_NOTIFY_VF_FLR_DONE`, and then provisioning data is sent to GuC. 33 * After the provisioning is completed, the VF needs to be paused, and 34 * at that point the actual restore can begin. 35 * 36 * During VF Restore, state of several resources is restored. These may 37 * include local memory content (system memory is restored by VMM itself), 38 * values of MMIO registers, stateless compression metadata and others. 39 * The final resource which also needs restoring is state of the VF 40 * submission maintained within GuC. For that, `GUC_PF_OPCODE_VF_RESTORE` 41 * message is used, with reference to the state blob to be consumed by 42 * GuC. 43 * 44 * Next, when VFIO is asked to set the VM into running state, the PF driver 45 * sends `GUC_PF_TRIGGER_VF_RESUME` to GuC. When sent after restore, this 46 * changes VF state within GuC to `VF_RESFIX_BLOCKED` rather than the 47 * usual `VF_RUNNING`. At this point GuC triggers an interrupt to inform 48 * the VF KMD within the VM that it was migrated. 49 * 50 * As soon as Virtual GPU of the VM starts, the VF driver within receives 51 * the MIGRATED interrupt and schedules post-migration recovery worker. 52 * That worker queries GuC for new provisioning (using MMIO communication), 53 * and applies fixups to any non-virtualized resources used by the VF. 54 * 55 * When the VF driver is ready to continue operation on the newly connected 56 * hardware, it sends `VF2GUC_NOTIFY_RESFIX_DONE` which causes it to 57 * enter the long awaited `VF_RUNNING` state, and therefore start handling 58 * CTB messages and scheduling workloads from the VF:: 59 * 60 * PF GuC VF 61 * [ ] | | 62 * [ ] PF2GUC_VF_CONTROL(pause) | | 63 * [ ]---------------------------> [ ] | 64 * [ ] [ ] GuC sets new VF state to | 65 * [ ] [ ]------- VF_READY_PAUSED | 66 * [ ] [ ] | | 67 * [ ] [ ] <----- | 68 * [ ] success [ ] | 69 * [ ] <---------------------------[ ] | 70 * [ ] | | 71 * [ ] PF loads resources from the | | 72 * [ ]------- saved image supplied | | 73 * [ ] | | | 74 * [ ] <----- | | 75 * [ ] | | 76 * [ ] GUC_PF_OPCODE_VF_RESTORE | | 77 * [ ]---------------------------> [ ] | 78 * [ ] [ ] GuC loads contexts and CTB | 79 * [ ] [ ]------- state from image | 80 * [ ] [ ] | | 81 * [ ] [ ] <----- | 82 * [ ] [ ] | 83 * [ ] [ ] GuC sets new VF state to | 84 * [ ] [ ]------- VF_RESFIX_PAUSED | 85 * [ ] [ ] | | 86 * [ ] success [ ] <----- | 87 * [ ] <---------------------------[ ] | 88 * [ ] | | 89 * [ ] GUC_PF_TRIGGER_VF_RESUME | | 90 * [ ]---------------------------> [ ] | 91 * [ ] [ ] GuC sets new VF state to | 92 * [ ] [ ]------- VF_RESFIX_BLOCKED | 93 * [ ] [ ] | | 94 * [ ] [ ] <----- | 95 * [ ] [ ] | 96 * [ ] [ ] GUC_INTR_SW_INT_0 | 97 * [ ] success [ ]---------------------------> [ ] 98 * [ ] <---------------------------[ ] [ ] 99 * | | VF2GUC_QUERY_SINGLE_KLV [ ] 100 * | [ ] <---------------------------[ ] 101 * | [ ] [ ] 102 * | [ ] new VF provisioning [ ] 103 * | [ ]---------------------------> [ ] 104 * | | [ ] 105 * | | VF driver applies post [ ] 106 * | | migration fixups -------[ ] 107 * | | | [ ] 108 * | | -----> [ ] 109 * | | [ ] 110 * | | VF2GUC_NOTIFY_RESFIX_DONE [ ] 111 * | [ ] <---------------------------[ ] 112 * | [ ] [ ] 113 * | [ ] GuC sets new VF state to [ ] 114 * | [ ]------- VF_RUNNING [ ] 115 * | [ ] | [ ] 116 * | [ ] <----- [ ] 117 * | [ ] success [ ] 118 * | [ ]---------------------------> [ ] 119 * | | | 120 * | | | 121 */ 122 123 /** 124 * xe_sriov_vf_migration_supported - Report whether SR-IOV VF migration is 125 * supported or not. 126 * @xe: the &xe_device to check 127 * 128 * Returns: true if VF migration is supported, false otherwise. 129 */ 130 bool xe_sriov_vf_migration_supported(struct xe_device *xe) 131 { 132 xe_assert(xe, IS_SRIOV_VF(xe)); 133 return !xe->sriov.vf.migration.disabled; 134 } 135 136 /** 137 * xe_sriov_vf_migration_disable - Turn off VF migration with given log message. 138 * @xe: the &xe_device instance. 139 * @fmt: format string for the log message, to be combined with following VAs. 140 */ 141 void xe_sriov_vf_migration_disable(struct xe_device *xe, const char *fmt, ...) 142 { 143 struct va_format vaf; 144 va_list va_args; 145 146 xe_assert(xe, IS_SRIOV_VF(xe)); 147 148 va_start(va_args, fmt); 149 vaf.fmt = fmt; 150 vaf.va = &va_args; 151 xe_sriov_notice(xe, "migration disabled: %pV\n", &vaf); 152 va_end(va_args); 153 154 xe->sriov.vf.migration.disabled = true; 155 } 156 157 static void vf_migration_init_early(struct xe_device *xe) 158 { 159 if (!xe_device_has_memirq(xe)) 160 return xe_sriov_vf_migration_disable(xe, "requires memory-based IRQ support"); 161 162 } 163 164 /** 165 * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data. 166 * @xe: the &xe_device to initialize 167 */ 168 void xe_sriov_vf_init_early(struct xe_device *xe) 169 { 170 vf_migration_init_early(xe); 171 } 172 173 /** 174 * xe_sriov_vf_init_late() - SR-IOV VF late initialization functions. 175 * @xe: the &xe_device to initialize 176 * 177 * This function initializes code for CCS migration. 178 * 179 * Return: 0 on success or a negative error code on failure. 180 */ 181 int xe_sriov_vf_init_late(struct xe_device *xe) 182 { 183 return xe_sriov_vf_ccs_init(xe); 184 } 185 186 static int sa_info_vf_ccs(struct seq_file *m, void *data) 187 { 188 struct drm_info_node *node = m->private; 189 struct xe_device *xe = to_xe_device(node->minor->dev); 190 struct drm_printer p = drm_seq_file_printer(m); 191 192 xe_sriov_vf_ccs_print(xe, &p); 193 return 0; 194 } 195 196 static const struct drm_info_list debugfs_list[] = { 197 { .name = "sa_info_vf_ccs", .show = sa_info_vf_ccs }, 198 }; 199 200 /** 201 * xe_sriov_vf_debugfs_register - Register VF debugfs attributes. 202 * @xe: the &xe_device 203 * @root: the root &dentry 204 * 205 * Prepare debugfs attributes exposed by the VF. 206 */ 207 void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root) 208 { 209 drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), 210 root, xe->drm.primary); 211 } 212