1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2023-2024 Intel Corporation 4 */ 5 6 #include <drm/drm_debugfs.h> 7 #include <drm/drm_managed.h> 8 9 #include "xe_gt.h" 10 #include "xe_gt_sriov_vf.h" 11 #include "xe_guc.h" 12 #include "xe_sriov_printk.h" 13 #include "xe_sriov_vf.h" 14 #include "xe_sriov_vf_ccs.h" 15 16 /** 17 * DOC: VF restore procedure in PF KMD and VF KMD 18 * 19 * Restoring previously saved state of a VF is one of core features of 20 * SR-IOV. All major VM Management applications allow saving and restoring 21 * the VM state, and doing that to a VM which uses SRIOV VF as one of 22 * the accessible devices requires support from KMD on both PF and VF side. 23 * VMM initiates all required operations through VFIO module, which then 24 * translates them into PF KMD calls. This description will focus on these 25 * calls, leaving out the module which initiates these steps (VFIO). 26 * 27 * In order to start the restore procedure, GuC needs to keep the VF in 28 * proper state. The PF driver can ensure GuC set it to VF_READY state 29 * by provisioning the VF, which in turn can be done after Function Level 30 * Reset of said VF (or after it was freshly created - in that case FLR 31 * is not needed). The FLR procedure ends with GuC sending message 32 * `GUC_PF_NOTIFY_VF_FLR_DONE`, and then provisioning data is sent to GuC. 33 * After the provisioning is completed, the VF needs to be paused, and 34 * at that point the actual restore can begin. 35 * 36 * During VF Restore, state of several resources is restored. These may 37 * include local memory content (system memory is restored by VMM itself), 38 * values of MMIO registers, stateless compression metadata and others. 39 * The final resource which also needs restoring is state of the VF 40 * submission maintained within GuC. For that, `GUC_PF_OPCODE_VF_RESTORE` 41 * message is used, with reference to the state blob to be consumed by 42 * GuC. 43 * 44 * Next, when VFIO is asked to set the VM into running state, the PF driver 45 * sends `GUC_PF_TRIGGER_VF_RESUME` to GuC. When sent after restore, this 46 * changes VF state within GuC to `VF_RESFIX_BLOCKED` rather than the 47 * usual `VF_RUNNING`. At this point GuC triggers an interrupt to inform 48 * the VF KMD within the VM that it was migrated. 49 * 50 * As soon as Virtual GPU of the VM starts, the VF driver within receives 51 * the MIGRATED interrupt and schedules post-migration recovery worker. 52 * That worker queries GuC for new provisioning (using MMIO communication), 53 * and applies fixups to any non-virtualized resources used by the VF. 54 * 55 * When the VF driver is ready to continue operation on the newly connected 56 * hardware, it sends `VF2GUC_NOTIFY_RESFIX_DONE` which causes it to 57 * enter the long awaited `VF_RUNNING` state, and therefore start handling 58 * CTB messages and scheduling workloads from the VF:: 59 * 60 * PF GuC VF 61 * [ ] | | 62 * [ ] PF2GUC_VF_CONTROL(pause) | | 63 * [ ]---------------------------> [ ] | 64 * [ ] [ ] GuC sets new VF state to | 65 * [ ] [ ]------- VF_READY_PAUSED | 66 * [ ] [ ] | | 67 * [ ] [ ] <----- | 68 * [ ] success [ ] | 69 * [ ] <---------------------------[ ] | 70 * [ ] | | 71 * [ ] PF loads resources from the | | 72 * [ ]------- saved image supplied | | 73 * [ ] | | | 74 * [ ] <----- | | 75 * [ ] | | 76 * [ ] GUC_PF_OPCODE_VF_RESTORE | | 77 * [ ]---------------------------> [ ] | 78 * [ ] [ ] GuC loads contexts and CTB | 79 * [ ] [ ]------- state from image | 80 * [ ] [ ] | | 81 * [ ] [ ] <----- | 82 * [ ] [ ] | 83 * [ ] [ ] GuC sets new VF state to | 84 * [ ] [ ]------- VF_RESFIX_PAUSED | 85 * [ ] [ ] | | 86 * [ ] success [ ] <----- | 87 * [ ] <---------------------------[ ] | 88 * [ ] | | 89 * [ ] GUC_PF_TRIGGER_VF_RESUME | | 90 * [ ]---------------------------> [ ] | 91 * [ ] [ ] GuC sets new VF state to | 92 * [ ] [ ]------- VF_RESFIX_BLOCKED | 93 * [ ] [ ] | | 94 * [ ] [ ] <----- | 95 * [ ] [ ] | 96 * [ ] [ ] GUC_INTR_SW_INT_0 | 97 * [ ] success [ ]---------------------------> [ ] 98 * [ ] <---------------------------[ ] [ ] 99 * | | VF2GUC_QUERY_SINGLE_KLV [ ] 100 * | [ ] <---------------------------[ ] 101 * | [ ] [ ] 102 * | [ ] new VF provisioning [ ] 103 * | [ ]---------------------------> [ ] 104 * | | [ ] 105 * | | VF driver applies post [ ] 106 * | | migration fixups -------[ ] 107 * | | | [ ] 108 * | | -----> [ ] 109 * | | [ ] 110 * | | VF2GUC_NOTIFY_RESFIX_DONE [ ] 111 * | [ ] <---------------------------[ ] 112 * | [ ] [ ] 113 * | [ ] GuC sets new VF state to [ ] 114 * | [ ]------- VF_RUNNING [ ] 115 * | [ ] | [ ] 116 * | [ ] <----- [ ] 117 * | [ ] success [ ] 118 * | [ ]---------------------------> [ ] 119 * | | | 120 * | | | 121 */ 122 123 /** 124 * xe_sriov_vf_migration_supported - Report whether SR-IOV VF migration is 125 * supported or not. 126 * @xe: the &xe_device to check 127 * 128 * Returns: true if VF migration is supported, false otherwise. 129 */ 130 bool xe_sriov_vf_migration_supported(struct xe_device *xe) 131 { 132 xe_assert(xe, IS_SRIOV_VF(xe)); 133 return xe->sriov.vf.migration.enabled; 134 } 135 136 static void vf_disable_migration(struct xe_device *xe, const char *fmt, ...) 137 { 138 struct va_format vaf; 139 va_list va_args; 140 141 xe_assert(xe, IS_SRIOV_VF(xe)); 142 143 va_start(va_args, fmt); 144 vaf.fmt = fmt; 145 vaf.va = &va_args; 146 xe_sriov_notice(xe, "migration disabled: %pV\n", &vaf); 147 va_end(va_args); 148 149 xe->sriov.vf.migration.enabled = false; 150 } 151 152 static void vf_migration_init_early(struct xe_device *xe) 153 { 154 /* 155 * TODO: Add conditions to allow specific platforms, when they're 156 * supported at production quality. 157 */ 158 if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG)) 159 return vf_disable_migration(xe, 160 "experimental feature not available on production builds"); 161 162 if (GRAPHICS_VER(xe) < 20) 163 return vf_disable_migration(xe, "requires gfx version >= 20, but only %u found", 164 GRAPHICS_VER(xe)); 165 166 if (!IS_DGFX(xe)) { 167 struct xe_uc_fw_version guc_version; 168 169 xe_gt_sriov_vf_guc_versions(xe_device_get_gt(xe, 0), NULL, &guc_version); 170 if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0)) 171 return vf_disable_migration(xe, 172 "CCS migration requires GuC ABI >= 1.23 but only %u.%u found", 173 guc_version.major, guc_version.minor); 174 } 175 176 xe->sriov.vf.migration.enabled = true; 177 xe_sriov_dbg(xe, "migration support enabled\n"); 178 } 179 180 /** 181 * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data. 182 * @xe: the &xe_device to initialize 183 */ 184 void xe_sriov_vf_init_early(struct xe_device *xe) 185 { 186 vf_migration_init_early(xe); 187 } 188 189 /** 190 * xe_sriov_vf_init_late() - SR-IOV VF late initialization functions. 191 * @xe: the &xe_device to initialize 192 * 193 * This function initializes code for CCS migration. 194 * 195 * Return: 0 on success or a negative error code on failure. 196 */ 197 int xe_sriov_vf_init_late(struct xe_device *xe) 198 { 199 int err = 0; 200 201 if (xe_sriov_vf_migration_supported(xe)) 202 err = xe_sriov_vf_ccs_init(xe); 203 204 return err; 205 } 206 207 static int sa_info_vf_ccs(struct seq_file *m, void *data) 208 { 209 struct drm_info_node *node = m->private; 210 struct xe_device *xe = to_xe_device(node->minor->dev); 211 struct drm_printer p = drm_seq_file_printer(m); 212 213 xe_sriov_vf_ccs_print(xe, &p); 214 return 0; 215 } 216 217 static const struct drm_info_list debugfs_list[] = { 218 { .name = "sa_info_vf_ccs", .show = sa_info_vf_ccs }, 219 }; 220 221 /** 222 * xe_sriov_vf_debugfs_register - Register VF debugfs attributes. 223 * @xe: the &xe_device 224 * @root: the root &dentry 225 * 226 * Prepare debugfs attributes exposed by the VF. 227 */ 228 void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root) 229 { 230 drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), 231 root, xe->drm.primary); 232 } 233