// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include "instructions/xe_gpu_commands.h"
#include "instructions/xe_mi_commands.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_exec_queue_types.h"
#include "xe_guc_submit.h"
#include "xe_lrc.h"
#include "xe_migrate.h"
#include "xe_sa.h"
#include "xe_sriov_printk.h"
#include "xe_sriov_vf_ccs.h"
#include "xe_sriov_vf_ccs_types.h"

/**
 * DOC: VF save/restore of compression metadata
 *
 * The VF KMD registers two special contexts/LRCAs.
 *
 * Save context/LRCA: contains the commands and page tables needed to trigger
 * a save of the metadata / compression control surface (aka CCS) contents
 * into regular system memory in the VM.
 *
 * Restore context/LRCA: contains the commands and page tables needed to
 * trigger a restore of the metadata / compression control surface (aka CCS)
 * contents from regular system memory in the VM back into the corresponding
 * CCS pool.
 *
 * The diagram below shows the steps needed for VF save/restore of
 * compression metadata::
 *
 *  CCS Save     CCS Restore       VF KMD                           GuC       BCS
 *   LRCA           LRCA
 *     |              |               |                              |         |
 *     |              |               |                              |         |
 *     | Create Save LRCA             |                              |         |
 *    [ ]<---------------------------[ ]                             |         |
 *     |              |               |                              |         |
 *     |              |               |                              |         |
 *     |              |               | Register save LRCA           |         |
 *     |              |               | with GuC                     |         |
 *     |              |              [ ]--------------------------->[ ]        |
 *     |              |               |                              |         |
 *     |              | Create restore LRCA                          |         |
 *     |             [ ]<------------[ ]                             |         |
 *     |              |               |                              |         |
 *     |              |               | Register restore LRCA        |         |
 *     |              |               | with GuC                     |         |
 *     |              |              [ ]--------------------------->[ ]        |
 *     |              |               |                              |         |
 *     |              |               |                              |         |
 *     |              |              [ ]-------------------------    |         |
 *     |              |              [ ] Allocate main memory.  |    |         |
 *     |              |              [ ] Allocate CCS memory.   |    |         |
 *     |              |              [ ] Update main memory &   |    |         |
 *    [ ]<---------------------------[ ] CCS pages PPGTT + BB   |    |         |
 *     |             [ ]<------------[ ] cmds to save & restore.|    |         |
 *     |              |              [ ]<------------------------    |         |
 *     |              |               |                              |         |
 *     |              |               |                              |         |
 *     |              |               |                              |         |
 *     :              :               :                              :         :
 * ---------------------------- VF Paused -------------------------------------
 *     |              |               |                              |         |
 *     |              |               |                              |         |
 *     |              |               |                              |Schedule |
 *     |              |               |                              |CCS Save |
 *     |              |               |                              | LRCA    |
 *     |              |               |                             [ ]------>[ ]
 *     |              |               |                              |         |
 *     |              |               |                              |         |
 *     |              |               |                              |CCS save |
 *     |              |               |                              |completed|
 *     |              |               |                             [ ]<------[ ]
 *     |              |               |                              |         |
 *     :              :               :                              :         :
 * ---------------------------- VM Migrated -----------------------------------
 *     |              |               |                              |         |
 *     |              |               |                              |         |
 *     :              :               :                              :         :
 * ---------------------------- VF Resumed ------------------------------------
 *     |              |               |                              |         |
 *     |              |               |                              |         |
 *     |              |              [ ]--------------                |         |
 *     |              |              [ ] Fix up GGTT |                |         |
 *     |              |              [ ]<-------------                |         |
 *     |              |               |                              |         |
 *     |              |               |                              |         |
 *     |              |               | Notify VF_RESFIX_DONE        |         |
 *     |              |              [ ]--------------------------->[ ]        |
 *     |              |               |                              |         |
 *     |              |               |                              |Schedule |
 *     |              |               |                              |CCS      |
 *     |              |               |                              |Restore  |
 *     |              |               |                              |LRCA     |
 *     |              |               |                             [ ]------>[ ]
 *     |              |               |                              |         |
 *     |              |               |                              |         |
 *     |              |               |                              |CCS      |
 *     |              |               |                              |restore  |
 *     |              |               |                              |completed|
 *     |              |               |                             [ ]<------[ ]
 *     |              |               |                              |         |
 *     |              |               |                              |         |
 *     |              |               | VF_RESFIX_DONE complete      |         |
 *     |              |               | notification                 |         |
 *     |              |              [ ]<---------------------------[ ]        |
 *     |              |               |                              |         |
 *     |              |               |                              |         |
 *     :              :               :                              :         :
 * ------------------------- Continue VM restore ------------------------------
 */

static u64 get_ccs_bb_pool_size(struct xe_device *xe)
{
	u64 sys_mem_size, ccs_mem_size, ptes, bb_pool_size;
	struct sysinfo si;

	si_meminfo(&si);
	sys_mem_size = si.totalram * si.mem_unit;
	ccs_mem_size = div64_u64(sys_mem_size, NUM_BYTES_PER_CCS_BYTE(xe));
	ptes = DIV_ROUND_UP_ULL(sys_mem_size + ccs_mem_size, XE_PAGE_SIZE);

	/*
	 * The BB needs to hold the PTE mappings plus a few DWs for each copy
	 * command. In reality space for many copy commands is needed, so
	 * allocate double the calculated size, which is enough to hold the
	 * GPU instructions for the whole region.
	 */
	bb_pool_size = ptes * sizeof(u32);

	return round_up(bb_pool_size * 2, SZ_1M);
}
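/*
 * Worked sizing example (illustrative only; the actual ratio comes from
 * NUM_BYTES_PER_CCS_BYTE()): assuming 16 GiB of system memory, a 1:256
 * main-to-CCS ratio and a 4 KiB XE_PAGE_SIZE, the CCS backing is 64 MiB,
 * mapping both regions takes ~4.2M PTEs, i.e. ~16 MiB of DWORDs, so the
 * doubled and 1 MiB-rounded pool ends up at ~33 MiB.
 */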
static int alloc_bb_pool(struct xe_tile *tile, struct xe_tile_vf_ccs *ctx)
{
	struct xe_device *xe = tile_to_xe(tile);
	struct xe_sa_manager *sa_manager;
	u64 bb_pool_size;
	int offset;

	bb_pool_size = get_ccs_bb_pool_size(xe);
	xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lluMB\n",
		      ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);

	sa_manager = xe_sa_bo_manager_init(tile, bb_pool_size, SZ_16);
	if (IS_ERR(sa_manager)) {
		xe_sriov_err(xe, "Suballocator init failed with error: %pe\n",
			     sa_manager);
		return PTR_ERR(sa_manager);
	}

	/*
	 * Fill the whole pool with MI_NOOPs and terminate it with
	 * MI_BATCH_BUFFER_END, so the HW has something safe to execute even
	 * before any copy commands are written into the pool.
	 */
	offset = 0;
	xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
		      bb_pool_size);

	offset = bb_pool_size - sizeof(u32);
	xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);

	ctx->mem.ccs_bb_pool = sa_manager;

	return 0;
}

static void ccs_rw_update_ring(struct xe_tile_vf_ccs *ctx)
{
	u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
	u32 dw[10], i = 0;

	/* Chain the ring into the BB pool via a batch buffer start */
	dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	dw[i++] = MI_BATCH_BUFFER_START | XE_INSTR_NUM_DW(3);
	dw[i++] = lower_32_bits(addr);
	dw[i++] = upper_32_bits(addr);
	dw[i++] = MI_NOOP;
	dw[i++] = MI_NOOP;

	xe_lrc_write_ring(lrc, dw, i * sizeof(u32));
	xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
}

static int register_save_restore_context(struct xe_tile_vf_ccs *ctx)
{
	int err = -EINVAL;
	int ctx_type;

	switch (ctx->ctx_id) {
	case XE_SRIOV_VF_CCS_READ_CTX:
		ctx_type = GUC_CONTEXT_COMPRESSION_SAVE;
		break;
	case XE_SRIOV_VF_CCS_WRITE_CTX:
		ctx_type = GUC_CONTEXT_COMPRESSION_RESTORE;
		break;
	default:
		return err;
	}

	xe_guc_register_exec_queue(ctx->mig_q, ctx_type);
	return 0;
}

/**
 * xe_sriov_vf_ccs_register_context - Register read/write contexts with GuC.
 * @xe: the &xe_device to register contexts on.
 *
 * This function registers the read and write contexts with GuC.
 * Re-registration is needed whenever resuming from runtime suspend.
 *
 * Return: 0 on success. Negative error code on failure.
 */
int xe_sriov_vf_ccs_register_context(struct xe_device *xe)
{
	struct xe_tile *tile = xe_device_get_root_tile(xe);
	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
	struct xe_tile_vf_ccs *ctx;
	int err;

	if (!IS_VF_CCS_READY(xe))
		return 0;

	for_each_ccs_rw_ctx(ctx_id) {
		ctx = &tile->sriov.vf.ccs[ctx_id];
		err = register_save_restore_context(ctx);
		if (err)
			return err;
	}

	return 0;
}
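/*
 * Illustrative caller (a sketch, not a call site in this file): a VF
 * runtime-resume path would re-register the contexts along these lines,
 * once GuC submission state has been re-established:
 *
 *	err = xe_sriov_vf_ccs_register_context(xe);
 *	if (err)
 *		return err;
 */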
static void xe_sriov_vf_ccs_fini(void *arg)
{
	struct xe_tile_vf_ccs *ctx = arg;
	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);

	/*
	 * Make TAIL = HEAD in the ring so that no work is submitted if GuC
	 * schedules this context on a VF pause after the device has been
	 * unbound.
	 */
	xe_lrc_set_ring_tail(lrc, xe_lrc_ring_head(lrc));
	xe_exec_queue_put(ctx->mig_q);
}

/**
 * xe_sriov_vf_ccs_init - Setup LRCAs for CCS save & restore.
 * @xe: the &xe_device to set up CCS save/restore on.
 *
 * This function shall be called only by a VF. It initializes the LRCAs and
 * the suballocator needed for CCS save & restore.
 *
 * Return: 0 on success. Negative error code on failure.
 */
int xe_sriov_vf_ccs_init(struct xe_device *xe)
{
	struct xe_tile *tile = xe_device_get_root_tile(xe);
	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
	struct xe_tile_vf_ccs *ctx;
	struct xe_exec_queue *q;
	u32 flags;
	int err;

	xe_assert(xe, IS_SRIOV_VF(xe));
	xe_assert(xe, !IS_DGFX(xe));
	xe_assert(xe, xe_device_has_flat_ccs(xe));

	for_each_ccs_rw_ctx(ctx_id) {
		ctx = &tile->sriov.vf.ccs[ctx_id];
		ctx->ctx_id = ctx_id;

		flags = EXEC_QUEUE_FLAG_KERNEL |
			EXEC_QUEUE_FLAG_PERMANENT |
			EXEC_QUEUE_FLAG_MIGRATE;
		q = xe_exec_queue_create_bind(xe, tile, flags, 0);
		if (IS_ERR(q)) {
			err = PTR_ERR(q);
			goto err_ret;
		}
		ctx->mig_q = q;

		err = alloc_bb_pool(tile, ctx);
		if (err)
			goto err_free_queue;

		ccs_rw_update_ring(ctx);

		err = register_save_restore_context(ctx);
		if (err)
			goto err_free_queue;

		err = devm_add_action_or_reset(xe->drm.dev,
					       xe_sriov_vf_ccs_fini,
					       ctx);
		if (err)
			goto err_ret;
	}

	xe->sriov.vf.ccs.initialized = 1;

	return 0;

err_free_queue:
	xe_exec_queue_put(q);

err_ret:
	return err;
}

/**
 * xe_sriov_vf_ccs_attach_bo - Insert CCS read/write commands in the BO.
 * @bo: the &buffer object to which batch buffer commands will be added.
 *
 * This function shall be called only by a VF. It inserts the PTEs and copy
 * command instructions in the BO by calling the xe_migrate_ccs_rw_copy()
 * function.
 *
 * Returns: 0 if successful, negative error code on failure.
 */
int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
{
	struct xe_device *xe = xe_bo_device(bo);
	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
	struct xe_tile_vf_ccs *ctx;
	struct xe_tile *tile;
	struct xe_bb *bb;
	int err = 0;

	if (!IS_VF_CCS_READY(xe))
		return 0;

	tile = xe_device_get_root_tile(xe);

	for_each_ccs_rw_ctx(ctx_id) {
		bb = bo->bb_ccs[ctx_id];
		/* A BB must not already be attached to the BO */
		xe_assert(xe, !bb);

		ctx = &tile->sriov.vf.ccs[ctx_id];
		err = xe_migrate_ccs_rw_copy(tile, ctx->mig_q, bo, ctx_id);
		if (err)
			break;
	}
	return err;
}

/**
 * xe_sriov_vf_ccs_detach_bo - Remove CCS read/write commands from the BO.
 * @bo: the &buffer object from which batch buffer commands will be removed.
 *
 * This function shall be called only by a VF. It removes the PTEs and copy
 * command instructions from the BO. The BB is overwritten with MI_NOOPs
 * before it is freed.
 *
 * Returns: 0 if successful.
 */
int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
{
	struct xe_device *xe = xe_bo_device(bo);
	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
	struct xe_bb *bb;

	if (!IS_VF_CCS_READY(xe))
		return 0;

	for_each_ccs_rw_ctx(ctx_id) {
		bb = bo->bb_ccs[ctx_id];
		if (!bb)
			continue;

		memset(bb->cs, MI_NOOP, bb->len * sizeof(u32));
		xe_bb_free(bb, NULL);
		bo->bb_ccs[ctx_id] = NULL;
	}
	return 0;
}
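/*
 * Illustrative attach/detach pairing (hypothetical call sites, for
 * exposition only): a system memory BO on a VF gets its CCS copy commands
 * attached once its backing store is populated, and detached before the
 * backing store is released:
 *
 *	err = xe_sriov_vf_ccs_attach_bo(bo);
 *	if (err)
 *		goto out_err;
 *
 *	... BO in use; contents are covered by CCS save/restore ...
 *
 *	xe_sriov_vf_ccs_detach_bo(bo);
 */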