1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2025 Intel Corporation 4 */ 5 6 #include "instructions/xe_mi_commands.h" 7 #include "instructions/xe_gpu_commands.h" 8 #include "xe_bb.h" 9 #include "xe_bo.h" 10 #include "xe_device.h" 11 #include "xe_exec_queue.h" 12 #include "xe_exec_queue_types.h" 13 #include "xe_gt_sriov_vf.h" 14 #include "xe_guc.h" 15 #include "xe_guc_submit.h" 16 #include "xe_lrc.h" 17 #include "xe_mem_pool.h" 18 #include "xe_migrate.h" 19 #include "xe_pm.h" 20 #include "xe_sriov_printk.h" 21 #include "xe_sriov_vf.h" 22 #include "xe_sriov_vf_ccs.h" 23 #include "xe_sriov_vf_ccs_types.h" 24 25 /** 26 * DOC: VF save/restore of compression Meta Data 27 * 28 * VF KMD registers two special contexts/LRCAs. 29 * 30 * Save Context/LRCA: contain necessary cmds+page table to trigger Meta data / 31 * compression control surface (Aka CCS) save in regular System memory in VM. 32 * 33 * Restore Context/LRCA: contain necessary cmds+page table to trigger Meta data / 34 * compression control surface (Aka CCS) Restore from regular System memory in 35 * VM to corresponding CCS pool. 36 * 37 * Below diagram explain steps needed for VF save/Restore of compression Meta Data:: 38 * 39 * CCS Save CCS Restore VF KMD Guc BCS 40 * LRCA LRCA 41 * | | | | | 42 * | | | | | 43 * | Create Save LRCA | | | 44 * [ ]<----------------------------- [ ] | | 45 * | | | | | 46 * | | | | | 47 * | | | Register save LRCA | | 48 * | | | with Guc | | 49 * | | [ ]--------------------------->[ ] | 50 * | | | | | 51 * | | Create restore LRCA | | | 52 * | [ ]<------------------[ ] | | 53 * | | | | | 54 * | | | Register restore LRCA | | 55 * | | | with Guc | | 56 * | | [ ]--------------------------->[ ] | 57 * | | | | | 58 * | | | | | 59 * | | [ ]------------------------- | | 60 * | | [ ] Allocate main memory. | | | 61 * | | [ ] Allocate CCS memory. | | | 62 * | | [ ] Update Main memory & | | | 63 * [ ]<------------------------------[ ] CCS pages PPGTT + BB | | | 64 * | [ ]<------------------[ ] cmds to save & restore.| | | 65 * | | [ ]<------------------------ | | 66 * | | | | | 67 * | | | | | 68 * | | | | | 69 * : : : : : 70 * ---------------------------- VF Paused ------------------------------------- 71 * | | | | | 72 * | | | | | 73 * | | | |Schedule | 74 * | | | |CCS Save | 75 * | | | | LRCA | 76 * | | | [ ]------>[ ] 77 * | | | | | 78 * | | | | | 79 * | | | |CCS save | 80 * | | | |completed| 81 * | | | [ ]<------[ ] 82 * | | | | | 83 * : : : : : 84 * ---------------------------- VM Migrated ----------------------------------- 85 * | | | | | 86 * | | | | | 87 * : : : : : 88 * ---------------------------- VF Resumed ------------------------------------ 89 * | | | | | 90 * | | | | | 91 * | | [ ]-------------- | | 92 * | | [ ] Fix up GGTT | | | 93 * | | [ ]<------------- | | 94 * | | | | | 95 * | | | | | 96 * | | | Notify VF_RESFIX_DONE | | 97 * | | [ ]--------------------------->[ ] | 98 * | | | | | 99 * | | | |Schedule | 100 * | | | |CCS | 101 * | | | |Restore | 102 * | | | |LRCA | 103 * | | | [ ]------>[ ] 104 * | | | | | 105 * | | | | | 106 * | | | |CCS | 107 * | | | |restore | 108 * | | | |completed| 109 * | | | [ ]<------[ ] 110 * | | | | | 111 * | | | | | 112 * | | | VF_RESFIX_DONE complete | | 113 * | | | notification | | 114 * | | [ ]<---------------------------[ ] | 115 * | | | | | 116 * | | | | | 117 * : : : : : 118 * ------------------------- Continue VM restore ------------------------------ 119 */ 120 121 static u64 get_ccs_bb_pool_size(struct xe_device *xe) 122 { 123 u64 sys_mem_size, ccs_mem_size, ptes, bb_pool_size; 124 struct sysinfo si; 125 126 si_meminfo(&si); 127 sys_mem_size = si.totalram * si.mem_unit; 128 ccs_mem_size = div64_u64(sys_mem_size, NUM_BYTES_PER_CCS_BYTE(xe)); 129 ptes = DIV_ROUND_UP_ULL(sys_mem_size + ccs_mem_size, XE_PAGE_SIZE); 130 131 /** 132 * We need below BB size to hold PTE mappings and some DWs for copy 133 * command. In reality, we need space for many copy commands. So, let 134 * us allocate double the calculated size which is enough to holds GPU 135 * instructions for the whole region. 136 */ 137 bb_pool_size = ptes * sizeof(u32); 138 139 return round_up(bb_pool_size * 2, SZ_1M); 140 } 141 142 static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx) 143 { 144 struct xe_mem_pool *pool; 145 struct xe_device *xe = tile_to_xe(tile); 146 u32 *pool_cpu_addr, *last_dw_addr; 147 u64 bb_pool_size; 148 int err; 149 150 bb_pool_size = get_ccs_bb_pool_size(xe); 151 xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n", 152 ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M); 153 154 pool = xe_mem_pool_init(tile, bb_pool_size, sizeof(u32), 155 XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY); 156 if (IS_ERR(pool)) { 157 xe_sriov_err(xe, "xe_mem_pool_init failed with error: %pe\n", 158 pool); 159 err = PTR_ERR(pool); 160 return err; 161 } 162 163 pool_cpu_addr = xe_mem_pool_cpu_addr(pool); 164 memset(pool_cpu_addr, 0, bb_pool_size); 165 166 last_dw_addr = pool_cpu_addr + (bb_pool_size / sizeof(u32)) - 1; 167 *last_dw_addr = MI_BATCH_BUFFER_END; 168 169 /** 170 * Sync the main copy and shadow copy so that the shadow copy is 171 * replica of main copy. We sync only BBs after init part. So, we 172 * need to make sure the main pool and shadow copy are in sync after 173 * this point. This is needed as GuC may read the BB commands from 174 * shadow copy. 175 */ 176 xe_mem_pool_sync(pool); 177 178 ctx->mem.ccs_bb_pool = pool; 179 return 0; 180 } 181 182 static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx) 183 { 184 u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool); 185 struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); 186 u32 dw[10], i = 0; 187 188 /* 189 * XXX: Save/restore fixes — for some reason, the GuC only accepts the 190 * save/restore context if the LRC head pointer is zero. This is evident 191 * from repeated VF migrations failing when the LRC head pointer is 192 * non-zero. 193 */ 194 lrc->ring.tail = 0; 195 xe_lrc_set_ring_head(lrc, 0); 196 197 dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE; 198 dw[i++] = MI_BATCH_BUFFER_START | XE_INSTR_NUM_DW(3); 199 dw[i++] = lower_32_bits(addr); 200 dw[i++] = upper_32_bits(addr); 201 dw[i++] = MI_NOOP; 202 dw[i++] = MI_NOOP; 203 204 xe_lrc_write_ring(lrc, dw, i * sizeof(u32)); 205 xe_lrc_set_ring_tail(lrc, lrc->ring.tail); 206 } 207 208 /** 209 * xe_sriov_vf_ccs_rebase - Rebase GGTT addresses for CCS save / restore 210 * @xe: the &xe_device. 211 */ 212 void xe_sriov_vf_ccs_rebase(struct xe_device *xe) 213 { 214 enum xe_sriov_vf_ccs_rw_ctxs ctx_id; 215 216 if (!IS_VF_CCS_READY(xe)) 217 return; 218 219 for_each_ccs_rw_ctx(ctx_id) { 220 struct xe_sriov_vf_ccs_ctx *ctx = 221 &xe->sriov.vf.ccs.contexts[ctx_id]; 222 223 ccs_rw_update_ring(ctx); 224 } 225 } 226 227 static int register_save_restore_context(struct xe_sriov_vf_ccs_ctx *ctx) 228 { 229 int ctx_type; 230 231 switch (ctx->ctx_id) { 232 case XE_SRIOV_VF_CCS_READ_CTX: 233 ctx_type = GUC_CONTEXT_COMPRESSION_SAVE; 234 break; 235 case XE_SRIOV_VF_CCS_WRITE_CTX: 236 ctx_type = GUC_CONTEXT_COMPRESSION_RESTORE; 237 break; 238 default: 239 return -EINVAL; 240 } 241 242 xe_guc_register_vf_exec_queue(ctx->mig_q, ctx_type); 243 return 0; 244 } 245 246 /** 247 * xe_sriov_vf_ccs_register_context - Register read/write contexts with guc. 248 * @xe: the &xe_device to register contexts on. 249 * 250 * This function registers read and write contexts with Guc. Re-registration 251 * is needed whenever resuming from pm runtime suspend. 252 * 253 * Return: 0 on success. Negative error code on failure. 254 */ 255 int xe_sriov_vf_ccs_register_context(struct xe_device *xe) 256 { 257 enum xe_sriov_vf_ccs_rw_ctxs ctx_id; 258 struct xe_sriov_vf_ccs_ctx *ctx; 259 int err; 260 261 xe_assert(xe, IS_VF_CCS_READY(xe)); 262 263 for_each_ccs_rw_ctx(ctx_id) { 264 ctx = &xe->sriov.vf.ccs.contexts[ctx_id]; 265 err = register_save_restore_context(ctx); 266 if (err) 267 return err; 268 } 269 270 return err; 271 } 272 273 /* 274 * Whether GuC requires CCS copy BBs for VF migration. 275 * @xe: the &xe_device instance. 276 * 277 * Only selected platforms require VF KMD to maintain CCS copy BBs and linked LRCAs. 278 * 279 * Return: true if VF driver must participate in the CCS migration, false otherwise. 280 */ 281 static bool vf_migration_ccs_bb_needed(struct xe_device *xe) 282 { 283 xe_assert(xe, IS_SRIOV_VF(xe)); 284 285 return !IS_DGFX(xe) && xe_device_has_flat_ccs(xe); 286 } 287 288 /* 289 * Check for disable migration due to no CCS BBs support in GuC FW. 290 * @xe: the &xe_device instance. 291 * 292 * Performs late disable of VF migration feature in case GuC FW cannot support it. 293 * 294 * Returns: True if VF migration with CCS BBs is supported, false otherwise. 295 */ 296 static bool vf_migration_ccs_bb_support_check(struct xe_device *xe) 297 { 298 struct xe_gt *gt = xe_root_mmio_gt(xe); 299 struct xe_uc_fw_version guc_version; 300 301 xe_gt_sriov_vf_guc_versions(gt, NULL, &guc_version); 302 if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0)) { 303 xe_sriov_vf_migration_disable(xe, 304 "CCS migration requires GuC ABI >= 1.23 but only %u.%u found", 305 guc_version.major, guc_version.minor); 306 return false; 307 } 308 309 return true; 310 } 311 312 static void xe_sriov_vf_ccs_fini(void *arg) 313 { 314 struct xe_sriov_vf_ccs_ctx *ctx = arg; 315 struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); 316 317 /* 318 * Make TAIL = HEAD in the ring so that no issues are seen if Guc 319 * submits this context to HW on VF pause after unbinding device. 320 */ 321 xe_lrc_set_ring_tail(lrc, xe_lrc_ring_head(lrc)); 322 xe_exec_queue_put(ctx->mig_q); 323 } 324 325 /** 326 * xe_sriov_vf_ccs_init - Setup LRCA for save & restore. 327 * @xe: the &xe_device to start recovery on 328 * 329 * This function shall be called only by VF. It initializes 330 * LRCA and suballocator needed for CCS save & restore. 331 * 332 * Return: 0 on success. Negative error code on failure. 333 */ 334 int xe_sriov_vf_ccs_init(struct xe_device *xe) 335 { 336 struct xe_tile *tile = xe_device_get_root_tile(xe); 337 enum xe_sriov_vf_ccs_rw_ctxs ctx_id; 338 struct xe_sriov_vf_ccs_ctx *ctx; 339 struct xe_exec_queue *q; 340 u32 flags; 341 int err; 342 343 xe_assert(xe, IS_SRIOV_VF(xe)); 344 345 if (!xe_sriov_vf_migration_supported(xe) || 346 !vf_migration_ccs_bb_needed(xe) || 347 !vf_migration_ccs_bb_support_check(xe)) 348 return 0; 349 350 for_each_ccs_rw_ctx(ctx_id) { 351 ctx = &xe->sriov.vf.ccs.contexts[ctx_id]; 352 ctx->ctx_id = ctx_id; 353 354 flags = EXEC_QUEUE_FLAG_KERNEL | 355 EXEC_QUEUE_FLAG_PERMANENT | 356 EXEC_QUEUE_FLAG_MIGRATE; 357 q = xe_exec_queue_create_bind(xe, tile, NULL, flags, 0); 358 if (IS_ERR(q)) { 359 err = PTR_ERR(q); 360 goto err_ret; 361 } 362 ctx->mig_q = q; 363 364 err = alloc_bb_pool(tile, ctx); 365 if (err) 366 goto err_free_queue; 367 368 ccs_rw_update_ring(ctx); 369 370 err = register_save_restore_context(ctx); 371 if (err) 372 goto err_free_queue; 373 374 err = devm_add_action_or_reset(xe->drm.dev, 375 xe_sriov_vf_ccs_fini, 376 ctx); 377 if (err) 378 goto err_ret; 379 } 380 381 xe->sriov.vf.ccs.initialized = 1; 382 383 return 0; 384 385 err_free_queue: 386 xe_exec_queue_put(q); 387 388 err_ret: 389 return err; 390 } 391 392 #define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET (2 * sizeof(u32)) 393 void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx) 394 { 395 u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool); 396 struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); 397 struct xe_device *xe = gt_to_xe(ctx->mig_q->gt); 398 399 xe_device_wmb(xe); 400 xe_map_wr(xe, &lrc->bo->vmap, XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET, u32, addr); 401 xe_device_wmb(xe); 402 } 403 404 /** 405 * xe_sriov_vf_ccs_attach_bo - Insert CCS read write commands in the BO. 406 * @bo: the &buffer object to which batch buffer commands will be added. 407 * 408 * This function shall be called only by VF. It inserts the PTEs and copy 409 * command instructions in the BO by calling xe_migrate_ccs_rw_copy() 410 * function. 411 * 412 * Returns: 0 if successful, negative error code on failure. 413 */ 414 int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo) 415 { 416 struct xe_device *xe = xe_bo_device(bo); 417 enum xe_sriov_vf_ccs_rw_ctxs ctx_id; 418 struct xe_sriov_vf_ccs_ctx *ctx; 419 struct xe_mem_pool_node *bb; 420 struct xe_tile *tile; 421 int err = 0; 422 423 xe_assert(xe, IS_VF_CCS_READY(xe)); 424 425 tile = xe_device_get_root_tile(xe); 426 427 for_each_ccs_rw_ctx(ctx_id) { 428 bb = bo->bb_ccs[ctx_id]; 429 /* bb should be NULL here. Assert if not NULL */ 430 xe_assert(xe, !bb); 431 432 ctx = &xe->sriov.vf.ccs.contexts[ctx_id]; 433 err = xe_migrate_ccs_rw_copy(tile, ctx->mig_q, bo, ctx_id); 434 } 435 return err; 436 } 437 438 /** 439 * xe_sriov_vf_ccs_detach_bo - Remove CCS read write commands from the BO. 440 * @bo: the &buffer object from which batch buffer commands will be removed. 441 * 442 * This function shall be called only by VF. It removes the PTEs and copy 443 * command instructions from the BO. Make sure to update the BB with MI_NOOP 444 * before freeing. 445 * 446 * Returns: 0 if successful. 447 */ 448 int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo) 449 { 450 struct xe_device *xe = xe_bo_device(bo); 451 enum xe_sriov_vf_ccs_rw_ctxs ctx_id; 452 struct xe_mem_pool_node *bb; 453 454 xe_assert(xe, IS_VF_CCS_READY(xe)); 455 456 if (!xe_bo_has_valid_ccs_bb(bo)) 457 return 0; 458 459 for_each_ccs_rw_ctx(ctx_id) { 460 bb = bo->bb_ccs[ctx_id]; 461 if (!bb) 462 continue; 463 464 xe_migrate_ccs_rw_copy_clear(bo, ctx_id); 465 } 466 return 0; 467 } 468 469 /** 470 * xe_sriov_vf_ccs_print - Print VF CCS details. 471 * @xe: the &xe_device 472 * @p: the &drm_printer 473 * 474 * This function is for VF use only. 475 */ 476 void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p) 477 { 478 enum xe_sriov_vf_ccs_rw_ctxs ctx_id; 479 struct xe_mem_pool *bb_pool; 480 481 if (!IS_VF_CCS_READY(xe)) 482 return; 483 484 guard(xe_pm_runtime)(xe); 485 for_each_ccs_rw_ctx(ctx_id) { 486 bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool; 487 if (!bb_pool) 488 break; 489 490 drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read"); 491 drm_printf(p, "-------------------------\n"); 492 xe_mem_pool_dump(bb_pool, p); 493 drm_puts(p, "\n"); 494 } 495 } 496