1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2025 Intel Corporation
4 */
5
6 #include "instructions/xe_mi_commands.h"
7 #include "instructions/xe_gpu_commands.h"
8 #include "xe_bb.h"
9 #include "xe_bo.h"
10 #include "xe_device.h"
11 #include "xe_exec_queue.h"
12 #include "xe_exec_queue_types.h"
13 #include "xe_gt_sriov_vf.h"
14 #include "xe_guc.h"
15 #include "xe_guc_submit.h"
16 #include "xe_lrc.h"
17 #include "xe_mem_pool.h"
18 #include "xe_migrate.h"
19 #include "xe_pm.h"
20 #include "xe_sriov_printk.h"
21 #include "xe_sriov_vf.h"
22 #include "xe_sriov_vf_ccs.h"
23 #include "xe_sriov_vf_ccs_types.h"
24
25 /**
26 * DOC: VF save/restore of compression Meta Data
27 *
28 * VF KMD registers two special contexts/LRCAs.
29 *
 * Save Context/LRCA: contains the necessary cmds + page table to trigger a
 * save of the Meta data / compression control surface (aka CCS) into regular
 * System memory in the VM.
 *
 * Restore Context/LRCA: contains the necessary cmds + page table to trigger a
 * restore of the Meta data / compression control surface (aka CCS) from
 * regular System memory in the VM to the corresponding CCS pool.
36 *
 * The diagram below explains the steps needed for VF save/restore of compression Meta Data::
38 *
39 * CCS Save CCS Restore VF KMD Guc BCS
40 * LRCA LRCA
41 * | | | | |
42 * | | | | |
43 * | Create Save LRCA | | |
44 * [ ]<----------------------------- [ ] | |
45 * | | | | |
46 * | | | | |
47 * | | | Register save LRCA | |
48 * | | | with Guc | |
49 * | | [ ]--------------------------->[ ] |
50 * | | | | |
51 * | | Create restore LRCA | | |
52 * | [ ]<------------------[ ] | |
53 * | | | | |
54 * | | | Register restore LRCA | |
55 * | | | with Guc | |
56 * | | [ ]--------------------------->[ ] |
57 * | | | | |
58 * | | | | |
59 * | | [ ]------------------------- | |
60 * | | [ ] Allocate main memory. | | |
61 * | | [ ] Allocate CCS memory. | | |
62 * | | [ ] Update Main memory & | | |
63 * [ ]<------------------------------[ ] CCS pages PPGTT + BB | | |
64 * | [ ]<------------------[ ] cmds to save & restore.| | |
65 * | | [ ]<------------------------ | |
66 * | | | | |
67 * | | | | |
68 * | | | | |
69 * : : : : :
70 * ---------------------------- VF Paused -------------------------------------
71 * | | | | |
72 * | | | | |
73 * | | | |Schedule |
74 * | | | |CCS Save |
75 * | | | | LRCA |
76 * | | | [ ]------>[ ]
77 * | | | | |
78 * | | | | |
79 * | | | |CCS save |
80 * | | | |completed|
81 * | | | [ ]<------[ ]
82 * | | | | |
83 * : : : : :
84 * ---------------------------- VM Migrated -----------------------------------
85 * | | | | |
86 * | | | | |
87 * : : : : :
88 * ---------------------------- VF Resumed ------------------------------------
89 * | | | | |
90 * | | | | |
91 * | | [ ]-------------- | |
92 * | | [ ] Fix up GGTT | | |
93 * | | [ ]<------------- | |
94 * | | | | |
95 * | | | | |
96 * | | | Notify VF_RESFIX_DONE | |
97 * | | [ ]--------------------------->[ ] |
98 * | | | | |
99 * | | | |Schedule |
100 * | | | |CCS |
101 * | | | |Restore |
102 * | | | |LRCA |
103 * | | | [ ]------>[ ]
104 * | | | | |
105 * | | | | |
106 * | | | |CCS |
107 * | | | |restore |
108 * | | | |completed|
109 * | | | [ ]<------[ ]
110 * | | | | |
111 * | | | | |
112 * | | | VF_RESFIX_DONE complete | |
113 * | | | notification | |
114 * | | [ ]<---------------------------[ ] |
115 * | | | | |
116 * | | | | |
117 * : : : : :
118 * ------------------------- Continue VM restore ------------------------------
119 */
120
get_ccs_bb_pool_size(struct xe_device * xe)121 static u64 get_ccs_bb_pool_size(struct xe_device *xe)
122 {
123 u64 sys_mem_size, ccs_mem_size, ptes, bb_pool_size;
124 struct sysinfo si;
125
126 si_meminfo(&si);
127 sys_mem_size = si.totalram * si.mem_unit;
128 ccs_mem_size = div64_u64(sys_mem_size, NUM_BYTES_PER_CCS_BYTE(xe));
129 ptes = DIV_ROUND_UP_ULL(sys_mem_size + ccs_mem_size, XE_PAGE_SIZE);
130
131 /**
132 * We need below BB size to hold PTE mappings and some DWs for copy
133 * command. In reality, we need space for many copy commands. So, let
134 * us allocate double the calculated size which is enough to holds GPU
135 * instructions for the whole region.
136 */
137 bb_pool_size = ptes * sizeof(u32);
138
139 return round_up(bb_pool_size * 2, SZ_1M);
140 }
141
alloc_bb_pool(struct xe_tile * tile,struct xe_sriov_vf_ccs_ctx * ctx)142 static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
143 {
144 struct xe_mem_pool *pool;
145 struct xe_device *xe = tile_to_xe(tile);
146 u32 *pool_cpu_addr, *last_dw_addr;
147 u64 bb_pool_size;
148 int err;
149
150 bb_pool_size = get_ccs_bb_pool_size(xe);
151 xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
152 ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);
153
154 pool = xe_mem_pool_init(tile, bb_pool_size, sizeof(u32),
155 XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY);
156 if (IS_ERR(pool)) {
157 xe_sriov_err(xe, "xe_mem_pool_init failed with error: %pe\n",
158 pool);
159 err = PTR_ERR(pool);
160 return err;
161 }
162
163 pool_cpu_addr = xe_mem_pool_cpu_addr(pool);
164 memset(pool_cpu_addr, 0, bb_pool_size);
165
166 last_dw_addr = pool_cpu_addr + (bb_pool_size / sizeof(u32)) - 1;
167 *last_dw_addr = MI_BATCH_BUFFER_END;
168
169 /**
170 * Sync the main copy and shadow copy so that the shadow copy is
171 * replica of main copy. We sync only BBs after init part. So, we
172 * need to make sure the main pool and shadow copy are in sync after
173 * this point. This is needed as GuC may read the BB commands from
174 * shadow copy.
175 */
176 xe_mem_pool_sync(pool);
177
178 ctx->mem.ccs_bb_pool = pool;
179 return 0;
180 }
181
ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx * ctx)182 static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx)
183 {
184 u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
185 struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
186 u32 dw[10], i = 0;
187
188 /*
189 * XXX: Save/restore fixes — for some reason, the GuC only accepts the
190 * save/restore context if the LRC head pointer is zero. This is evident
191 * from repeated VF migrations failing when the LRC head pointer is
192 * non-zero.
193 */
194 lrc->ring.tail = 0;
195 xe_lrc_set_ring_head(lrc, 0);
196
197 dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE;
198 dw[i++] = MI_BATCH_BUFFER_START | XE_INSTR_NUM_DW(3);
199 dw[i++] = lower_32_bits(addr);
200 dw[i++] = upper_32_bits(addr);
201 dw[i++] = MI_NOOP;
202 dw[i++] = MI_NOOP;
203
204 xe_lrc_write_ring(lrc, dw, i * sizeof(u32));
205 xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
206 }
207
208 /**
209 * xe_sriov_vf_ccs_rebase - Rebase GGTT addresses for CCS save / restore
210 * @xe: the &xe_device.
211 */
xe_sriov_vf_ccs_rebase(struct xe_device * xe)212 void xe_sriov_vf_ccs_rebase(struct xe_device *xe)
213 {
214 enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
215
216 if (!IS_VF_CCS_READY(xe))
217 return;
218
219 for_each_ccs_rw_ctx(ctx_id) {
220 struct xe_sriov_vf_ccs_ctx *ctx =
221 &xe->sriov.vf.ccs.contexts[ctx_id];
222
223 ccs_rw_update_ring(ctx);
224 }
225 }
226
register_save_restore_context(struct xe_sriov_vf_ccs_ctx * ctx)227 static int register_save_restore_context(struct xe_sriov_vf_ccs_ctx *ctx)
228 {
229 int ctx_type;
230
231 switch (ctx->ctx_id) {
232 case XE_SRIOV_VF_CCS_READ_CTX:
233 ctx_type = GUC_CONTEXT_COMPRESSION_SAVE;
234 break;
235 case XE_SRIOV_VF_CCS_WRITE_CTX:
236 ctx_type = GUC_CONTEXT_COMPRESSION_RESTORE;
237 break;
238 default:
239 return -EINVAL;
240 }
241
242 xe_guc_register_vf_exec_queue(ctx->mig_q, ctx_type);
243 return 0;
244 }
245
246 /**
247 * xe_sriov_vf_ccs_register_context - Register read/write contexts with guc.
248 * @xe: the &xe_device to register contexts on.
249 *
250 * This function registers read and write contexts with Guc. Re-registration
251 * is needed whenever resuming from pm runtime suspend.
252 *
253 * Return: 0 on success. Negative error code on failure.
254 */
xe_sriov_vf_ccs_register_context(struct xe_device * xe)255 int xe_sriov_vf_ccs_register_context(struct xe_device *xe)
256 {
257 enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
258 struct xe_sriov_vf_ccs_ctx *ctx;
259 int err;
260
261 xe_assert(xe, IS_VF_CCS_READY(xe));
262
263 for_each_ccs_rw_ctx(ctx_id) {
264 ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
265 err = register_save_restore_context(ctx);
266 if (err)
267 return err;
268 }
269
270 return err;
271 }
272
273 /*
274 * Whether GuC requires CCS copy BBs for VF migration.
275 * @xe: the &xe_device instance.
276 *
277 * Only selected platforms require VF KMD to maintain CCS copy BBs and linked LRCAs.
278 *
279 * Return: true if VF driver must participate in the CCS migration, false otherwise.
280 */
vf_migration_ccs_bb_needed(struct xe_device * xe)281 static bool vf_migration_ccs_bb_needed(struct xe_device *xe)
282 {
283 xe_assert(xe, IS_SRIOV_VF(xe));
284
285 return !IS_DGFX(xe) && xe_device_has_flat_ccs(xe);
286 }
287
288 /*
289 * Check for disable migration due to no CCS BBs support in GuC FW.
290 * @xe: the &xe_device instance.
291 *
292 * Performs late disable of VF migration feature in case GuC FW cannot support it.
293 *
294 * Returns: True if VF migration with CCS BBs is supported, false otherwise.
295 */
vf_migration_ccs_bb_support_check(struct xe_device * xe)296 static bool vf_migration_ccs_bb_support_check(struct xe_device *xe)
297 {
298 struct xe_gt *gt = xe_root_mmio_gt(xe);
299 struct xe_uc_fw_version guc_version;
300
301 xe_gt_sriov_vf_guc_versions(gt, NULL, &guc_version);
302 if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0)) {
303 xe_sriov_vf_migration_disable(xe,
304 "CCS migration requires GuC ABI >= 1.23 but only %u.%u found",
305 guc_version.major, guc_version.minor);
306 return false;
307 }
308
309 return true;
310 }
311
xe_sriov_vf_ccs_fini(void * arg)312 static void xe_sriov_vf_ccs_fini(void *arg)
313 {
314 struct xe_sriov_vf_ccs_ctx *ctx = arg;
315 struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
316
317 /*
318 * Make TAIL = HEAD in the ring so that no issues are seen if Guc
319 * submits this context to HW on VF pause after unbinding device.
320 */
321 xe_lrc_set_ring_tail(lrc, xe_lrc_ring_head(lrc));
322 xe_exec_queue_put(ctx->mig_q);
323 }
324
325 /**
326 * xe_sriov_vf_ccs_init - Setup LRCA for save & restore.
327 * @xe: the &xe_device to start recovery on
328 *
329 * This function shall be called only by VF. It initializes
330 * LRCA and suballocator needed for CCS save & restore.
331 *
332 * Return: 0 on success. Negative error code on failure.
333 */
xe_sriov_vf_ccs_init(struct xe_device * xe)334 int xe_sriov_vf_ccs_init(struct xe_device *xe)
335 {
336 struct xe_tile *tile = xe_device_get_root_tile(xe);
337 enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
338 struct xe_sriov_vf_ccs_ctx *ctx;
339 struct xe_exec_queue *q;
340 u32 flags;
341 int err;
342
343 xe_assert(xe, IS_SRIOV_VF(xe));
344
345 if (!xe_sriov_vf_migration_supported(xe) ||
346 !vf_migration_ccs_bb_needed(xe) ||
347 !vf_migration_ccs_bb_support_check(xe))
348 return 0;
349
350 for_each_ccs_rw_ctx(ctx_id) {
351 ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
352 ctx->ctx_id = ctx_id;
353
354 flags = EXEC_QUEUE_FLAG_KERNEL |
355 EXEC_QUEUE_FLAG_PERMANENT |
356 EXEC_QUEUE_FLAG_MIGRATE;
357 q = xe_exec_queue_create_bind(xe, tile, NULL, flags, 0);
358 if (IS_ERR(q)) {
359 err = PTR_ERR(q);
360 goto err_ret;
361 }
362 ctx->mig_q = q;
363
364 err = alloc_bb_pool(tile, ctx);
365 if (err)
366 goto err_free_queue;
367
368 ccs_rw_update_ring(ctx);
369
370 err = register_save_restore_context(ctx);
371 if (err)
372 goto err_free_queue;
373
374 err = devm_add_action_or_reset(xe->drm.dev,
375 xe_sriov_vf_ccs_fini,
376 ctx);
377 if (err)
378 goto err_ret;
379 }
380
381 xe->sriov.vf.ccs.initialized = 1;
382
383 return 0;
384
385 err_free_queue:
386 xe_exec_queue_put(q);
387
388 err_ret:
389 return err;
390 }
391
392 #define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET (2 * sizeof(u32))
xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx * ctx)393 void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx)
394 {
395 u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
396 struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
397 struct xe_device *xe = gt_to_xe(ctx->mig_q->gt);
398
399 xe_device_wmb(xe);
400 xe_map_wr(xe, &lrc->bo->vmap, XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET, u32, addr);
401 xe_device_wmb(xe);
402 }
403
404 /**
405 * xe_sriov_vf_ccs_attach_bo - Insert CCS read write commands in the BO.
406 * @bo: the &buffer object to which batch buffer commands will be added.
407 *
408 * This function shall be called only by VF. It inserts the PTEs and copy
409 * command instructions in the BO by calling xe_migrate_ccs_rw_copy()
410 * function.
411 *
412 * Returns: 0 if successful, negative error code on failure.
413 */
xe_sriov_vf_ccs_attach_bo(struct xe_bo * bo)414 int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
415 {
416 struct xe_device *xe = xe_bo_device(bo);
417 enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
418 struct xe_sriov_vf_ccs_ctx *ctx;
419 struct xe_mem_pool_node *bb;
420 struct xe_tile *tile;
421 int err = 0;
422
423 xe_assert(xe, IS_VF_CCS_READY(xe));
424
425 tile = xe_device_get_root_tile(xe);
426
427 for_each_ccs_rw_ctx(ctx_id) {
428 bb = bo->bb_ccs[ctx_id];
429 /* bb should be NULL here. Assert if not NULL */
430 xe_assert(xe, !bb);
431
432 ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
433 err = xe_migrate_ccs_rw_copy(tile, ctx->mig_q, bo, ctx_id);
434 }
435 return err;
436 }
437
438 /**
439 * xe_sriov_vf_ccs_detach_bo - Remove CCS read write commands from the BO.
440 * @bo: the &buffer object from which batch buffer commands will be removed.
441 *
442 * This function shall be called only by VF. It removes the PTEs and copy
443 * command instructions from the BO. Make sure to update the BB with MI_NOOP
444 * before freeing.
445 *
446 * Returns: 0 if successful.
447 */
xe_sriov_vf_ccs_detach_bo(struct xe_bo * bo)448 int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
449 {
450 struct xe_device *xe = xe_bo_device(bo);
451 enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
452 struct xe_mem_pool_node *bb;
453
454 xe_assert(xe, IS_VF_CCS_READY(xe));
455
456 if (!xe_bo_has_valid_ccs_bb(bo))
457 return 0;
458
459 for_each_ccs_rw_ctx(ctx_id) {
460 bb = bo->bb_ccs[ctx_id];
461 if (!bb)
462 continue;
463
464 xe_migrate_ccs_rw_copy_clear(bo, ctx_id);
465 }
466 return 0;
467 }
468
469 /**
470 * xe_sriov_vf_ccs_print - Print VF CCS details.
471 * @xe: the &xe_device
472 * @p: the &drm_printer
473 *
474 * This function is for VF use only.
475 */
xe_sriov_vf_ccs_print(struct xe_device * xe,struct drm_printer * p)476 void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
477 {
478 enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
479 struct xe_mem_pool *bb_pool;
480
481 if (!IS_VF_CCS_READY(xe))
482 return;
483
484 guard(xe_pm_runtime)(xe);
485 for_each_ccs_rw_ctx(ctx_id) {
486 bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool;
487 if (!bb_pool)
488 break;
489
490 drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read");
491 drm_printf(p, "-------------------------\n");
492 xe_mem_pool_dump(bb_pool, p);
493 drm_puts(p, "\n");
494 }
495 }
496