xref: /linux/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c (revision fbf5df34a4dbcd09d433dd4f0916bf9b2ddb16de)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2025 Intel Corporation
4  */
5 
6 #include "instructions/xe_mi_commands.h"
7 #include "instructions/xe_gpu_commands.h"
8 #include "xe_bb.h"
9 #include "xe_bo.h"
10 #include "xe_device.h"
11 #include "xe_exec_queue.h"
12 #include "xe_exec_queue_types.h"
13 #include "xe_gt_sriov_vf.h"
14 #include "xe_guc.h"
15 #include "xe_guc_submit.h"
16 #include "xe_lrc.h"
17 #include "xe_mem_pool.h"
18 #include "xe_migrate.h"
19 #include "xe_pm.h"
20 #include "xe_sriov_printk.h"
21 #include "xe_sriov_vf.h"
22 #include "xe_sriov_vf_ccs.h"
23 #include "xe_sriov_vf_ccs_types.h"
24 
25 /**
26  * DOC: VF save/restore of compression Meta Data
27  *
28  * VF KMD registers two special contexts/LRCAs.
29  *
 * Save Context/LRCA: contains the commands and page tables needed to trigger a
 * save of the compression metadata (the compression control surface, aka CCS)
 * into regular system memory in the VM.
 *
 * Restore Context/LRCA: contains the commands and page tables needed to trigger
 * a restore of the compression metadata (CCS) from regular system memory in the
 * VM to the corresponding CCS pool.
36  *
 * The diagram below shows the steps needed for VF save/restore of compression metadata::
38  *
39  *    CCS Save    CCS Restore          VF KMD                          Guc       BCS
40  *     LRCA        LRCA
41  *      |           |                     |                              |         |
42  *      |           |                     |                              |         |
43  *      |     Create Save LRCA            |                              |         |
44  *     [ ]<----------------------------- [ ]                             |         |
45  *      |           |                     |                              |         |
46  *      |           |                     |                              |         |
47  *      |           |                     |       Register save LRCA     |         |
48  *      |           |                     |           with Guc           |         |
49  *      |           |                    [ ]--------------------------->[ ]        |
50  *      |           |                     |                              |         |
51  *      |           | Create restore LRCA |                              |         |
52  *      |          [ ]<------------------[ ]                             |         |
53  *      |           |                     |                              |         |
54  *      |           |                     |       Register restore LRCA  |         |
55  *      |           |                     |           with Guc           |         |
56  *      |           |                    [ ]--------------------------->[ ]        |
57  *      |           |                     |                              |         |
58  *      |           |                     |                              |         |
59  *      |           |                    [ ]-------------------------    |         |
60  *      |           |                    [ ]  Allocate main memory.  |   |         |
61  *      |           |                    [ ]  Allocate CCS memory.   |   |         |
62  *      |           |                    [ ]  Update Main memory &   |   |         |
63  *     [ ]<------------------------------[ ]  CCS pages PPGTT + BB   |   |         |
64  *      |          [ ]<------------------[ ]  cmds to save & restore.|   |         |
65  *      |           |                    [ ]<------------------------    |         |
66  *      |           |                     |                              |         |
67  *      |           |                     |                              |         |
68  *      |           |                     |                              |         |
69  *      :           :                     :                              :         :
70  *      ---------------------------- VF Paused -------------------------------------
71  *      |           |                     |                              |         |
72  *      |           |                     |                              |         |
73  *      |           |                     |                              |Schedule |
74  *      |           |                     |                              |CCS Save |
75  *      |           |                     |                              | LRCA    |
76  *      |           |                     |                             [ ]------>[ ]
77  *      |           |                     |                              |         |
78  *      |           |                     |                              |         |
79  *      |           |                     |                              |CCS save |
80  *      |           |                     |                              |completed|
81  *      |           |                     |                             [ ]<------[ ]
82  *      |           |                     |                              |         |
83  *      :           :                     :                              :         :
84  *      ---------------------------- VM Migrated -----------------------------------
85  *      |           |                     |                              |         |
86  *      |           |                     |                              |         |
87  *      :           :                     :                              :         :
88  *      ---------------------------- VF Resumed ------------------------------------
89  *      |           |                     |                              |         |
90  *      |           |                     |                              |         |
91  *      |           |                    [ ]--------------               |         |
92  *      |           |                    [ ] Fix up GGTT  |              |         |
93  *      |           |                    [ ]<-------------               |         |
94  *      |           |                     |                              |         |
95  *      |           |                     |                              |         |
96  *      |           |                     |  Notify VF_RESFIX_DONE       |         |
97  *      |           |                    [ ]--------------------------->[ ]        |
98  *      |           |                     |                              |         |
99  *      |           |                     |                              |Schedule |
100  *      |           |                     |                              |CCS      |
101  *      |           |                     |                              |Restore  |
102  *      |           |                     |                              |LRCA     |
103  *      |           |                     |                             [ ]------>[ ]
104  *      |           |                     |                              |         |
105  *      |           |                     |                              |         |
106  *      |           |                     |                              |CCS      |
107  *      |           |                     |                              |restore  |
108  *      |           |                     |                              |completed|
109  *      |           |                     |                             [ ]<------[ ]
110  *      |           |                     |                              |         |
111  *      |           |                     |                              |         |
112  *      |           |                     |  VF_RESFIX_DONE complete     |         |
113  *      |           |                     |       notification           |         |
114  *      |           |                    [ ]<---------------------------[ ]        |
115  *      |           |                     |                              |         |
116  *      |           |                     |                              |         |
117  *      :           :                     :                              :         :
118  *      ------------------------- Continue VM restore ------------------------------
119  */
120 
121 static u64 get_ccs_bb_pool_size(struct xe_device *xe)
122 {
123 	u64 sys_mem_size, ccs_mem_size, ptes, bb_pool_size;
124 	struct sysinfo si;
125 
126 	si_meminfo(&si);
127 	sys_mem_size = si.totalram * si.mem_unit;
128 	ccs_mem_size = div64_u64(sys_mem_size, NUM_BYTES_PER_CCS_BYTE(xe));
129 	ptes = DIV_ROUND_UP_ULL(sys_mem_size + ccs_mem_size, XE_PAGE_SIZE);
130 
131 	/**
132 	 * We need below BB size to hold PTE mappings and some DWs for copy
133 	 * command. In reality, we need space for many copy commands. So, let
134 	 * us allocate double the calculated size which is enough to holds GPU
135 	 * instructions for the whole region.
136 	 */
137 	bb_pool_size = ptes * sizeof(u32);
138 
139 	return round_up(bb_pool_size * 2, SZ_1M);
140 }
141 
142 static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
143 {
144 	struct xe_mem_pool *pool;
145 	struct xe_device *xe = tile_to_xe(tile);
146 	u32 *pool_cpu_addr, *last_dw_addr;
147 	u64 bb_pool_size;
148 	int err;
149 
150 	bb_pool_size = get_ccs_bb_pool_size(xe);
151 	xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
152 		      ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);
153 
154 	pool = xe_mem_pool_init(tile, bb_pool_size, sizeof(u32),
155 				XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY);
156 	if (IS_ERR(pool)) {
157 		xe_sriov_err(xe, "xe_mem_pool_init failed with error: %pe\n",
158 			     pool);
159 		err = PTR_ERR(pool);
160 		return err;
161 	}
162 
163 	pool_cpu_addr = xe_mem_pool_cpu_addr(pool);
164 	memset(pool_cpu_addr, 0, bb_pool_size);
165 
166 	last_dw_addr = pool_cpu_addr + (bb_pool_size / sizeof(u32)) - 1;
167 	*last_dw_addr = MI_BATCH_BUFFER_END;
168 
169 	/**
170 	 * Sync the main copy and shadow copy so that the shadow copy is
171 	 * replica of main copy. We sync only BBs after init part. So, we
172 	 * need to make sure the main pool and shadow copy are in sync after
173 	 * this point. This is needed as GuC may read the BB commands from
174 	 * shadow copy.
175 	 */
176 	xe_mem_pool_sync(pool);
177 
178 	ctx->mem.ccs_bb_pool = pool;
179 	return 0;
180 }
181 
182 static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx)
183 {
184 	u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
185 	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
186 	u32 dw[10], i = 0;
187 
188 	/*
189 	 * XXX: Save/restore fixes — for some reason, the GuC only accepts the
190 	 * save/restore context if the LRC head pointer is zero. This is evident
191 	 * from repeated VF migrations failing when the LRC head pointer is
192 	 * non-zero.
193 	 */
194 	lrc->ring.tail = 0;
195 	xe_lrc_set_ring_head(lrc, 0);
196 
197 	dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE;
198 	dw[i++] = MI_BATCH_BUFFER_START | XE_INSTR_NUM_DW(3);
199 	dw[i++] = lower_32_bits(addr);
200 	dw[i++] = upper_32_bits(addr);
201 	dw[i++] = MI_NOOP;
202 	dw[i++] = MI_NOOP;
203 
204 	xe_lrc_write_ring(lrc, dw, i * sizeof(u32));
205 	xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
206 }
207 
208 /**
209  * xe_sriov_vf_ccs_rebase - Rebase GGTT addresses for CCS save / restore
210  * @xe: the &xe_device.
211  */
212 void xe_sriov_vf_ccs_rebase(struct xe_device *xe)
213 {
214 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
215 
216 	if (!IS_VF_CCS_READY(xe))
217 		return;
218 
219 	for_each_ccs_rw_ctx(ctx_id) {
220 		struct xe_sriov_vf_ccs_ctx *ctx =
221 			&xe->sriov.vf.ccs.contexts[ctx_id];
222 
223 		ccs_rw_update_ring(ctx);
224 	}
225 }
226 
227 static int register_save_restore_context(struct xe_sriov_vf_ccs_ctx *ctx)
228 {
229 	int ctx_type;
230 
231 	switch (ctx->ctx_id) {
232 	case XE_SRIOV_VF_CCS_READ_CTX:
233 		ctx_type = GUC_CONTEXT_COMPRESSION_SAVE;
234 		break;
235 	case XE_SRIOV_VF_CCS_WRITE_CTX:
236 		ctx_type = GUC_CONTEXT_COMPRESSION_RESTORE;
237 		break;
238 	default:
239 		return -EINVAL;
240 	}
241 
242 	xe_guc_register_vf_exec_queue(ctx->mig_q, ctx_type);
243 	return 0;
244 }
245 
246 /**
247  * xe_sriov_vf_ccs_register_context - Register read/write contexts with guc.
248  * @xe: the &xe_device to register contexts on.
249  *
250  * This function registers read and write contexts with Guc. Re-registration
251  * is needed whenever resuming from pm runtime suspend.
252  *
253  * Return: 0 on success. Negative error code on failure.
254  */
255 int xe_sriov_vf_ccs_register_context(struct xe_device *xe)
256 {
257 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
258 	struct xe_sriov_vf_ccs_ctx *ctx;
259 	int err;
260 
261 	xe_assert(xe, IS_VF_CCS_READY(xe));
262 
263 	for_each_ccs_rw_ctx(ctx_id) {
264 		ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
265 		err = register_save_restore_context(ctx);
266 		if (err)
267 			return err;
268 	}
269 
270 	return err;
271 }
272 
273 /*
274  * Whether GuC requires CCS copy BBs for VF migration.
275  * @xe: the &xe_device instance.
276  *
277  * Only selected platforms require VF KMD to maintain CCS copy BBs and linked LRCAs.
278  *
279  * Return: true if VF driver must participate in the CCS migration, false otherwise.
280  */
281 static bool vf_migration_ccs_bb_needed(struct xe_device *xe)
282 {
283 	xe_assert(xe, IS_SRIOV_VF(xe));
284 
285 	return !IS_DGFX(xe) && xe_device_has_flat_ccs(xe);
286 }
287 
288 /*
289  * Check for disable migration due to no CCS BBs support in GuC FW.
290  * @xe: the &xe_device instance.
291  *
292  * Performs late disable of VF migration feature in case GuC FW cannot support it.
293  *
294  * Returns: True if VF migration with CCS BBs is supported, false otherwise.
295  */
296 static bool vf_migration_ccs_bb_support_check(struct xe_device *xe)
297 {
298 	struct xe_gt *gt = xe_root_mmio_gt(xe);
299 	struct xe_uc_fw_version guc_version;
300 
301 	xe_gt_sriov_vf_guc_versions(gt, NULL, &guc_version);
302 	if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0)) {
303 		xe_sriov_vf_migration_disable(xe,
304 					      "CCS migration requires GuC ABI >= 1.23 but only %u.%u found",
305 					      guc_version.major, guc_version.minor);
306 		return false;
307 	}
308 
309 	return true;
310 }
311 
312 static void xe_sriov_vf_ccs_fini(void *arg)
313 {
314 	struct xe_sriov_vf_ccs_ctx *ctx = arg;
315 	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
316 
317 	/*
318 	 * Make TAIL = HEAD in the ring so that no issues are seen if Guc
319 	 * submits this context to HW on VF pause after unbinding device.
320 	 */
321 	xe_lrc_set_ring_tail(lrc, xe_lrc_ring_head(lrc));
322 	xe_exec_queue_put(ctx->mig_q);
323 }
324 
325 /**
326  * xe_sriov_vf_ccs_init - Setup LRCA for save & restore.
327  * @xe: the &xe_device to start recovery on
328  *
329  * This function shall be called only by VF. It initializes
330  * LRCA and suballocator needed for CCS save & restore.
331  *
332  * Return: 0 on success. Negative error code on failure.
333  */
334 int xe_sriov_vf_ccs_init(struct xe_device *xe)
335 {
336 	struct xe_tile *tile = xe_device_get_root_tile(xe);
337 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
338 	struct xe_sriov_vf_ccs_ctx *ctx;
339 	struct xe_exec_queue *q;
340 	u32 flags;
341 	int err;
342 
343 	xe_assert(xe, IS_SRIOV_VF(xe));
344 
345 	if (!xe_sriov_vf_migration_supported(xe) ||
346 	    !vf_migration_ccs_bb_needed(xe) ||
347 	    !vf_migration_ccs_bb_support_check(xe))
348 		return 0;
349 
350 	for_each_ccs_rw_ctx(ctx_id) {
351 		ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
352 		ctx->ctx_id = ctx_id;
353 
354 		flags = EXEC_QUEUE_FLAG_KERNEL |
355 			EXEC_QUEUE_FLAG_PERMANENT |
356 			EXEC_QUEUE_FLAG_MIGRATE;
357 		q = xe_exec_queue_create_bind(xe, tile, NULL, flags, 0);
358 		if (IS_ERR(q)) {
359 			err = PTR_ERR(q);
360 			goto err_ret;
361 		}
362 		ctx->mig_q = q;
363 
364 		err = alloc_bb_pool(tile, ctx);
365 		if (err)
366 			goto err_free_queue;
367 
368 		ccs_rw_update_ring(ctx);
369 
370 		err = register_save_restore_context(ctx);
371 		if (err)
372 			goto err_free_queue;
373 
374 		err = devm_add_action_or_reset(xe->drm.dev,
375 					       xe_sriov_vf_ccs_fini,
376 					       ctx);
377 		if (err)
378 			goto err_ret;
379 	}
380 
381 	xe->sriov.vf.ccs.initialized = 1;
382 
383 	return 0;
384 
385 err_free_queue:
386 	xe_exec_queue_put(q);
387 
388 err_ret:
389 	return err;
390 }
391 
392 #define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET	(2 * sizeof(u32))
393 void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx)
394 {
395 	u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
396 	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
397 	struct xe_device *xe = gt_to_xe(ctx->mig_q->gt);
398 
399 	xe_device_wmb(xe);
400 	xe_map_wr(xe, &lrc->bo->vmap, XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET, u32, addr);
401 	xe_device_wmb(xe);
402 }
403 
404 /**
405  * xe_sriov_vf_ccs_attach_bo - Insert CCS read write commands in the BO.
406  * @bo: the &buffer object to which batch buffer commands will be added.
407  *
408  * This function shall be called only by VF. It inserts the PTEs and copy
409  * command instructions in the BO by calling xe_migrate_ccs_rw_copy()
410  * function.
411  *
412  * Returns: 0 if successful, negative error code on failure.
413  */
414 int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
415 {
416 	struct xe_device *xe = xe_bo_device(bo);
417 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
418 	struct xe_sriov_vf_ccs_ctx *ctx;
419 	struct xe_mem_pool_node *bb;
420 	struct xe_tile *tile;
421 	int err = 0;
422 
423 	xe_assert(xe, IS_VF_CCS_READY(xe));
424 
425 	tile = xe_device_get_root_tile(xe);
426 
427 	for_each_ccs_rw_ctx(ctx_id) {
428 		bb = bo->bb_ccs[ctx_id];
429 		/* bb should be NULL here. Assert if not NULL */
430 		xe_assert(xe, !bb);
431 
432 		ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
433 		err = xe_migrate_ccs_rw_copy(tile, ctx->mig_q, bo, ctx_id);
434 	}
435 	return err;
436 }
437 
438 /**
439  * xe_sriov_vf_ccs_detach_bo - Remove CCS read write commands from the BO.
440  * @bo: the &buffer object from which batch buffer commands will be removed.
441  *
442  * This function shall be called only by VF. It removes the PTEs and copy
443  * command instructions from the BO. Make sure to update the BB with MI_NOOP
444  * before freeing.
445  *
446  * Returns: 0 if successful.
447  */
448 int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
449 {
450 	struct xe_device *xe = xe_bo_device(bo);
451 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
452 	struct xe_mem_pool_node *bb;
453 
454 	xe_assert(xe, IS_VF_CCS_READY(xe));
455 
456 	if (!xe_bo_has_valid_ccs_bb(bo))
457 		return 0;
458 
459 	for_each_ccs_rw_ctx(ctx_id) {
460 		bb = bo->bb_ccs[ctx_id];
461 		if (!bb)
462 			continue;
463 
464 		xe_migrate_ccs_rw_copy_clear(bo, ctx_id);
465 	}
466 	return 0;
467 }
468 
469 /**
470  * xe_sriov_vf_ccs_print - Print VF CCS details.
471  * @xe: the &xe_device
472  * @p: the &drm_printer
473  *
474  * This function is for VF use only.
475  */
476 void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
477 {
478 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
479 	struct xe_mem_pool *bb_pool;
480 
481 	if (!IS_VF_CCS_READY(xe))
482 		return;
483 
484 	guard(xe_pm_runtime)(xe);
485 	for_each_ccs_rw_ctx(ctx_id) {
486 		bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool;
487 		if (!bb_pool)
488 			break;
489 
490 		drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read");
491 		drm_printf(p, "-------------------------\n");
492 		xe_mem_pool_dump(bb_pool, p);
493 		drm_puts(p, "\n");
494 	}
495 }
496