xref: /linux/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c (revision d3b402c5a2d47f51eb0581da1a7b142f82cb10d1)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2025 Intel Corporation
4  */
5 
6 #include "instructions/xe_mi_commands.h"
7 #include "instructions/xe_gpu_commands.h"
8 #include "xe_bb.h"
9 #include "xe_bo.h"
10 #include "xe_device.h"
11 #include "xe_exec_queue.h"
12 #include "xe_exec_queue_types.h"
13 #include "xe_gt_sriov_vf.h"
14 #include "xe_guc.h"
15 #include "xe_guc_submit.h"
16 #include "xe_lrc.h"
17 #include "xe_migrate.h"
18 #include "xe_pm.h"
19 #include "xe_sa.h"
20 #include "xe_sriov_printk.h"
21 #include "xe_sriov_vf.h"
22 #include "xe_sriov_vf_ccs.h"
23 #include "xe_sriov_vf_ccs_types.h"
24 
25 /**
26  * DOC: VF save/restore of compression Meta Data
27  *
28  * VF KMD registers two special contexts/LRCAs.
29  *
30  * Save Context/LRCA: contain necessary cmds+page table to trigger Meta data /
31  * compression control surface (Aka CCS) save in regular System memory in VM.
32  *
33  * Restore Context/LRCA: contain necessary cmds+page table to trigger Meta data /
34  * compression control surface (Aka CCS) Restore from regular System memory in
35  * VM to corresponding CCS pool.
36  *
 37  * The diagram below explains the steps needed for VF save/restore of compression Meta Data::
38  *
39  *    CCS Save    CCS Restore          VF KMD                          Guc       BCS
40  *     LRCA        LRCA
41  *      |           |                     |                              |         |
42  *      |           |                     |                              |         |
43  *      |     Create Save LRCA            |                              |         |
44  *     [ ]<----------------------------- [ ]                             |         |
45  *      |           |                     |                              |         |
46  *      |           |                     |                              |         |
47  *      |           |                     |       Register save LRCA     |         |
48  *      |           |                     |           with Guc           |         |
49  *      |           |                    [ ]--------------------------->[ ]        |
50  *      |           |                     |                              |         |
51  *      |           | Create restore LRCA |                              |         |
52  *      |          [ ]<------------------[ ]                             |         |
53  *      |           |                     |                              |         |
54  *      |           |                     |       Register restore LRCA  |         |
55  *      |           |                     |           with Guc           |         |
56  *      |           |                    [ ]--------------------------->[ ]        |
57  *      |           |                     |                              |         |
58  *      |           |                     |                              |         |
59  *      |           |                    [ ]-------------------------    |         |
60  *      |           |                    [ ]  Allocate main memory.  |   |         |
61  *      |           |                    [ ]  Allocate CCS memory.   |   |         |
62  *      |           |                    [ ]  Update Main memory &   |   |         |
63  *     [ ]<------------------------------[ ]  CCS pages PPGTT + BB   |   |         |
64  *      |          [ ]<------------------[ ]  cmds to save & restore.|   |         |
65  *      |           |                    [ ]<------------------------    |         |
66  *      |           |                     |                              |         |
67  *      |           |                     |                              |         |
68  *      |           |                     |                              |         |
69  *      :           :                     :                              :         :
70  *      ---------------------------- VF Paused -------------------------------------
71  *      |           |                     |                              |         |
72  *      |           |                     |                              |         |
73  *      |           |                     |                              |Schedule |
74  *      |           |                     |                              |CCS Save |
75  *      |           |                     |                              | LRCA    |
76  *      |           |                     |                             [ ]------>[ ]
77  *      |           |                     |                              |         |
78  *      |           |                     |                              |         |
79  *      |           |                     |                              |CCS save |
80  *      |           |                     |                              |completed|
81  *      |           |                     |                             [ ]<------[ ]
82  *      |           |                     |                              |         |
83  *      :           :                     :                              :         :
84  *      ---------------------------- VM Migrated -----------------------------------
85  *      |           |                     |                              |         |
86  *      |           |                     |                              |         |
87  *      :           :                     :                              :         :
88  *      ---------------------------- VF Resumed ------------------------------------
89  *      |           |                     |                              |         |
90  *      |           |                     |                              |         |
91  *      |           |                    [ ]--------------               |         |
92  *      |           |                    [ ] Fix up GGTT  |              |         |
93  *      |           |                    [ ]<-------------               |         |
94  *      |           |                     |                              |         |
95  *      |           |                     |                              |         |
96  *      |           |                     |  Notify VF_RESFIX_DONE       |         |
97  *      |           |                    [ ]--------------------------->[ ]        |
98  *      |           |                     |                              |         |
99  *      |           |                     |                              |Schedule |
100  *      |           |                     |                              |CCS      |
101  *      |           |                     |                              |Restore  |
102  *      |           |                     |                              |LRCA     |
103  *      |           |                     |                             [ ]------>[ ]
104  *      |           |                     |                              |         |
105  *      |           |                     |                              |         |
106  *      |           |                     |                              |CCS      |
107  *      |           |                     |                              |restore  |
108  *      |           |                     |                              |completed|
109  *      |           |                     |                             [ ]<------[ ]
110  *      |           |                     |                              |         |
111  *      |           |                     |                              |         |
112  *      |           |                     |  VF_RESFIX_DONE complete     |         |
113  *      |           |                     |       notification           |         |
114  *      |           |                    [ ]<---------------------------[ ]        |
115  *      |           |                     |                              |         |
116  *      |           |                     |                              |         |
117  *      :           :                     :                              :         :
118  *      ------------------------- Continue VM restore ------------------------------
119  */
120 
get_ccs_bb_pool_size(struct xe_device * xe)121 static u64 get_ccs_bb_pool_size(struct xe_device *xe)
122 {
123 	u64 sys_mem_size, ccs_mem_size, ptes, bb_pool_size;
124 	struct sysinfo si;
125 
126 	si_meminfo(&si);
127 	sys_mem_size = si.totalram * si.mem_unit;
128 	ccs_mem_size = div64_u64(sys_mem_size, NUM_BYTES_PER_CCS_BYTE(xe));
129 	ptes = DIV_ROUND_UP_ULL(sys_mem_size + ccs_mem_size, XE_PAGE_SIZE);
130 
131 	/**
132 	 * We need below BB size to hold PTE mappings and some DWs for copy
133 	 * command. In reality, we need space for many copy commands. So, let
134 	 * us allocate double the calculated size which is enough to holds GPU
135 	 * instructions for the whole region.
136 	 */
137 	bb_pool_size = ptes * sizeof(u32);
138 
139 	return round_up(bb_pool_size * 2, SZ_1M);
140 }
141 
alloc_bb_pool(struct xe_tile * tile,struct xe_sriov_vf_ccs_ctx * ctx)142 static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
143 {
144 	struct xe_device *xe = tile_to_xe(tile);
145 	struct xe_sa_manager *sa_manager;
146 	u64 bb_pool_size;
147 	int offset, err;
148 
149 	bb_pool_size = get_ccs_bb_pool_size(xe);
150 	xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
151 		      ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);
152 
153 	sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16,
154 					     XE_SA_BO_MANAGER_FLAG_SHADOW);
155 
156 	if (IS_ERR(sa_manager)) {
157 		xe_sriov_err(xe, "Suballocator init failed with error: %pe\n",
158 			     sa_manager);
159 		err = PTR_ERR(sa_manager);
160 		return err;
161 	}
162 
163 	offset = 0;
164 	xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
165 		      bb_pool_size);
166 	xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP,
167 		      bb_pool_size);
168 
169 	offset = bb_pool_size - sizeof(u32);
170 	xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);
171 	xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END);
172 
173 	ctx->mem.ccs_bb_pool = sa_manager;
174 
175 	return 0;
176 }
177 
ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx * ctx)178 static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx)
179 {
180 	u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
181 	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
182 	u32 dw[10], i = 0;
183 
184 	/*
185 	 * XXX: Save/restore fixes — for some reason, the GuC only accepts the
186 	 * save/restore context if the LRC head pointer is zero. This is evident
187 	 * from repeated VF migrations failing when the LRC head pointer is
188 	 * non-zero.
189 	 */
190 	lrc->ring.tail = 0;
191 	xe_lrc_set_ring_head(lrc, 0);
192 
193 	dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE;
194 	dw[i++] = MI_BATCH_BUFFER_START | XE_INSTR_NUM_DW(3);
195 	dw[i++] = lower_32_bits(addr);
196 	dw[i++] = upper_32_bits(addr);
197 	dw[i++] = MI_NOOP;
198 	dw[i++] = MI_NOOP;
199 
200 	xe_lrc_write_ring(lrc, dw, i * sizeof(u32));
201 	xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
202 }
203 
204 /**
205  * xe_sriov_vf_ccs_rebase - Rebase GGTT addresses for CCS save / restore
206  * @xe: the &xe_device.
207  */
xe_sriov_vf_ccs_rebase(struct xe_device * xe)208 void xe_sriov_vf_ccs_rebase(struct xe_device *xe)
209 {
210 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
211 
212 	if (!IS_VF_CCS_READY(xe))
213 		return;
214 
215 	for_each_ccs_rw_ctx(ctx_id) {
216 		struct xe_sriov_vf_ccs_ctx *ctx =
217 			&xe->sriov.vf.ccs.contexts[ctx_id];
218 
219 		ccs_rw_update_ring(ctx);
220 	}
221 }
222 
register_save_restore_context(struct xe_sriov_vf_ccs_ctx * ctx)223 static int register_save_restore_context(struct xe_sriov_vf_ccs_ctx *ctx)
224 {
225 	int ctx_type;
226 
227 	switch (ctx->ctx_id) {
228 	case XE_SRIOV_VF_CCS_READ_CTX:
229 		ctx_type = GUC_CONTEXT_COMPRESSION_SAVE;
230 		break;
231 	case XE_SRIOV_VF_CCS_WRITE_CTX:
232 		ctx_type = GUC_CONTEXT_COMPRESSION_RESTORE;
233 		break;
234 	default:
235 		return -EINVAL;
236 	}
237 
238 	xe_guc_register_vf_exec_queue(ctx->mig_q, ctx_type);
239 	return 0;
240 }
241 
242 /**
243  * xe_sriov_vf_ccs_register_context - Register read/write contexts with guc.
244  * @xe: the &xe_device to register contexts on.
245  *
246  * This function registers read and write contexts with Guc. Re-registration
247  * is needed whenever resuming from pm runtime suspend.
248  *
249  * Return: 0 on success. Negative error code on failure.
250  */
xe_sriov_vf_ccs_register_context(struct xe_device * xe)251 int xe_sriov_vf_ccs_register_context(struct xe_device *xe)
252 {
253 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
254 	struct xe_sriov_vf_ccs_ctx *ctx;
255 	int err;
256 
257 	xe_assert(xe, IS_VF_CCS_READY(xe));
258 
259 	for_each_ccs_rw_ctx(ctx_id) {
260 		ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
261 		err = register_save_restore_context(ctx);
262 		if (err)
263 			return err;
264 	}
265 
266 	return err;
267 }
268 
269 /*
270  * Whether GuC requires CCS copy BBs for VF migration.
271  * @xe: the &xe_device instance.
272  *
273  * Only selected platforms require VF KMD to maintain CCS copy BBs and linked LRCAs.
274  *
275  * Return: true if VF driver must participate in the CCS migration, false otherwise.
276  */
vf_migration_ccs_bb_needed(struct xe_device * xe)277 static bool vf_migration_ccs_bb_needed(struct xe_device *xe)
278 {
279 	xe_assert(xe, IS_SRIOV_VF(xe));
280 
281 	return !IS_DGFX(xe) && xe_device_has_flat_ccs(xe);
282 }
283 
284 /*
285  * Check for disable migration due to no CCS BBs support in GuC FW.
286  * @xe: the &xe_device instance.
287  *
288  * Performs late disable of VF migration feature in case GuC FW cannot support it.
289  *
290  * Returns: True if VF migration with CCS BBs is supported, false otherwise.
291  */
vf_migration_ccs_bb_support_check(struct xe_device * xe)292 static bool vf_migration_ccs_bb_support_check(struct xe_device *xe)
293 {
294 	struct xe_gt *gt = xe_root_mmio_gt(xe);
295 	struct xe_uc_fw_version guc_version;
296 
297 	xe_gt_sriov_vf_guc_versions(gt, NULL, &guc_version);
298 	if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0)) {
299 		xe_sriov_vf_migration_disable(xe,
300 					      "CCS migration requires GuC ABI >= 1.23 but only %u.%u found",
301 					      guc_version.major, guc_version.minor);
302 		return false;
303 	}
304 
305 	return true;
306 }
307 
/*
 * devm cleanup action for one save/restore context: neutralize the ring,
 * then drop the reference on the migration exec queue taken at init.
 */
static void xe_sriov_vf_ccs_fini(void *arg)
{
	struct xe_sriov_vf_ccs_ctx *ctx = arg;
	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);

	/*
	 * Make TAIL = HEAD in the ring so that no issues are seen if GuC
	 * submits this context to HW on VF pause after unbinding device.
	 */
	xe_lrc_set_ring_tail(lrc, xe_lrc_ring_head(lrc));
	xe_exec_queue_put(ctx->mig_q);
}
320 
/**
 * xe_sriov_vf_ccs_init - Setup LRCA for save & restore.
 * @xe: the &xe_device to start recovery on
 *
 * This function shall be called only by VF. It initializes
 * LRCA and suballocator needed for CCS save & restore.
 *
 * Return: 0 on success. Negative error code on failure.
 */
int xe_sriov_vf_ccs_init(struct xe_device *xe)
{
	struct xe_tile *tile = xe_device_get_root_tile(xe);
	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
	struct xe_sriov_vf_ccs_ctx *ctx;
	struct xe_exec_queue *q;
	u32 flags;
	int err;

	xe_assert(xe, IS_SRIOV_VF(xe));

	/* Nothing to do unless this platform needs VF-managed CCS copy BBs */
	if (!xe_sriov_vf_migration_supported(xe) ||
	    !vf_migration_ccs_bb_needed(xe) ||
	    !vf_migration_ccs_bb_support_check(xe))
		return 0;

	for_each_ccs_rw_ctx(ctx_id) {
		ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
		ctx->ctx_id = ctx_id;

		/* Permanent kernel-owned bind queue used for CCS migration */
		flags = EXEC_QUEUE_FLAG_KERNEL |
			EXEC_QUEUE_FLAG_PERMANENT |
			EXEC_QUEUE_FLAG_MIGRATE;
		q = xe_exec_queue_create_bind(xe, tile, NULL, flags, 0);
		if (IS_ERR(q)) {
			err = PTR_ERR(q);
			goto err_ret;
		}
		ctx->mig_q = q;

		err = alloc_bb_pool(tile, ctx);
		if (err)
			goto err_free_queue;

		/* Point the context's ring at its freshly allocated BB pool */
		ccs_rw_update_ring(ctx);

		err = register_save_restore_context(ctx);
		if (err)
			goto err_free_queue;

		/*
		 * xe_sriov_vf_ccs_fini() puts the queue, so once this action
		 * is registered the queue is owned by devm; later failures
		 * must skip the manual xe_exec_queue_put() below.
		 */
		err = devm_add_action_or_reset(xe->drm.dev,
					       xe_sriov_vf_ccs_fini,
					       ctx);
		if (err)
			goto err_ret;
	}

	xe->sriov.vf.ccs.initialized = 1;

	return 0;

err_free_queue:
	xe_exec_queue_put(q);

err_ret:
	return err;
}
387 
/* Dword offset in the ring of the BB start address low dword (see
 * ccs_rw_update_ring(): dw[2] = lower_32_bits(addr)).
 */
#define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET	(2 * sizeof(u32))
/**
 * xe_sriov_vf_ccs_rw_update_bb_addr - Patch the BB pool address in the ring.
 * @ctx: the &xe_sriov_vf_ccs_ctx to update.
 *
 * Rewrites the MI_BATCH_BUFFER_START target in the context's LRC mapping with
 * the current GPU address of the CCS BB pool, fenced by write barriers on
 * both sides. Only the lower 32 bits are written (xe_map_wr() of a u32);
 * presumably the upper dword emitted by ccs_rw_update_ring() remains valid —
 * TODO confirm. NOTE(review): assumes the ring starts at offset 0 of
 * lrc->bo->vmap — verify against xe_lrc layout.
 */
void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx)
{
	u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
	struct xe_device *xe = gt_to_xe(ctx->mig_q->gt);

	xe_device_wmb(xe);
	xe_map_wr(xe, &lrc->bo->vmap, XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET, u32, addr);
	xe_device_wmb(xe);
}
399 
400 /**
401  * xe_sriov_vf_ccs_attach_bo - Insert CCS read write commands in the BO.
402  * @bo: the &buffer object to which batch buffer commands will be added.
403  *
404  * This function shall be called only by VF. It inserts the PTEs and copy
405  * command instructions in the BO by calling xe_migrate_ccs_rw_copy()
406  * function.
407  *
408  * Returns: 0 if successful, negative error code on failure.
409  */
xe_sriov_vf_ccs_attach_bo(struct xe_bo * bo)410 int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
411 {
412 	struct xe_device *xe = xe_bo_device(bo);
413 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
414 	struct xe_sriov_vf_ccs_ctx *ctx;
415 	struct xe_tile *tile;
416 	struct xe_bb *bb;
417 	int err = 0;
418 
419 	xe_assert(xe, IS_VF_CCS_READY(xe));
420 
421 	tile = xe_device_get_root_tile(xe);
422 
423 	for_each_ccs_rw_ctx(ctx_id) {
424 		bb = bo->bb_ccs[ctx_id];
425 		/* bb should be NULL here. Assert if not NULL */
426 		xe_assert(xe, !bb);
427 
428 		ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
429 		err = xe_migrate_ccs_rw_copy(tile, ctx->mig_q, bo, ctx_id);
430 	}
431 	return err;
432 }
433 
434 /**
435  * xe_sriov_vf_ccs_detach_bo - Remove CCS read write commands from the BO.
436  * @bo: the &buffer object from which batch buffer commands will be removed.
437  *
438  * This function shall be called only by VF. It removes the PTEs and copy
439  * command instructions from the BO. Make sure to update the BB with MI_NOOP
440  * before freeing.
441  *
442  * Returns: 0 if successful.
443  */
xe_sriov_vf_ccs_detach_bo(struct xe_bo * bo)444 int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
445 {
446 	struct xe_device *xe = xe_bo_device(bo);
447 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
448 	struct xe_bb *bb;
449 
450 	xe_assert(xe, IS_VF_CCS_READY(xe));
451 
452 	if (!xe_bo_has_valid_ccs_bb(bo))
453 		return 0;
454 
455 	for_each_ccs_rw_ctx(ctx_id) {
456 		bb = bo->bb_ccs[ctx_id];
457 		if (!bb)
458 			continue;
459 
460 		xe_migrate_ccs_rw_copy_clear(bo, ctx_id);
461 	}
462 	return 0;
463 }
464 
465 /**
466  * xe_sriov_vf_ccs_print - Print VF CCS details.
467  * @xe: the &xe_device
468  * @p: the &drm_printer
469  *
470  * This function is for VF use only.
471  */
xe_sriov_vf_ccs_print(struct xe_device * xe,struct drm_printer * p)472 void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
473 {
474 	struct xe_sa_manager *bb_pool;
475 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
476 
477 	if (!IS_VF_CCS_READY(xe))
478 		return;
479 
480 	guard(xe_pm_runtime)(xe);
481 	for_each_ccs_rw_ctx(ctx_id) {
482 		bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool;
483 		if (!bb_pool)
484 			break;
485 
486 		drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read");
487 		drm_printf(p, "-------------------------\n");
488 		drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool));
489 		drm_puts(p, "\n");
490 	}
491 }
492