xref: /linux/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c (revision 58809f614e0e3f4e12b489bddf680bfeb31c0a20)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2025 Intel Corporation
4  */
5 
6 #include "instructions/xe_mi_commands.h"
7 #include "instructions/xe_gpu_commands.h"
8 #include "xe_bb.h"
9 #include "xe_bo.h"
10 #include "xe_device.h"
11 #include "xe_exec_queue.h"
12 #include "xe_exec_queue_types.h"
13 #include "xe_guc_submit.h"
14 #include "xe_lrc.h"
15 #include "xe_migrate.h"
16 #include "xe_pm.h"
17 #include "xe_sa.h"
18 #include "xe_sriov_printk.h"
19 #include "xe_sriov_vf.h"
20 #include "xe_sriov_vf_ccs.h"
21 #include "xe_sriov_vf_ccs_types.h"
22 
23 /**
24  * DOC: VF save/restore of compression Meta Data
25  *
 * The VF KMD registers two special contexts/LRCAs.
 *
 * Save Context/LRCA: contains the necessary cmds + page table to trigger the
 * save of meta data / compression control surface (aka CCS) into regular
 * system memory in the VM.
 *
 * Restore Context/LRCA: contains the necessary cmds + page table to trigger the
 * restore of meta data / compression control surface (aka CCS) from regular
 * system memory in the VM to the corresponding CCS pool.
 *
 * The diagram below explains the steps needed for VF save/restore of compression meta data::
36  *
37  *    CCS Save    CCS Restore          VF KMD                          Guc       BCS
38  *     LRCA        LRCA
39  *      |           |                     |                              |         |
40  *      |           |                     |                              |         |
41  *      |     Create Save LRCA            |                              |         |
42  *     [ ]<----------------------------- [ ]                             |         |
43  *      |           |                     |                              |         |
44  *      |           |                     |                              |         |
45  *      |           |                     |       Register save LRCA     |         |
46  *      |           |                     |           with Guc           |         |
47  *      |           |                    [ ]--------------------------->[ ]        |
48  *      |           |                     |                              |         |
49  *      |           | Create restore LRCA |                              |         |
50  *      |          [ ]<------------------[ ]                             |         |
51  *      |           |                     |                              |         |
52  *      |           |                     |       Register restore LRCA  |         |
53  *      |           |                     |           with Guc           |         |
54  *      |           |                    [ ]--------------------------->[ ]        |
55  *      |           |                     |                              |         |
56  *      |           |                     |                              |         |
57  *      |           |                    [ ]-------------------------    |         |
58  *      |           |                    [ ]  Allocate main memory.  |   |         |
59  *      |           |                    [ ]  Allocate CCS memory.   |   |         |
60  *      |           |                    [ ]  Update Main memory &   |   |         |
61  *     [ ]<------------------------------[ ]  CCS pages PPGTT + BB   |   |         |
62  *      |          [ ]<------------------[ ]  cmds to save & restore.|   |         |
63  *      |           |                    [ ]<------------------------    |         |
64  *      |           |                     |                              |         |
65  *      |           |                     |                              |         |
66  *      |           |                     |                              |         |
67  *      :           :                     :                              :         :
68  *      ---------------------------- VF Paused -------------------------------------
69  *      |           |                     |                              |         |
70  *      |           |                     |                              |         |
71  *      |           |                     |                              |Schedule |
72  *      |           |                     |                              |CCS Save |
73  *      |           |                     |                              | LRCA    |
74  *      |           |                     |                             [ ]------>[ ]
75  *      |           |                     |                              |         |
76  *      |           |                     |                              |         |
77  *      |           |                     |                              |CCS save |
78  *      |           |                     |                              |completed|
79  *      |           |                     |                             [ ]<------[ ]
80  *      |           |                     |                              |         |
81  *      :           :                     :                              :         :
82  *      ---------------------------- VM Migrated -----------------------------------
83  *      |           |                     |                              |         |
84  *      |           |                     |                              |         |
85  *      :           :                     :                              :         :
86  *      ---------------------------- VF Resumed ------------------------------------
87  *      |           |                     |                              |         |
88  *      |           |                     |                              |         |
89  *      |           |                    [ ]--------------               |         |
90  *      |           |                    [ ] Fix up GGTT  |              |         |
91  *      |           |                    [ ]<-------------               |         |
92  *      |           |                     |                              |         |
93  *      |           |                     |                              |         |
94  *      |           |                     |  Notify VF_RESFIX_DONE       |         |
95  *      |           |                    [ ]--------------------------->[ ]        |
96  *      |           |                     |                              |         |
97  *      |           |                     |                              |Schedule |
98  *      |           |                     |                              |CCS      |
99  *      |           |                     |                              |Restore  |
100  *      |           |                     |                              |LRCA     |
101  *      |           |                     |                             [ ]------>[ ]
102  *      |           |                     |                              |         |
103  *      |           |                     |                              |         |
104  *      |           |                     |                              |CCS      |
105  *      |           |                     |                              |restore  |
106  *      |           |                     |                              |completed|
107  *      |           |                     |                             [ ]<------[ ]
108  *      |           |                     |                              |         |
109  *      |           |                     |                              |         |
110  *      |           |                     |  VF_RESFIX_DONE complete     |         |
111  *      |           |                     |       notification           |         |
112  *      |           |                    [ ]<---------------------------[ ]        |
113  *      |           |                     |                              |         |
114  *      |           |                     |                              |         |
115  *      :           :                     :                              :         :
116  *      ------------------------- Continue VM restore ------------------------------
117  */
118 
119 static u64 get_ccs_bb_pool_size(struct xe_device *xe)
120 {
121 	u64 sys_mem_size, ccs_mem_size, ptes, bb_pool_size;
122 	struct sysinfo si;
123 
124 	si_meminfo(&si);
125 	sys_mem_size = si.totalram * si.mem_unit;
126 	ccs_mem_size = div64_u64(sys_mem_size, NUM_BYTES_PER_CCS_BYTE(xe));
127 	ptes = DIV_ROUND_UP_ULL(sys_mem_size + ccs_mem_size, XE_PAGE_SIZE);
128 
129 	/**
130 	 * We need below BB size to hold PTE mappings and some DWs for copy
131 	 * command. In reality, we need space for many copy commands. So, let
132 	 * us allocate double the calculated size which is enough to holds GPU
133 	 * instructions for the whole region.
134 	 */
135 	bb_pool_size = ptes * sizeof(u32);
136 
137 	return round_up(bb_pool_size * 2, SZ_1M);
138 }
139 
140 static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
141 {
142 	struct xe_device *xe = tile_to_xe(tile);
143 	struct xe_sa_manager *sa_manager;
144 	u64 bb_pool_size;
145 	int offset, err;
146 
147 	bb_pool_size = get_ccs_bb_pool_size(xe);
148 	xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
149 		      ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);
150 
151 	sa_manager = xe_sa_bo_manager_init(tile, bb_pool_size, SZ_16);
152 
153 	if (IS_ERR(sa_manager)) {
154 		xe_sriov_err(xe, "Suballocator init failed with error: %pe\n",
155 			     sa_manager);
156 		err = PTR_ERR(sa_manager);
157 		return err;
158 	}
159 
160 	offset = 0;
161 	xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
162 		      bb_pool_size);
163 
164 	offset = bb_pool_size - sizeof(u32);
165 	xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);
166 
167 	ctx->mem.ccs_bb_pool = sa_manager;
168 
169 	return 0;
170 }
171 
172 static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx)
173 {
174 	u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
175 	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
176 	u32 dw[10], i = 0;
177 
178 	dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE;
179 	dw[i++] = MI_BATCH_BUFFER_START | XE_INSTR_NUM_DW(3);
180 	dw[i++] = lower_32_bits(addr);
181 	dw[i++] = upper_32_bits(addr);
182 	dw[i++] = MI_NOOP;
183 	dw[i++] = MI_NOOP;
184 
185 	xe_lrc_write_ring(lrc, dw, i * sizeof(u32));
186 	xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
187 }
188 
189 static int register_save_restore_context(struct xe_sriov_vf_ccs_ctx *ctx)
190 {
191 	int ctx_type;
192 
193 	switch (ctx->ctx_id) {
194 	case XE_SRIOV_VF_CCS_READ_CTX:
195 		ctx_type = GUC_CONTEXT_COMPRESSION_SAVE;
196 		break;
197 	case XE_SRIOV_VF_CCS_WRITE_CTX:
198 		ctx_type = GUC_CONTEXT_COMPRESSION_RESTORE;
199 		break;
200 	default:
201 		return -EINVAL;
202 	}
203 
204 	xe_guc_register_vf_exec_queue(ctx->mig_q, ctx_type);
205 	return 0;
206 }
207 
208 /**
209  * xe_sriov_vf_ccs_register_context - Register read/write contexts with guc.
210  * @xe: the &xe_device to register contexts on.
211  *
212  * This function registers read and write contexts with Guc. Re-registration
213  * is needed whenever resuming from pm runtime suspend.
214  *
215  * Return: 0 on success. Negative error code on failure.
216  */
217 int xe_sriov_vf_ccs_register_context(struct xe_device *xe)
218 {
219 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
220 	struct xe_sriov_vf_ccs_ctx *ctx;
221 	int err;
222 
223 	xe_assert(xe, IS_VF_CCS_READY(xe));
224 
225 	for_each_ccs_rw_ctx(ctx_id) {
226 		ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
227 		err = register_save_restore_context(ctx);
228 		if (err)
229 			return err;
230 	}
231 
232 	return err;
233 }
234 
235 static void xe_sriov_vf_ccs_fini(void *arg)
236 {
237 	struct xe_sriov_vf_ccs_ctx *ctx = arg;
238 	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
239 
240 	/*
241 	 * Make TAIL = HEAD in the ring so that no issues are seen if Guc
242 	 * submits this context to HW on VF pause after unbinding device.
243 	 */
244 	xe_lrc_set_ring_tail(lrc, xe_lrc_ring_head(lrc));
245 	xe_exec_queue_put(ctx->mig_q);
246 }
247 
248 /**
249  * xe_sriov_vf_ccs_init - Setup LRCA for save & restore.
250  * @xe: the &xe_device to start recovery on
251  *
252  * This function shall be called only by VF. It initializes
253  * LRCA and suballocator needed for CCS save & restore.
254  *
255  * Return: 0 on success. Negative error code on failure.
256  */
257 int xe_sriov_vf_ccs_init(struct xe_device *xe)
258 {
259 	struct xe_tile *tile = xe_device_get_root_tile(xe);
260 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
261 	struct xe_sriov_vf_ccs_ctx *ctx;
262 	struct xe_exec_queue *q;
263 	u32 flags;
264 	int err;
265 
266 	xe_assert(xe, IS_SRIOV_VF(xe));
267 	xe_assert(xe, xe_sriov_vf_migration_supported(xe));
268 
269 	if (IS_DGFX(xe) || !xe_device_has_flat_ccs(xe))
270 		return 0;
271 
272 	for_each_ccs_rw_ctx(ctx_id) {
273 		ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
274 		ctx->ctx_id = ctx_id;
275 
276 		flags = EXEC_QUEUE_FLAG_KERNEL |
277 			EXEC_QUEUE_FLAG_PERMANENT |
278 			EXEC_QUEUE_FLAG_MIGRATE;
279 		q = xe_exec_queue_create_bind(xe, tile, flags, 0);
280 		if (IS_ERR(q)) {
281 			err = PTR_ERR(q);
282 			goto err_ret;
283 		}
284 		ctx->mig_q = q;
285 
286 		err = alloc_bb_pool(tile, ctx);
287 		if (err)
288 			goto err_free_queue;
289 
290 		ccs_rw_update_ring(ctx);
291 
292 		err = register_save_restore_context(ctx);
293 		if (err)
294 			goto err_free_queue;
295 
296 		err = devm_add_action_or_reset(xe->drm.dev,
297 					       xe_sriov_vf_ccs_fini,
298 					       ctx);
299 		if (err)
300 			goto err_ret;
301 	}
302 
303 	xe->sriov.vf.ccs.initialized = 1;
304 
305 	return 0;
306 
307 err_free_queue:
308 	xe_exec_queue_put(q);
309 
310 err_ret:
311 	return err;
312 }
313 
314 /**
315  * xe_sriov_vf_ccs_attach_bo - Insert CCS read write commands in the BO.
316  * @bo: the &buffer object to which batch buffer commands will be added.
317  *
318  * This function shall be called only by VF. It inserts the PTEs and copy
319  * command instructions in the BO by calling xe_migrate_ccs_rw_copy()
320  * function.
321  *
322  * Returns: 0 if successful, negative error code on failure.
323  */
324 int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
325 {
326 	struct xe_device *xe = xe_bo_device(bo);
327 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
328 	struct xe_sriov_vf_ccs_ctx *ctx;
329 	struct xe_tile *tile;
330 	struct xe_bb *bb;
331 	int err = 0;
332 
333 	xe_assert(xe, IS_VF_CCS_READY(xe));
334 
335 	tile = xe_device_get_root_tile(xe);
336 
337 	for_each_ccs_rw_ctx(ctx_id) {
338 		bb = bo->bb_ccs[ctx_id];
339 		/* bb should be NULL here. Assert if not NULL */
340 		xe_assert(xe, !bb);
341 
342 		ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
343 		err = xe_migrate_ccs_rw_copy(tile, ctx->mig_q, bo, ctx_id);
344 	}
345 	return err;
346 }
347 
348 /**
349  * xe_sriov_vf_ccs_detach_bo - Remove CCS read write commands from the BO.
350  * @bo: the &buffer object from which batch buffer commands will be removed.
351  *
352  * This function shall be called only by VF. It removes the PTEs and copy
353  * command instructions from the BO. Make sure to update the BB with MI_NOOP
354  * before freeing.
355  *
356  * Returns: 0 if successful.
357  */
358 int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
359 {
360 	struct xe_device *xe = xe_bo_device(bo);
361 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
362 	struct xe_bb *bb;
363 
364 	xe_assert(xe, IS_VF_CCS_READY(xe));
365 
366 	if (!xe_bo_has_valid_ccs_bb(bo))
367 		return 0;
368 
369 	for_each_ccs_rw_ctx(ctx_id) {
370 		bb = bo->bb_ccs[ctx_id];
371 		if (!bb)
372 			continue;
373 
374 		memset(bb->cs, MI_NOOP, bb->len * sizeof(u32));
375 		xe_bb_free(bb, NULL);
376 		bo->bb_ccs[ctx_id] = NULL;
377 	}
378 	return 0;
379 }
380 
381 /**
382  * xe_sriov_vf_ccs_print - Print VF CCS details.
383  * @xe: the &xe_device
384  * @p: the &drm_printer
385  *
386  * This function is for VF use only.
387  */
388 void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
389 {
390 	struct xe_sa_manager *bb_pool;
391 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
392 
393 	if (!IS_VF_CCS_READY(xe))
394 		return;
395 
396 	xe_pm_runtime_get(xe);
397 
398 	for_each_ccs_rw_ctx(ctx_id) {
399 		bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool;
400 		if (!bb_pool)
401 			break;
402 
403 		drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read");
404 		drm_printf(p, "-------------------------\n");
405 		drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool));
406 		drm_puts(p, "\n");
407 	}
408 
409 	xe_pm_runtime_put(xe);
410 }
411