xref: /linux/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c (revision d97e2634fbdcd238a51bc363267df0139c17f4da)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2024 Intel Corporation
4  */
5 
6 #include <drm/drm_managed.h>
7 
8 #include "abi/guc_actions_sriov_abi.h"
9 #include "xe_bo.h"
10 #include "xe_gt_sriov_pf_helpers.h"
11 #include "xe_gt_sriov_pf_migration.h"
12 #include "xe_gt_sriov_printk.h"
13 #include "xe_guc.h"
14 #include "xe_guc_ct.h"
15 #include "xe_sriov.h"
16 
17 /* Return: number of dwords saved/restored/required or a negative error code on failure */
18 static int guc_action_vf_save_restore(struct xe_guc *guc, u32 vfid, u32 opcode,
19 				      u64 addr, u32 ndwords)
20 {
21 	u32 request[PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_LEN] = {
22 		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
23 		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
24 		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_SAVE_RESTORE_VF) |
25 		FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_0_OPCODE, opcode),
26 		FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_1_VFID, vfid),
27 		FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_2_ADDR_LO, lower_32_bits(addr)),
28 		FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_3_ADDR_HI, upper_32_bits(addr)),
29 		FIELD_PREP(PF2GUC_SAVE_RESTORE_VF_REQUEST_MSG_4_SIZE, ndwords),
30 	};
31 
32 	return xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
33 }
34 
35 /* Return: size of the state in dwords or a negative error code on failure */
36 static int pf_send_guc_query_vf_state_size(struct xe_gt *gt, unsigned int vfid)
37 {
38 	int ret;
39 
40 	ret = guc_action_vf_save_restore(&gt->uc.guc, vfid, GUC_PF_OPCODE_VF_SAVE, 0, 0);
41 	return ret ?: -ENODATA;
42 }
43 
44 /* Return: number of state dwords saved or a negative error code on failure */
45 static int pf_send_guc_save_vf_state(struct xe_gt *gt, unsigned int vfid,
46 				     void *buff, size_t size)
47 {
48 	const int ndwords = size / sizeof(u32);
49 	struct xe_tile *tile = gt_to_tile(gt);
50 	struct xe_device *xe = tile_to_xe(tile);
51 	struct xe_guc *guc = &gt->uc.guc;
52 	struct xe_bo *bo;
53 	int ret;
54 
55 	xe_gt_assert(gt, size % sizeof(u32) == 0);
56 	xe_gt_assert(gt, size == ndwords * sizeof(u32));
57 
58 	bo = xe_bo_create_pin_map(xe, tile, NULL,
59 				  ALIGN(size, PAGE_SIZE),
60 				  ttm_bo_type_kernel,
61 				  XE_BO_FLAG_SYSTEM |
62 				  XE_BO_FLAG_GGTT |
63 				  XE_BO_FLAG_GGTT_INVALIDATE);
64 	if (IS_ERR(bo))
65 		return PTR_ERR(bo);
66 
67 	ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_SAVE,
68 					 xe_bo_ggtt_addr(bo), ndwords);
69 	if (!ret)
70 		ret = -ENODATA;
71 	else if (ret > ndwords)
72 		ret = -EPROTO;
73 	else if (ret > 0)
74 		xe_map_memcpy_from(xe, buff, &bo->vmap, 0, ret * sizeof(u32));
75 
76 	xe_bo_unpin_map_no_vm(bo);
77 	return ret;
78 }
79 
80 /* Return: number of state dwords restored or a negative error code on failure */
81 static int pf_send_guc_restore_vf_state(struct xe_gt *gt, unsigned int vfid,
82 					const void *buff, size_t size)
83 {
84 	const int ndwords = size / sizeof(u32);
85 	struct xe_tile *tile = gt_to_tile(gt);
86 	struct xe_device *xe = tile_to_xe(tile);
87 	struct xe_guc *guc = &gt->uc.guc;
88 	struct xe_bo *bo;
89 	int ret;
90 
91 	xe_gt_assert(gt, size % sizeof(u32) == 0);
92 	xe_gt_assert(gt, size == ndwords * sizeof(u32));
93 
94 	bo = xe_bo_create_pin_map(xe, tile, NULL,
95 				  ALIGN(size, PAGE_SIZE),
96 				  ttm_bo_type_kernel,
97 				  XE_BO_FLAG_SYSTEM |
98 				  XE_BO_FLAG_GGTT |
99 				  XE_BO_FLAG_GGTT_INVALIDATE);
100 	if (IS_ERR(bo))
101 		return PTR_ERR(bo);
102 
103 	xe_map_memcpy_to(xe, &bo->vmap, 0, buff, size);
104 
105 	ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_RESTORE,
106 					 xe_bo_ggtt_addr(bo), ndwords);
107 	if (!ret)
108 		ret = -ENODATA;
109 	else if (ret > ndwords)
110 		ret = -EPROTO;
111 
112 	xe_bo_unpin_map_no_vm(bo);
113 	return ret;
114 }
115 
116 static bool pf_migration_supported(struct xe_gt *gt)
117 {
118 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
119 	return gt->sriov.pf.migration.supported;
120 }
121 
122 static struct mutex *pf_migration_mutex(struct xe_gt *gt)
123 {
124 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
125 	return &gt->sriov.pf.migration.snapshot_lock;
126 }
127 
128 static struct xe_gt_sriov_state_snapshot *pf_pick_vf_snapshot(struct xe_gt *gt,
129 							      unsigned int vfid)
130 {
131 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
132 	xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
133 	lockdep_assert_held(pf_migration_mutex(gt));
134 
135 	return &gt->sriov.pf.vfs[vfid].snapshot;
136 }
137 
138 static unsigned int pf_snapshot_index(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot)
139 {
140 	return container_of(snapshot, struct xe_gt_sriov_metadata, snapshot) - gt->sriov.pf.vfs;
141 }
142 
143 static void pf_free_guc_state(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot)
144 {
145 	struct xe_device *xe = gt_to_xe(gt);
146 
147 	drmm_kfree(&xe->drm, snapshot->guc.buff);
148 	snapshot->guc.buff = NULL;
149 	snapshot->guc.size = 0;
150 }
151 
152 static int pf_alloc_guc_state(struct xe_gt *gt,
153 			      struct xe_gt_sriov_state_snapshot *snapshot,
154 			      size_t size)
155 {
156 	struct xe_device *xe = gt_to_xe(gt);
157 	void *p;
158 
159 	pf_free_guc_state(gt, snapshot);
160 
161 	if (!size)
162 		return -ENODATA;
163 
164 	if (size % sizeof(u32))
165 		return -EINVAL;
166 
167 	if (size > SZ_2M)
168 		return -EFBIG;
169 
170 	p = drmm_kzalloc(&xe->drm, size, GFP_KERNEL);
171 	if (!p)
172 		return -ENOMEM;
173 
174 	snapshot->guc.buff = p;
175 	snapshot->guc.size = size;
176 	return 0;
177 }
178 
179 static void pf_dump_guc_state(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot)
180 {
181 	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) {
182 		unsigned int vfid __maybe_unused = pf_snapshot_index(gt, snapshot);
183 
184 		xe_gt_sriov_dbg_verbose(gt, "VF%u GuC state is %zu dwords:\n",
185 					vfid, snapshot->guc.size / sizeof(u32));
186 		print_hex_dump_bytes("state: ", DUMP_PREFIX_OFFSET,
187 				     snapshot->guc.buff, min(SZ_64, snapshot->guc.size));
188 	}
189 }
190 
191 static int pf_save_vf_guc_state(struct xe_gt *gt, unsigned int vfid)
192 {
193 	struct xe_gt_sriov_state_snapshot *snapshot = pf_pick_vf_snapshot(gt, vfid);
194 	size_t size;
195 	int ret;
196 
197 	ret = pf_send_guc_query_vf_state_size(gt, vfid);
198 	if (ret < 0)
199 		goto fail;
200 	size = ret * sizeof(u32);
201 	xe_gt_sriov_dbg_verbose(gt, "VF%u state size is %d dwords (%zu bytes)\n", vfid, ret, size);
202 
203 	ret = pf_alloc_guc_state(gt, snapshot, size);
204 	if (ret < 0)
205 		goto fail;
206 
207 	ret = pf_send_guc_save_vf_state(gt, vfid, snapshot->guc.buff, size);
208 	if (ret < 0)
209 		goto fail;
210 	size = ret * sizeof(u32);
211 	xe_gt_assert(gt, size);
212 	xe_gt_assert(gt, size <= snapshot->guc.size);
213 	snapshot->guc.size = size;
214 
215 	pf_dump_guc_state(gt, snapshot);
216 	return 0;
217 
218 fail:
219 	xe_gt_sriov_dbg(gt, "Unable to save VF%u state (%pe)\n", vfid, ERR_PTR(ret));
220 	pf_free_guc_state(gt, snapshot);
221 	return ret;
222 }
223 
224 /**
225  * xe_gt_sriov_pf_migration_save_guc_state() - Take a GuC VF state snapshot.
226  * @gt: the &xe_gt
227  * @vfid: the VF identifier
228  *
229  * This function is for PF only.
230  *
231  * Return: 0 on success or a negative error code on failure.
232  */
233 int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid)
234 {
235 	int err;
236 
237 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
238 	xe_gt_assert(gt, vfid != PFID);
239 	xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
240 
241 	if (!pf_migration_supported(gt))
242 		return -ENOPKG;
243 
244 	mutex_lock(pf_migration_mutex(gt));
245 	err = pf_save_vf_guc_state(gt, vfid);
246 	mutex_unlock(pf_migration_mutex(gt));
247 
248 	return err;
249 }
250 
251 static int pf_restore_vf_guc_state(struct xe_gt *gt, unsigned int vfid)
252 {
253 	struct xe_gt_sriov_state_snapshot *snapshot = pf_pick_vf_snapshot(gt, vfid);
254 	int ret;
255 
256 	if (!snapshot->guc.size)
257 		return -ENODATA;
258 
259 	xe_gt_sriov_dbg_verbose(gt, "restoring %zu dwords of VF%u GuC state\n",
260 				snapshot->guc.size / sizeof(u32), vfid);
261 	ret = pf_send_guc_restore_vf_state(gt, vfid, snapshot->guc.buff, snapshot->guc.size);
262 	if (ret < 0)
263 		goto fail;
264 
265 	xe_gt_sriov_dbg_verbose(gt, "restored %d dwords of VF%u GuC state\n", ret, vfid);
266 	return 0;
267 
268 fail:
269 	xe_gt_sriov_dbg(gt, "Failed to restore VF%u GuC state (%pe)\n", vfid, ERR_PTR(ret));
270 	return ret;
271 }
272 
273 /**
274  * xe_gt_sriov_pf_migration_restore_guc_state() - Restore a GuC VF state.
275  * @gt: the &xe_gt
276  * @vfid: the VF identifier
277  *
278  * This function is for PF only.
279  *
280  * Return: 0 on success or a negative error code on failure.
281  */
282 int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid)
283 {
284 	int ret;
285 
286 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
287 	xe_gt_assert(gt, vfid != PFID);
288 	xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
289 
290 	if (!pf_migration_supported(gt))
291 		return -ENOPKG;
292 
293 	mutex_lock(pf_migration_mutex(gt));
294 	ret = pf_restore_vf_guc_state(gt, vfid);
295 	mutex_unlock(pf_migration_mutex(gt));
296 
297 	return ret;
298 }
299 
300 #ifdef CONFIG_DEBUG_FS
301 /**
302  * xe_gt_sriov_pf_migration_read_guc_state() - Read a GuC VF state.
303  * @gt: the &xe_gt
304  * @vfid: the VF identifier
305  * @buf: the user space buffer to read to
306  * @count: the maximum number of bytes to read
307  * @pos: the current position in the buffer
308  *
309  * This function is for PF only.
310  *
311  * This function reads up to @count bytes from the saved VF GuC state buffer
312  * at offset @pos into the user space address starting at @buf.
313  *
314  * Return: the number of bytes read or a negative error code on failure.
315  */
316 ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid,
317 						char __user *buf, size_t count, loff_t *pos)
318 {
319 	struct xe_gt_sriov_state_snapshot *snapshot;
320 	ssize_t ret;
321 
322 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
323 	xe_gt_assert(gt, vfid != PFID);
324 	xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
325 
326 	if (!pf_migration_supported(gt))
327 		return -ENOPKG;
328 
329 	mutex_lock(pf_migration_mutex(gt));
330 	snapshot = pf_pick_vf_snapshot(gt, vfid);
331 	if (snapshot->guc.size)
332 		ret = simple_read_from_buffer(buf, count, pos, snapshot->guc.buff,
333 					      snapshot->guc.size);
334 	else
335 		ret = -ENODATA;
336 	mutex_unlock(pf_migration_mutex(gt));
337 
338 	return ret;
339 }
340 
341 /**
342  * xe_gt_sriov_pf_migration_write_guc_state() - Write a GuC VF state.
343  * @gt: the &xe_gt
344  * @vfid: the VF identifier
345  * @buf: the user space buffer with GuC VF state
346  * @size: the size of GuC VF state (in bytes)
347  *
348  * This function is for PF only.
349  *
350  * This function reads @size bytes of the VF GuC state stored at user space
351  * address @buf and writes it into a internal VF state buffer.
352  *
353  * Return: the number of bytes used or a negative error code on failure.
354  */
355 ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int vfid,
356 						 const char __user *buf, size_t size)
357 {
358 	struct xe_gt_sriov_state_snapshot *snapshot;
359 	loff_t pos = 0;
360 	ssize_t ret;
361 
362 	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
363 	xe_gt_assert(gt, vfid != PFID);
364 	xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
365 
366 	if (!pf_migration_supported(gt))
367 		return -ENOPKG;
368 
369 	mutex_lock(pf_migration_mutex(gt));
370 	snapshot = pf_pick_vf_snapshot(gt, vfid);
371 	ret = pf_alloc_guc_state(gt, snapshot, size);
372 	if (!ret) {
373 		ret = simple_write_to_buffer(snapshot->guc.buff, size, &pos, buf, size);
374 		if (ret < 0)
375 			pf_free_guc_state(gt, snapshot);
376 		else
377 			pf_dump_guc_state(gt, snapshot);
378 	}
379 	mutex_unlock(pf_migration_mutex(gt));
380 
381 	return ret;
382 }
383 #endif /* CONFIG_DEBUG_FS */
384 
385 static bool pf_check_migration_support(struct xe_gt *gt)
386 {
387 	/* GuC 70.25 with save/restore v2 is required */
388 	xe_gt_assert(gt, GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(70, 25, 0));
389 
390 	/* XXX: for now this is for feature enabling only */
391 	return IS_ENABLED(CONFIG_DRM_XE_DEBUG);
392 }
393 
394 /**
395  * xe_gt_sriov_pf_migration_init() - Initialize support for VF migration.
396  * @gt: the &xe_gt
397  *
398  * This function is for PF only.
399  *
400  * Return: 0 on success or a negative error code on failure.
401  */
402 int xe_gt_sriov_pf_migration_init(struct xe_gt *gt)
403 {
404 	struct xe_device *xe = gt_to_xe(gt);
405 	int err;
406 
407 	xe_gt_assert(gt, IS_SRIOV_PF(xe));
408 
409 	gt->sriov.pf.migration.supported = pf_check_migration_support(gt);
410 
411 	if (!pf_migration_supported(gt))
412 		return 0;
413 
414 	err = drmm_mutex_init(&xe->drm, &gt->sriov.pf.migration.snapshot_lock);
415 	if (err)
416 		return err;
417 
418 	return 0;
419 }
420