xref: /linux/drivers/gpu/drm/xe/xe_sriov_pf_migration.c (revision 51d24842acb9b8d643046c71314cc3d7a846a3cf)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2025 Intel Corporation
4  */
5 
6 #include <drm/drm_managed.h>
7 
8 #include "xe_device.h"
9 #include "xe_gt_sriov_pf_control.h"
10 #include "xe_gt_sriov_pf_migration.h"
11 #include "xe_pm.h"
12 #include "xe_sriov.h"
13 #include "xe_sriov_packet.h"
14 #include "xe_sriov_packet_types.h"
15 #include "xe_sriov_pf_helpers.h"
16 #include "xe_sriov_pf_migration.h"
17 #include "xe_sriov_printk.h"
18 
pf_pick_migration(struct xe_device * xe,unsigned int vfid)19 static struct xe_sriov_migration_state *pf_pick_migration(struct xe_device *xe, unsigned int vfid)
20 {
21 	xe_assert(xe, IS_SRIOV_PF(xe));
22 	xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe));
23 
24 	return &xe->sriov.pf.vfs[vfid].migration;
25 }
26 
27 /**
28  * xe_sriov_pf_migration_waitqueue() - Get waitqueue for migration.
29  * @xe: the &xe_device
30  * @vfid: the VF identifier
31  *
32  * Return: pointer to the migration waitqueue.
33  */
xe_sriov_pf_migration_waitqueue(struct xe_device * xe,unsigned int vfid)34 wait_queue_head_t *xe_sriov_pf_migration_waitqueue(struct xe_device *xe, unsigned int vfid)
35 {
36 	return &pf_pick_migration(xe, vfid)->wq;
37 }
38 
39 /**
40  * xe_sriov_pf_migration_supported() - Check if SR-IOV VF migration is supported by the device
41  * @xe: the &xe_device
42  *
43  * Return: true if migration is supported, false otherwise
44  */
xe_sriov_pf_migration_supported(struct xe_device * xe)45 bool xe_sriov_pf_migration_supported(struct xe_device *xe)
46 {
47 	xe_assert(xe, IS_SRIOV_PF(xe));
48 
49 	return IS_ENABLED(CONFIG_DRM_XE_DEBUG) || !xe->sriov.pf.migration.disabled;
50 }
51 
52 /**
53  * xe_sriov_pf_migration_disable() - Turn off SR-IOV VF migration support on PF.
54  * @xe: the &xe_device instance.
55  * @fmt: format string for the log message, to be combined with following VAs.
56  */
xe_sriov_pf_migration_disable(struct xe_device * xe,const char * fmt,...)57 void xe_sriov_pf_migration_disable(struct xe_device *xe, const char *fmt, ...)
58 {
59 	struct va_format vaf;
60 	va_list va_args;
61 
62 	xe_assert(xe, IS_SRIOV_PF(xe));
63 
64 	va_start(va_args, fmt);
65 	vaf.fmt = fmt;
66 	vaf.va  = &va_args;
67 	xe_sriov_notice(xe, "migration %s: %pV\n",
68 			IS_ENABLED(CONFIG_DRM_XE_DEBUG) ?
69 			"missing prerequisite" : "disabled",
70 			&vaf);
71 	va_end(va_args);
72 
73 	xe->sriov.pf.migration.disabled = true;
74 }
75 
/* Disable migration when the device lacks a prerequisite (memory-based IRQ). */
static void pf_migration_check_support(struct xe_device *xe)
{
	if (xe_device_has_memirq(xe))
		return;

	xe_sriov_pf_migration_disable(xe, "requires memory-based IRQ support");
}
81 
pf_migration_cleanup(void * arg)82 static void pf_migration_cleanup(void *arg)
83 {
84 	struct xe_sriov_migration_state *migration = arg;
85 
86 	xe_sriov_packet_free(migration->pending);
87 	xe_sriov_packet_free(migration->trailer);
88 	xe_sriov_packet_free(migration->descriptor);
89 }
90 
91 /**
92  * xe_sriov_pf_migration_init() - Initialize support for SR-IOV VF migration.
93  * @xe: the &xe_device
94  *
95  * Return: 0 on success or a negative error code on failure.
96  */
xe_sriov_pf_migration_init(struct xe_device * xe)97 int xe_sriov_pf_migration_init(struct xe_device *xe)
98 {
99 	unsigned int n, totalvfs;
100 	int err;
101 
102 	xe_assert(xe, IS_SRIOV_PF(xe));
103 
104 	pf_migration_check_support(xe);
105 
106 	if (!xe_sriov_pf_migration_supported(xe))
107 		return 0;
108 
109 	totalvfs = xe_sriov_pf_get_totalvfs(xe);
110 	for (n = 1; n <= totalvfs; n++) {
111 		struct xe_sriov_migration_state *migration = pf_pick_migration(xe, n);
112 
113 		err = drmm_mutex_init(&xe->drm, &migration->lock);
114 		if (err)
115 			return err;
116 
117 		init_waitqueue_head(&migration->wq);
118 
119 		err = devm_add_action_or_reset(xe->drm.dev, pf_migration_cleanup, migration);
120 		if (err)
121 			return err;
122 	}
123 
124 	return 0;
125 }
126 
pf_migration_data_ready(struct xe_device * xe,unsigned int vfid)127 static bool pf_migration_data_ready(struct xe_device *xe, unsigned int vfid)
128 {
129 	struct xe_gt *gt;
130 	u8 gt_id;
131 
132 	for_each_gt(gt, xe, gt_id) {
133 		if (xe_gt_sriov_pf_control_check_save_failed(gt, vfid) ||
134 		    xe_gt_sriov_pf_control_check_save_data_done(gt, vfid) ||
135 		    !xe_gt_sriov_pf_migration_ring_empty(gt, vfid))
136 			return true;
137 	}
138 
139 	return false;
140 }
141 
142 static struct xe_sriov_packet *
pf_migration_consume(struct xe_device * xe,unsigned int vfid)143 pf_migration_consume(struct xe_device *xe, unsigned int vfid)
144 {
145 	struct xe_sriov_packet *data;
146 	bool more_data = false;
147 	struct xe_gt *gt;
148 	u8 gt_id;
149 
150 	for_each_gt(gt, xe, gt_id) {
151 		data = xe_gt_sriov_pf_migration_save_consume(gt, vfid);
152 		if (!data)
153 			continue;
154 		if (!IS_ERR(data) || PTR_ERR(data) != -EAGAIN)
155 			return data;
156 		more_data = true;
157 	}
158 
159 	if (!more_data)
160 		return NULL;
161 
162 	return ERR_PTR(-EAGAIN);
163 }
164 
165 /**
166  * xe_sriov_pf_migration_save_consume() - Consume a VF migration data packet from the device.
167  * @xe: the &xe_device
168  * @vfid: the VF identifier
169  *
170  * Called by the save migration data consumer (userspace) when
171  * processing migration data.
172  * If there is no migration data to process, wait until more data is available.
173  *
174  * Return: Pointer to &xe_sriov_packet on success,
175  *	   NULL if ring is empty and no more migration data is expected,
176  *	   ERR_PTR value in case of error.
177  */
178 struct xe_sriov_packet *
xe_sriov_pf_migration_save_consume(struct xe_device * xe,unsigned int vfid)179 xe_sriov_pf_migration_save_consume(struct xe_device *xe, unsigned int vfid)
180 {
181 	struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid);
182 	struct xe_sriov_packet *data;
183 	int ret;
184 
185 	xe_assert(xe, IS_SRIOV_PF(xe));
186 
187 	for (;;) {
188 		data = pf_migration_consume(xe, vfid);
189 		if (PTR_ERR(data) != -EAGAIN)
190 			break;
191 
192 		ret = wait_event_interruptible(migration->wq,
193 					       pf_migration_data_ready(xe, vfid));
194 		if (ret)
195 			return ERR_PTR(ret);
196 	}
197 
198 	return data;
199 }
200 
pf_handle_descriptor(struct xe_device * xe,unsigned int vfid,struct xe_sriov_packet * data)201 static int pf_handle_descriptor(struct xe_device *xe, unsigned int vfid,
202 				struct xe_sriov_packet *data)
203 {
204 	int ret;
205 
206 	if (data->hdr.tile_id != 0 || data->hdr.gt_id != 0)
207 		return -EINVAL;
208 
209 	ret = xe_sriov_packet_process_descriptor(xe, vfid, data);
210 	if (ret)
211 		return ret;
212 
213 	xe_sriov_packet_free(data);
214 
215 	return 0;
216 }
217 
pf_handle_trailer(struct xe_device * xe,unsigned int vfid,struct xe_sriov_packet * data)218 static int pf_handle_trailer(struct xe_device *xe, unsigned int vfid,
219 			     struct xe_sriov_packet *data)
220 {
221 	struct xe_gt *gt;
222 	u8 gt_id;
223 
224 	if (data->hdr.tile_id != 0 || data->hdr.gt_id != 0)
225 		return -EINVAL;
226 	if (data->hdr.offset != 0 || data->hdr.size != 0 || data->buff || data->bo)
227 		return -EINVAL;
228 
229 	xe_sriov_packet_free(data);
230 
231 	for_each_gt(gt, xe, gt_id)
232 		xe_gt_sriov_pf_control_restore_data_done(gt, vfid);
233 
234 	return 0;
235 }
236 
237 /**
238  * xe_sriov_pf_migration_restore_produce() - Produce a VF migration data packet to the device.
239  * @xe: the &xe_device
240  * @vfid: the VF identifier
241  * @data: Pointer to &xe_sriov_packet
242  *
243  * Called by the restore migration data producer (userspace) when processing
244  * migration data.
245  * If the underlying data structure is full, wait until there is space.
246  *
247  * Return: 0 on success or a negative error code on failure.
248  */
xe_sriov_pf_migration_restore_produce(struct xe_device * xe,unsigned int vfid,struct xe_sriov_packet * data)249 int xe_sriov_pf_migration_restore_produce(struct xe_device *xe, unsigned int vfid,
250 					  struct xe_sriov_packet *data)
251 {
252 	struct xe_gt *gt;
253 
254 	xe_assert(xe, IS_SRIOV_PF(xe));
255 
256 	if (data->hdr.type == XE_SRIOV_PACKET_TYPE_DESCRIPTOR)
257 		return pf_handle_descriptor(xe, vfid, data);
258 	if (data->hdr.type == XE_SRIOV_PACKET_TYPE_TRAILER)
259 		return pf_handle_trailer(xe, vfid, data);
260 
261 	gt = xe_device_get_gt(xe, data->hdr.gt_id);
262 	if (!gt || data->hdr.tile_id != gt->tile->id || data->hdr.type == 0) {
263 		xe_sriov_err_ratelimited(xe, "Received invalid restore packet for VF%u (type:%u, tile:%u, GT:%u)\n",
264 					 vfid, data->hdr.type, data->hdr.tile_id, data->hdr.gt_id);
265 		return -EINVAL;
266 	}
267 
268 	return xe_gt_sriov_pf_migration_restore_produce(gt, vfid, data);
269 }
270 
271 /**
272  * xe_sriov_pf_migration_read() - Read migration data from the device.
273  * @xe: the &xe_device
274  * @vfid: the VF identifier
275  * @buf: start address of userspace buffer
276  * @len: requested read size from userspace
277  *
278  * Return: number of bytes that has been successfully read,
279  *	   0 if no more migration data is available,
280  *	   -errno on failure.
281  */
xe_sriov_pf_migration_read(struct xe_device * xe,unsigned int vfid,char __user * buf,size_t len)282 ssize_t xe_sriov_pf_migration_read(struct xe_device *xe, unsigned int vfid,
283 				   char __user *buf, size_t len)
284 {
285 	struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid);
286 	ssize_t ret, consumed = 0;
287 
288 	xe_assert(xe, IS_SRIOV_PF(xe));
289 
290 	scoped_cond_guard(mutex_intr, return -EINTR, &migration->lock) {
291 		while (consumed < len) {
292 			ret = xe_sriov_packet_read_single(xe, vfid, buf, len - consumed);
293 			if (ret == -ENODATA)
294 				break;
295 			if (ret < 0)
296 				return ret;
297 
298 			consumed += ret;
299 			buf += ret;
300 		}
301 	}
302 
303 	return consumed;
304 }
305 
306 /**
307  * xe_sriov_pf_migration_write() - Write migration data to the device.
308  * @xe: the &xe_device
309  * @vfid: the VF identifier
310  * @buf: start address of userspace buffer
311  * @len: requested write size from userspace
312  *
313  * Return: number of bytes that has been successfully written,
314  *	   -errno on failure.
315  */
xe_sriov_pf_migration_write(struct xe_device * xe,unsigned int vfid,const char __user * buf,size_t len)316 ssize_t xe_sriov_pf_migration_write(struct xe_device *xe, unsigned int vfid,
317 				    const char __user *buf, size_t len)
318 {
319 	struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid);
320 	ssize_t ret, produced = 0;
321 
322 	xe_assert(xe, IS_SRIOV_PF(xe));
323 
324 	scoped_cond_guard(mutex_intr, return -EINTR, &migration->lock) {
325 		while (produced < len) {
326 			ret = xe_sriov_packet_write_single(xe, vfid, buf, len - produced);
327 			if (ret < 0)
328 				return ret;
329 
330 			produced += ret;
331 			buf += ret;
332 		}
333 	}
334 
335 	return produced;
336 }
337 
338 /**
339  * xe_sriov_pf_migration_size() - Total size of migration data from all components within a device
340  * @xe: the &xe_device
341  * @vfid: the VF identifier (can't be 0)
342  *
343  * This function is for PF only.
344  *
345  * Return: total migration data size in bytes or a negative error code on failure.
346  */
xe_sriov_pf_migration_size(struct xe_device * xe,unsigned int vfid)347 ssize_t xe_sriov_pf_migration_size(struct xe_device *xe, unsigned int vfid)
348 {
349 	size_t size = 0;
350 	struct xe_gt *gt;
351 	ssize_t ret;
352 	u8 gt_id;
353 
354 	xe_assert(xe, IS_SRIOV_PF(xe));
355 	xe_assert(xe, vfid);
356 
357 	for_each_gt(gt, xe, gt_id) {
358 		ret = xe_gt_sriov_pf_migration_size(gt, vfid);
359 		if (ret < 0)
360 			return ret;
361 
362 		size += ret;
363 	}
364 
365 	return size;
366 }
367