xref: /linux/drivers/gpu/drm/xe/xe_sriov_pf_migration.c (revision deb879faa9d2f327ac5c079d9d1a1747b79260e3)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "xe_device.h"
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_migration.h"
#include "xe_pm.h"
#include "xe_sriov.h"
#include "xe_sriov_packet.h"
#include "xe_sriov_packet_types.h"
#include "xe_sriov_pf_helpers.h"
#include "xe_sriov_pf_migration.h"
#include "xe_sriov_printk.h"

static struct xe_sriov_migration_state *pf_pick_migration(struct xe_device *xe, unsigned int vfid)
{
	xe_assert(xe, IS_SRIOV_PF(xe));
	xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe));

	return &xe->sriov.pf.vfs[vfid].migration;
}

/**
 * xe_sriov_pf_migration_waitqueue() - Get waitqueue for migration.
 * @xe: the &xe_device
 * @vfid: the VF identifier
 *
 * Return: pointer to the migration waitqueue.
 */
wait_queue_head_t *xe_sriov_pf_migration_waitqueue(struct xe_device *xe, unsigned int vfid)
{
	return &pf_pick_migration(xe, vfid)->wq;
}

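/*
 * Illustrative sketch only (not part of this file): a producer that has just
 * made new migration data available could wake any consumer blocked on the
 * per-VF waitqueue returned above. The surrounding function name is a
 * hypothetical example, not an API taken from this driver.
 */
#if 0
static void example_notify_migration_data(struct xe_device *xe, unsigned int vfid)
{
	/* Wake consumers sleeping in xe_sriov_pf_migration_save_consume(). */
	wake_up_all(xe_sriov_pf_migration_waitqueue(xe, vfid));
}
#endif
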
/**
 * xe_sriov_pf_migration_supported() - Check if SR-IOV VF migration is supported by the device
 * @xe: the &xe_device
 *
 * Return: true if migration is supported, false otherwise
 */
bool xe_sriov_pf_migration_supported(struct xe_device *xe)
{
	xe_assert(xe, IS_SRIOV_PF(xe));

	return IS_ENABLED(CONFIG_DRM_XE_DEBUG) || !xe->sriov.pf.migration.disabled;
}

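/*
 * Illustrative sketch only (not part of this file): callers exposing a
 * migration interface are expected to check PF support first. The function
 * name and the -EOPNOTSUPP return value below are assumptions made for the
 * example, not taken from this driver.
 */
#if 0
static int example_begin_vf_save(struct xe_device *xe, unsigned int vfid)
{
	if (!xe_sriov_pf_migration_supported(xe))
		return -EOPNOTSUPP;

	/* ... kick off the per-GT save sequence here ... */
	return 0;
}
#endif
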
/**
 * xe_sriov_pf_migration_disable() - Turn off SR-IOV VF migration support on PF.
 * @xe: the &xe_device instance.
 * @fmt: format string for the log message, to be combined with following VAs.
 */
void xe_sriov_pf_migration_disable(struct xe_device *xe, const char *fmt, ...)
{
	struct va_format vaf;
	va_list va_args;

	xe_assert(xe, IS_SRIOV_PF(xe));

	va_start(va_args, fmt);
	vaf.fmt = fmt;
	vaf.va  = &va_args;
	xe_sriov_notice(xe, "migration %s: %pV\n",
			IS_ENABLED(CONFIG_DRM_XE_DEBUG) ?
			"missing prerequisite" : "disabled",
			&vaf);
	va_end(va_args);

	xe->sriov.pf.migration.disabled = true;
}

static void pf_migration_check_support(struct xe_device *xe)
{
	if (!xe_device_has_memirq(xe))
		xe_sriov_pf_migration_disable(xe, "requires memory-based IRQ support");
}

static void pf_migration_cleanup(void *arg)
{
	struct xe_sriov_migration_state *migration = arg;

	xe_sriov_packet_free(migration->pending);
	xe_sriov_packet_free(migration->trailer);
	xe_sriov_packet_free(migration->descriptor);
}

/**
 * xe_sriov_pf_migration_init() - Initialize support for SR-IOV VF migration.
 * @xe: the &xe_device
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_sriov_pf_migration_init(struct xe_device *xe)
{
	unsigned int n, totalvfs;
	int err;

	xe_assert(xe, IS_SRIOV_PF(xe));

	pf_migration_check_support(xe);

	if (!xe_sriov_pf_migration_supported(xe))
		return 0;

	totalvfs = xe_sriov_pf_get_totalvfs(xe);
	for (n = 1; n <= totalvfs; n++) {
		struct xe_sriov_migration_state *migration = pf_pick_migration(xe, n);

		err = drmm_mutex_init(&xe->drm, &migration->lock);
		if (err)
			return err;

		init_waitqueue_head(&migration->wq);

		err = devm_add_action_or_reset(xe->drm.dev, pf_migration_cleanup, migration);
		if (err)
			return err;
	}

	return 0;
}

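/*
 * Illustrative sketch only (not part of this file): migration state is
 * expected to be initialized once during PF SR-IOV setup. The surrounding
 * function name and its place in the probe sequence are assumptions; only
 * the call to xe_sriov_pf_migration_init() comes from this file.
 */
#if 0
static int example_pf_init_late(struct xe_device *xe)
{
	int err;

	err = xe_sriov_pf_migration_init(xe);
	if (err)
		return err;

	/* ... continue with the remaining PF initialization ... */
	return 0;
}
#endif
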
static bool pf_migration_data_ready(struct xe_device *xe, unsigned int vfid)
{
	struct xe_gt *gt;
	u8 gt_id;

	for_each_gt(gt, xe, gt_id) {
		if (xe_gt_sriov_pf_control_check_save_failed(gt, vfid) ||
		    xe_gt_sriov_pf_control_check_save_data_done(gt, vfid) ||
		    !xe_gt_sriov_pf_migration_ring_empty(gt, vfid))
			return true;
	}

	return false;
}

static struct xe_sriov_packet *
pf_migration_consume(struct xe_device *xe, unsigned int vfid)
{
	struct xe_sriov_packet *data;
	bool more_data = false;
	struct xe_gt *gt;
	u8 gt_id;

	for_each_gt(gt, xe, gt_id) {
		data = xe_gt_sriov_pf_migration_save_consume(gt, vfid);
		if (data && PTR_ERR(data) != -EAGAIN)
			return data;
		if (PTR_ERR(data) == -EAGAIN)
			more_data = true;
	}

	if (!more_data)
		return NULL;

	return ERR_PTR(-EAGAIN);
}

/**
 * xe_sriov_pf_migration_save_consume() - Consume a VF migration data packet from the device.
 * @xe: the &xe_device
 * @vfid: the VF identifier
 *
 * Called by the save migration data consumer (userspace) when
 * processing migration data.
 * If there is no migration data to process, wait until more data is available.
 *
 * Return: Pointer to &xe_sriov_packet on success,
 *	   NULL if ring is empty and no more migration data is expected,
 *	   ERR_PTR value in case of error.
 */
struct xe_sriov_packet *
xe_sriov_pf_migration_save_consume(struct xe_device *xe, unsigned int vfid)
{
	struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid);
	struct xe_sriov_packet *data;
	int ret;

	xe_assert(xe, IS_SRIOV_PF(xe));

	for (;;) {
		data = pf_migration_consume(xe, vfid);
		if (PTR_ERR(data) != -EAGAIN)
			break;

		ret = wait_event_interruptible(migration->wq,
					       pf_migration_data_ready(xe, vfid));
		if (ret)
			return ERR_PTR(ret);
	}

	return data;
}

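/*
 * Illustrative sketch only (not part of this file): a save-side consumer
 * drains packets until NULL (no more data expected) and must distinguish
 * the error case. The function name and the example_emit() helper are
 * hypothetical stand-ins for whatever transport forwards data to userspace.
 */
#if 0
static int example_drain_save_data(struct xe_device *xe, unsigned int vfid)
{
	struct xe_sriov_packet *data;

	while ((data = xe_sriov_pf_migration_save_consume(xe, vfid))) {
		if (IS_ERR(data))
			return PTR_ERR(data);

		example_emit(data); /* hypothetical: hand the packet to the consumer */
	}

	return 0; /* NULL means the migration data stream is complete */
}
#endif
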
static int pf_handle_descriptor(struct xe_device *xe, unsigned int vfid,
				struct xe_sriov_packet *data)
{
	int ret;

	if (data->hdr.tile_id != 0 || data->hdr.gt_id != 0)
		return -EINVAL;

	ret = xe_sriov_packet_process_descriptor(xe, vfid, data);
	if (ret)
		return ret;

	xe_sriov_packet_free(data);

	return 0;
}

static int pf_handle_trailer(struct xe_device *xe, unsigned int vfid,
			     struct xe_sriov_packet *data)
{
	struct xe_gt *gt;
	u8 gt_id;

	if (data->hdr.tile_id != 0 || data->hdr.gt_id != 0)
		return -EINVAL;
	if (data->hdr.offset != 0 || data->hdr.size != 0 || data->buff || data->bo)
		return -EINVAL;

	xe_sriov_packet_free(data);

	for_each_gt(gt, xe, gt_id)
		xe_gt_sriov_pf_control_restore_data_done(gt, vfid);

	return 0;
}

/**
 * xe_sriov_pf_migration_restore_produce() - Produce a VF migration data packet to the device.
 * @xe: the &xe_device
 * @vfid: the VF identifier
 * @data: Pointer to &xe_sriov_packet
 *
 * Called by the restore migration data producer (userspace) when processing
 * migration data.
 * If the underlying data structure is full, wait until there is space.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_sriov_pf_migration_restore_produce(struct xe_device *xe, unsigned int vfid,
					  struct xe_sriov_packet *data)
{
	struct xe_gt *gt;

	xe_assert(xe, IS_SRIOV_PF(xe));

	if (data->hdr.type == XE_SRIOV_PACKET_TYPE_DESCRIPTOR)
		return pf_handle_descriptor(xe, vfid, data);
	if (data->hdr.type == XE_SRIOV_PACKET_TYPE_TRAILER)
		return pf_handle_trailer(xe, vfid, data);

	gt = xe_device_get_gt(xe, data->hdr.gt_id);
	if (!gt || data->hdr.tile_id != gt->tile->id || data->hdr.type == 0) {
		xe_sriov_err_ratelimited(xe, "Received invalid restore packet for VF%u (type:%u, tile:%u, GT:%u)\n",
					 vfid, data->hdr.type, data->hdr.tile_id, data->hdr.gt_id);
		return -EINVAL;
	}

	return xe_gt_sriov_pf_migration_restore_produce(gt, vfid, data);
}

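/*
 * Illustrative sketch only (not part of this file): a restore-side producer
 * pushes each decoded packet into the device; descriptor and trailer packets
 * are recognized and routed internally by the function above. The caller
 * shown here is a hypothetical example.
 */
#if 0
static int example_push_restore_packet(struct xe_device *xe, unsigned int vfid,
				       struct xe_sriov_packet *data)
{
	/* Descriptor/trailer packets are detected and handled internally. */
	return xe_sriov_pf_migration_restore_produce(xe, vfid, data);
}
#endif
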
/**
 * xe_sriov_pf_migration_read() - Read migration data from the device.
 * @xe: the &xe_device
 * @vfid: the VF identifier
 * @buf: start address of userspace buffer
 * @len: requested read size from userspace
 *
 * Return: number of bytes that have been successfully read,
 *	   0 if no more migration data is available,
 *	   -errno on failure.
 */
ssize_t xe_sriov_pf_migration_read(struct xe_device *xe, unsigned int vfid,
				   char __user *buf, size_t len)
{
	struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid);
	ssize_t ret, consumed = 0;

	xe_assert(xe, IS_SRIOV_PF(xe));

	scoped_cond_guard(mutex_intr, return -EINTR, &migration->lock) {
		while (consumed < len) {
			ret = xe_sriov_packet_read_single(xe, vfid, buf, len - consumed);
			if (ret == -ENODATA)
				break;
			if (ret < 0)
				return ret;

			consumed += ret;
			buf += ret;
		}
	}

	return consumed;
}

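/*
 * Illustrative sketch only (not part of this file): a file-operations style
 * read handler could forward userspace reads of the VF save stream to the
 * helper above. The handler name and how @xe and @vfid would be recovered
 * are assumptions for the example.
 */
#if 0
static ssize_t example_save_read(struct xe_device *xe, unsigned int vfid,
				 char __user *buf, size_t len)
{
	ssize_t ret;

	ret = xe_sriov_pf_migration_read(xe, vfid, buf, len);
	if (ret == 0)
		return 0; /* end of the migration data stream */

	return ret; /* bytes copied, or -errno (including -EINTR) */
}
#endif
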
/**
 * xe_sriov_pf_migration_write() - Write migration data to the device.
 * @xe: the &xe_device
 * @vfid: the VF identifier
 * @buf: start address of userspace buffer
 * @len: requested write size from userspace
 *
 * Return: number of bytes that have been successfully written,
 *	   -errno on failure.
 */
ssize_t xe_sriov_pf_migration_write(struct xe_device *xe, unsigned int vfid,
				    const char __user *buf, size_t len)
{
	struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid);
	ssize_t ret, produced = 0;

	xe_assert(xe, IS_SRIOV_PF(xe));

	scoped_cond_guard(mutex_intr, return -EINTR, &migration->lock) {
		while (produced < len) {
			ret = xe_sriov_packet_write_single(xe, vfid, buf, len - produced);
			if (ret < 0)
				return ret;

			produced += ret;
			buf += ret;
		}
	}

	return produced;
}

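/*
 * Illustrative sketch only (not part of this file): a restore-side write
 * handler simply forwards the userspace buffer; partial progress is reported
 * through the return value. The handler name is a hypothetical example.
 */
#if 0
static ssize_t example_restore_write(struct xe_device *xe, unsigned int vfid,
				     const char __user *buf, size_t len)
{
	/* Returns the number of bytes consumed, or -errno (e.g. -EINTR). */
	return xe_sriov_pf_migration_write(xe, vfid, buf, len);
}
#endif
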
/**
 * xe_sriov_pf_migration_size() - Total size of migration data from all components within a device
 * @xe: the &xe_device
 * @vfid: the VF identifier (can't be 0)
 *
 * This function is for PF only.
 *
 * Return: total migration data size in bytes or a negative error code on failure.
 */
ssize_t xe_sriov_pf_migration_size(struct xe_device *xe, unsigned int vfid)
{
	size_t size = 0;
	struct xe_gt *gt;
	ssize_t ret;
	u8 gt_id;

	xe_assert(xe, IS_SRIOV_PF(xe));
	xe_assert(xe, vfid);

	for_each_gt(gt, xe, gt_id) {
		ret = xe_gt_sriov_pf_migration_size(gt, vfid);
		if (ret < 0)
			return ret;

		size += ret;
	}

	return size;
}
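
/*
 * Illustrative sketch only (not part of this file): the aggregate size
 * reported above could be used to tell userspace how much migration data to
 * expect before it starts reading. The pr_info() reporting and the caller
 * name are assumptions for the example.
 */
#if 0
static ssize_t example_report_migration_size(struct xe_device *xe, unsigned int vfid)
{
	ssize_t total = xe_sriov_pf_migration_size(xe, vfid);

	if (total < 0)
		return total;

	pr_info("VF%u migration data: %zd bytes\n", vfid, total);
	return total;
}
#endif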
366