xref: /linux/drivers/vfio/pci/virtio/migrate.c (revision 87e801e1678342fc23b1eb92c0eecedf5dca79cb)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
4  */
5 
6 #include <linux/device.h>
7 #include <linux/module.h>
8 #include <linux/mutex.h>
9 #include <linux/pci.h>
10 #include <linux/pm_runtime.h>
11 #include <linux/types.h>
12 #include <linux/uaccess.h>
13 #include <linux/vfio.h>
14 #include <linux/vfio_pci_core.h>
15 #include <linux/virtio_pci.h>
16 #include <linux/virtio_net.h>
17 #include <linux/virtio_pci_admin.h>
18 #include <linux/anon_inodes.h>
19 
20 #include "common.h"
21 
/*
 * Device specification max parts size.
 *
 * Evaluates to a value with every bit of the parts_size.size field of
 * struct virtio_admin_cmd_dev_parts_metadata_result set, i.e. the largest
 * value that field can hold. The null-pointer member access is only used to
 * name the field's type for BITS_PER_TYPE(); nothing is dereferenced.
 * Used below as the upper bound for a record size read from the stream.
 */
#define MAX_LOAD_SIZE (BIT_ULL(BITS_PER_TYPE \
	(((struct virtio_admin_cmd_dev_parts_metadata_result *)0)->parts_size.size)) - 1)

/* Initial target buffer size (size of the first data buffer on resume) */
#define VIRTIOVF_TARGET_INITIAL_BUF_SIZE SZ_1M

/*
 * Forward declaration: the definition appears after
 * virtiovf_precopy_ioctl(), which calls it.
 */
static int
virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
				   u32 ctx_size);
32 
33 static struct page *
34 virtiovf_get_migration_page(struct virtiovf_data_buffer *buf,
35 			    unsigned long offset)
36 {
37 	unsigned long cur_offset = 0;
38 	struct scatterlist *sg;
39 	unsigned int i;
40 
41 	/* All accesses are sequential */
42 	if (offset < buf->last_offset || !buf->last_offset_sg) {
43 		buf->last_offset = 0;
44 		buf->last_offset_sg = buf->table.sgt.sgl;
45 		buf->sg_last_entry = 0;
46 	}
47 
48 	cur_offset = buf->last_offset;
49 
50 	for_each_sg(buf->last_offset_sg, sg,
51 		    buf->table.sgt.orig_nents - buf->sg_last_entry, i) {
52 		if (offset < sg->length + cur_offset) {
53 			buf->last_offset_sg = sg;
54 			buf->sg_last_entry += i;
55 			buf->last_offset = cur_offset;
56 			return sg_page(sg) + (offset - cur_offset) / PAGE_SIZE;
57 		}
58 		cur_offset += sg->length;
59 	}
60 	return NULL;
61 }
62 
63 static int virtiovf_add_migration_pages(struct virtiovf_data_buffer *buf,
64 					unsigned int npages)
65 {
66 	unsigned int to_alloc = npages;
67 	struct page **page_list;
68 	unsigned long filled;
69 	unsigned int to_fill;
70 	int ret;
71 	int i;
72 
73 	to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
74 	page_list = kvzalloc_objs(*page_list, to_fill, GFP_KERNEL_ACCOUNT);
75 	if (!page_list)
76 		return -ENOMEM;
77 
78 	do {
79 		filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, to_fill,
80 					  page_list);
81 		if (!filled) {
82 			ret = -ENOMEM;
83 			goto err;
84 		}
85 		to_alloc -= filled;
86 		ret = sg_alloc_append_table_from_pages(&buf->table, page_list,
87 			filled, 0, filled << PAGE_SHIFT, UINT_MAX,
88 			SG_MAX_SINGLE_ALLOC, GFP_KERNEL_ACCOUNT);
89 
90 		if (ret)
91 			goto err_append;
92 		buf->allocated_length += filled * PAGE_SIZE;
93 		/* clean input for another bulk allocation */
94 		memset(page_list, 0, filled * sizeof(*page_list));
95 		to_fill = min_t(unsigned int, to_alloc,
96 				PAGE_SIZE / sizeof(*page_list));
97 	} while (to_alloc > 0);
98 
99 	kvfree(page_list);
100 	return 0;
101 
102 err_append:
103 	for (i = filled - 1; i >= 0; i--)
104 		__free_page(page_list[i]);
105 err:
106 	kvfree(page_list);
107 	return ret;
108 }
109 
110 static void virtiovf_free_data_buffer(struct virtiovf_data_buffer *buf)
111 {
112 	struct sg_page_iter sg_iter;
113 
114 	/* Undo alloc_pages_bulk() */
115 	for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0)
116 		__free_page(sg_page_iter_page(&sg_iter));
117 	sg_free_append_table(&buf->table);
118 	kfree(buf);
119 }
120 
121 static struct virtiovf_data_buffer *
122 virtiovf_alloc_data_buffer(struct virtiovf_migration_file *migf, size_t length)
123 {
124 	struct virtiovf_data_buffer *buf;
125 	int ret;
126 
127 	buf = kzalloc_obj(*buf, GFP_KERNEL_ACCOUNT);
128 	if (!buf)
129 		return ERR_PTR(-ENOMEM);
130 
131 	ret = virtiovf_add_migration_pages(buf,
132 				DIV_ROUND_UP_ULL(length, PAGE_SIZE));
133 	if (ret)
134 		goto end;
135 
136 	buf->migf = migf;
137 	return buf;
138 end:
139 	virtiovf_free_data_buffer(buf);
140 	return ERR_PTR(ret);
141 }
142 
143 static void virtiovf_put_data_buffer(struct virtiovf_data_buffer *buf)
144 {
145 	guard(mutex)(&buf->migf->list_lock);
146 	list_add_tail(&buf->buf_elm, &buf->migf->avail_list);
147 }
148 
149 static int
150 virtiovf_pci_alloc_obj_id(struct virtiovf_pci_core_device *virtvdev, u8 type,
151 			  u32 *obj_id)
152 {
153 	return virtio_pci_admin_obj_create(virtvdev->core_device.pdev,
154 					   VIRTIO_RESOURCE_OBJ_DEV_PARTS, type, obj_id);
155 }
156 
157 static void
158 virtiovf_pci_free_obj_id(struct virtiovf_pci_core_device *virtvdev, u32 obj_id)
159 {
160 	virtio_pci_admin_obj_destroy(virtvdev->core_device.pdev,
161 			VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id);
162 }
163 
164 static struct virtiovf_data_buffer *
165 virtiovf_get_data_buffer(struct virtiovf_migration_file *migf, size_t length)
166 {
167 	struct virtiovf_data_buffer *buf, *temp_buf;
168 	struct list_head free_list;
169 
170 	INIT_LIST_HEAD(&free_list);
171 
172 	mutex_lock(&migf->list_lock);
173 	list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) {
174 		list_del_init(&buf->buf_elm);
175 		if (buf->allocated_length >= length) {
176 			mutex_unlock(&migf->list_lock);
177 			goto found;
178 		}
179 		/*
180 		 * Prevent holding redundant buffers. Put in a free
181 		 * list and call at the end not under the mutex
182 		 * (&migf->list_lock) to minimize its scope usage.
183 		 */
184 		list_add(&buf->buf_elm, &free_list);
185 	}
186 	mutex_unlock(&migf->list_lock);
187 	buf = virtiovf_alloc_data_buffer(migf, length);
188 
189 found:
190 	while ((temp_buf = list_first_entry_or_null(&free_list,
191 				struct virtiovf_data_buffer, buf_elm))) {
192 		list_del(&temp_buf->buf_elm);
193 		virtiovf_free_data_buffer(temp_buf);
194 	}
195 
196 	return buf;
197 }
198 
199 static void virtiovf_clean_migf_resources(struct virtiovf_migration_file *migf)
200 {
201 	struct virtiovf_data_buffer *entry;
202 
203 	if (migf->buf) {
204 		virtiovf_free_data_buffer(migf->buf);
205 		migf->buf = NULL;
206 	}
207 
208 	if (migf->buf_header) {
209 		virtiovf_free_data_buffer(migf->buf_header);
210 		migf->buf_header = NULL;
211 	}
212 
213 	list_splice(&migf->avail_list, &migf->buf_list);
214 
215 	while ((entry = list_first_entry_or_null(&migf->buf_list,
216 				struct virtiovf_data_buffer, buf_elm))) {
217 		list_del(&entry->buf_elm);
218 		virtiovf_free_data_buffer(entry);
219 	}
220 
221 	if (migf->has_obj_id)
222 		virtiovf_pci_free_obj_id(migf->virtvdev, migf->obj_id);
223 }
224 
225 static void virtiovf_disable_fd(struct virtiovf_migration_file *migf)
226 {
227 	guard(mutex)(&migf->lock);
228 	migf->state = VIRTIOVF_MIGF_STATE_ERROR;
229 	migf->filp->f_pos = 0;
230 }
231 
232 static void virtiovf_disable_fds(struct virtiovf_pci_core_device *virtvdev)
233 {
234 	if (virtvdev->resuming_migf) {
235 		virtiovf_disable_fd(virtvdev->resuming_migf);
236 		virtiovf_clean_migf_resources(virtvdev->resuming_migf);
237 		fput(virtvdev->resuming_migf->filp);
238 		virtvdev->resuming_migf = NULL;
239 	}
240 	if (virtvdev->saving_migf) {
241 		virtiovf_disable_fd(virtvdev->saving_migf);
242 		virtiovf_clean_migf_resources(virtvdev->saving_migf);
243 		fput(virtvdev->saving_migf->filp);
244 		virtvdev->saving_migf = NULL;
245 	}
246 }
247 
248 /*
249  * This function is called in all state_mutex unlock cases to
250  * handle a 'deferred_reset' if exists.
251  */
252 static void virtiovf_state_mutex_unlock(struct virtiovf_pci_core_device *virtvdev)
253 {
254 again:
255 	spin_lock(&virtvdev->reset_lock);
256 	if (virtvdev->deferred_reset) {
257 		virtvdev->deferred_reset = false;
258 		spin_unlock(&virtvdev->reset_lock);
259 		virtvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
260 		virtiovf_disable_fds(virtvdev);
261 		goto again;
262 	}
263 	mutex_unlock(&virtvdev->state_mutex);
264 	spin_unlock(&virtvdev->reset_lock);
265 }
266 
267 void virtiovf_migration_reset_done(struct pci_dev *pdev)
268 {
269 	struct virtiovf_pci_core_device *virtvdev = dev_get_drvdata(&pdev->dev);
270 
271 	if (!virtvdev->migrate_cap)
272 		return;
273 
274 	/*
275 	 * As the higher VFIO layers are holding locks across reset and using
276 	 * those same locks with the mm_lock we need to prevent ABBA deadlock
277 	 * with the state_mutex and mm_lock.
278 	 * In case the state_mutex was taken already we defer the cleanup work
279 	 * to the unlock flow of the other running context.
280 	 */
281 	spin_lock(&virtvdev->reset_lock);
282 	virtvdev->deferred_reset = true;
283 	if (!mutex_trylock(&virtvdev->state_mutex)) {
284 		spin_unlock(&virtvdev->reset_lock);
285 		return;
286 	}
287 	spin_unlock(&virtvdev->reset_lock);
288 	virtiovf_state_mutex_unlock(virtvdev);
289 }
290 
291 static int virtiovf_release_file(struct inode *inode, struct file *filp)
292 {
293 	struct virtiovf_migration_file *migf = filp->private_data;
294 
295 	virtiovf_disable_fd(migf);
296 	mutex_destroy(&migf->list_lock);
297 	mutex_destroy(&migf->lock);
298 	kfree(migf);
299 	return 0;
300 }
301 
302 static struct virtiovf_data_buffer *
303 virtiovf_get_data_buff_from_pos(struct virtiovf_migration_file *migf,
304 				loff_t pos, bool *end_of_data)
305 {
306 	struct virtiovf_data_buffer *buf;
307 
308 	*end_of_data = false;
309 	guard(mutex)(&migf->list_lock);
310 
311 	if (list_empty(&migf->buf_list)) {
312 		*end_of_data = true;
313 		return NULL;
314 	}
315 
316 	buf = list_first_entry(&migf->buf_list, struct virtiovf_data_buffer,
317 			       buf_elm);
318 	if (pos >= buf->start_pos &&
319 	    pos < buf->start_pos + buf->length)
320 		return buf;
321 
322 	/*
323 	 * As we use a stream based FD we may expect having the data always
324 	 * on first chunk
325 	 */
326 	migf->state = VIRTIOVF_MIGF_STATE_ERROR;
327 	return NULL;
328 }
329 
330 static ssize_t virtiovf_buf_read(struct virtiovf_data_buffer *vhca_buf,
331 				 char __user **buf, size_t *len, loff_t *pos)
332 {
333 	unsigned long offset;
334 	ssize_t done = 0;
335 	size_t copy_len;
336 
337 	copy_len = min_t(size_t,
338 			 vhca_buf->start_pos + vhca_buf->length - *pos, *len);
339 	while (copy_len) {
340 		size_t page_offset;
341 		struct page *page;
342 		size_t page_len;
343 		u8 *from_buff;
344 		int ret;
345 
346 		offset = *pos - vhca_buf->start_pos;
347 		page_offset = offset % PAGE_SIZE;
348 		offset -= page_offset;
349 		page = virtiovf_get_migration_page(vhca_buf, offset);
350 		if (!page)
351 			return -EINVAL;
352 		page_len = min_t(size_t, copy_len, PAGE_SIZE - page_offset);
353 		from_buff = kmap_local_page(page);
354 		ret = copy_to_user(*buf, from_buff + page_offset, page_len);
355 		kunmap_local(from_buff);
356 		if (ret)
357 			return -EFAULT;
358 		*pos += page_len;
359 		*len -= page_len;
360 		*buf += page_len;
361 		done += page_len;
362 		copy_len -= page_len;
363 	}
364 
365 	if (*pos >= vhca_buf->start_pos + vhca_buf->length) {
366 		guard(mutex)(&vhca_buf->migf->list_lock);
367 		list_del_init(&vhca_buf->buf_elm);
368 		list_add_tail(&vhca_buf->buf_elm, &vhca_buf->migf->avail_list);
369 	}
370 
371 	return done;
372 }
373 
374 static ssize_t virtiovf_save_read(struct file *filp, char __user *buf, size_t len,
375 				  loff_t *pos)
376 {
377 	struct virtiovf_migration_file *migf = filp->private_data;
378 	struct virtiovf_data_buffer *vhca_buf;
379 	bool first_loop_call = true;
380 	bool end_of_data;
381 	ssize_t done = 0;
382 
383 	if (pos)
384 		return -ESPIPE;
385 	pos = &filp->f_pos;
386 
387 	guard(mutex)(&migf->lock);
388 
389 	if (migf->state == VIRTIOVF_MIGF_STATE_ERROR)
390 		return -ENODEV;
391 
392 	while (len) {
393 		ssize_t count;
394 
395 		vhca_buf = virtiovf_get_data_buff_from_pos(migf, *pos, &end_of_data);
396 		if (first_loop_call) {
397 			first_loop_call = false;
398 			/* Temporary end of file as part of PRE_COPY */
399 			if (end_of_data && migf->state == VIRTIOVF_MIGF_STATE_PRECOPY)
400 				return -ENOMSG;
401 			if (end_of_data && migf->state != VIRTIOVF_MIGF_STATE_COMPLETE)
402 				return -EINVAL;
403 		}
404 
405 		if (end_of_data)
406 			return done;
407 
408 		if (!vhca_buf)
409 			return -EINVAL;
410 
411 		count = virtiovf_buf_read(vhca_buf, &buf, &len, pos);
412 		if (count < 0)
413 			return count;
414 		done += count;
415 	}
416 
417 	return done;
418 }
419 
420 static long virtiovf_precopy_ioctl(struct file *filp, unsigned int cmd,
421 				   unsigned long arg)
422 {
423 	struct virtiovf_migration_file *migf = filp->private_data;
424 	struct virtiovf_pci_core_device *virtvdev = migf->virtvdev;
425 	struct vfio_precopy_info info = {};
426 	loff_t *pos = &filp->f_pos;
427 	bool end_of_data = false;
428 	u32 ctx_size = 0;
429 	int ret;
430 
431 	ret = vfio_check_precopy_ioctl(&virtvdev->core_device.vdev, cmd, arg,
432 				       &info);
433 	if (ret)
434 		return ret;
435 
436 	mutex_lock(&virtvdev->state_mutex);
437 	if (virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY &&
438 	    virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) {
439 		ret = -EINVAL;
440 		goto err_state_unlock;
441 	}
442 
443 	/*
444 	 * The virtio specification does not include a PRE_COPY concept.
445 	 * Since we can expect the data to remain the same for a certain period,
446 	 * we use a rate limiter mechanism before making a call to the device.
447 	 */
448 	if (__ratelimit(&migf->pre_copy_rl_state)) {
449 
450 		ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
451 					VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id,
452 					VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
453 					&ctx_size);
454 		if (ret)
455 			goto err_state_unlock;
456 	}
457 
458 	mutex_lock(&migf->lock);
459 	if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
460 		ret = -ENODEV;
461 		goto err_migf_unlock;
462 	}
463 
464 	if (migf->pre_copy_initial_bytes > *pos) {
465 		info.initial_bytes = migf->pre_copy_initial_bytes - *pos;
466 	} else {
467 		info.dirty_bytes = migf->max_pos - *pos;
468 		if (!info.dirty_bytes)
469 			end_of_data = true;
470 		info.dirty_bytes += ctx_size;
471 	}
472 
473 	if (!end_of_data || !ctx_size) {
474 		mutex_unlock(&migf->lock);
475 		goto done;
476 	}
477 
478 	mutex_unlock(&migf->lock);
479 	/*
480 	 * We finished transferring the current state and the device has a
481 	 * dirty state, read a new state.
482 	 */
483 	ret = virtiovf_read_device_context_chunk(migf, ctx_size);
484 	if (ret)
485 		/*
486 		 * The machine is running, and context size could be grow, so no reason to mark
487 		 * the device state as VIRTIOVF_MIGF_STATE_ERROR.
488 		 */
489 		goto err_state_unlock;
490 
491 done:
492 	virtiovf_state_mutex_unlock(virtvdev);
493 	if (copy_to_user((void __user *)arg, &info,
494 			 offsetofend(struct vfio_precopy_info, dirty_bytes)))
495 		return -EFAULT;
496 	return 0;
497 
498 err_migf_unlock:
499 	mutex_unlock(&migf->lock);
500 err_state_unlock:
501 	virtiovf_state_mutex_unlock(virtvdev);
502 	return ret;
503 }
504 
505 static const struct file_operations virtiovf_save_fops = {
506 	.owner = THIS_MODULE,
507 	.read = virtiovf_save_read,
508 	.unlocked_ioctl = virtiovf_precopy_ioctl,
509 	.compat_ioctl = compat_ptr_ioctl,
510 	.release = virtiovf_release_file,
511 };
512 
513 static int
514 virtiovf_add_buf_header(struct virtiovf_data_buffer *header_buf,
515 			u32 data_size)
516 {
517 	struct virtiovf_migration_file *migf = header_buf->migf;
518 	struct virtiovf_migration_header header = {};
519 	struct page *page;
520 	u8 *to_buff;
521 
522 	header.record_size = cpu_to_le64(data_size);
523 	header.flags = cpu_to_le32(VIRTIOVF_MIGF_HEADER_FLAGS_TAG_MANDATORY);
524 	header.tag = cpu_to_le32(VIRTIOVF_MIGF_HEADER_TAG_DEVICE_DATA);
525 	page = virtiovf_get_migration_page(header_buf, 0);
526 	if (!page)
527 		return -EINVAL;
528 	to_buff = kmap_local_page(page);
529 	memcpy(to_buff, &header, sizeof(header));
530 	kunmap_local(to_buff);
531 	header_buf->length = sizeof(header);
532 	header_buf->start_pos = header_buf->migf->max_pos;
533 	migf->max_pos += header_buf->length;
534 
535 	scoped_guard(mutex, &migf->list_lock)
536 		list_add_tail(&header_buf->buf_elm, &migf->buf_list);
537 
538 	return 0;
539 }
540 
541 static int
542 virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
543 				   u32 ctx_size)
544 {
545 	struct virtiovf_data_buffer *header_buf;
546 	struct virtiovf_data_buffer *buf;
547 	bool unmark_end = false;
548 	struct scatterlist *sg;
549 	unsigned int i;
550 	u32 res_size;
551 	int nent;
552 	int ret;
553 
554 	buf = virtiovf_get_data_buffer(migf, ctx_size);
555 	if (IS_ERR(buf))
556 		return PTR_ERR(buf);
557 
558 	/* Find the total count of SG entries which satisfies the size */
559 	nent = sg_nents_for_len(buf->table.sgt.sgl, ctx_size);
560 	if (nent <= 0) {
561 		ret = -EINVAL;
562 		goto out;
563 	}
564 
565 	/*
566 	 * Iterate to that SG entry and mark it as last (if it's not already)
567 	 * to let underlay layers iterate only till that entry.
568 	 */
569 	for_each_sg(buf->table.sgt.sgl, sg, nent - 1, i)
570 		;
571 
572 	if (!sg_is_last(sg)) {
573 		unmark_end = true;
574 		sg_mark_end(sg);
575 	}
576 
577 	ret = virtio_pci_admin_dev_parts_get(migf->virtvdev->core_device.pdev,
578 					     VIRTIO_RESOURCE_OBJ_DEV_PARTS,
579 					     migf->obj_id,
580 					     VIRTIO_ADMIN_CMD_DEV_PARTS_GET_TYPE_ALL,
581 					     buf->table.sgt.sgl, &res_size);
582 	/* Restore the original SG mark end */
583 	if (unmark_end)
584 		sg_unmark_end(sg);
585 	if (ret)
586 		goto out;
587 
588 	buf->length = res_size;
589 	header_buf = virtiovf_get_data_buffer(migf,
590 				sizeof(struct virtiovf_migration_header));
591 	if (IS_ERR(header_buf)) {
592 		ret = PTR_ERR(header_buf);
593 		goto out;
594 	}
595 
596 	ret = virtiovf_add_buf_header(header_buf, res_size);
597 	if (ret)
598 		goto out_header;
599 
600 	buf->start_pos = buf->migf->max_pos;
601 	migf->max_pos += buf->length;
602 
603 	scoped_guard(mutex, &migf->list_lock)
604 		list_add_tail(&buf->buf_elm, &migf->buf_list);
605 
606 	return 0;
607 
608 out_header:
609 	virtiovf_put_data_buffer(header_buf);
610 out:
611 	virtiovf_put_data_buffer(buf);
612 	return ret;
613 }
614 
615 static int
616 virtiovf_pci_save_device_final_data(struct virtiovf_pci_core_device *virtvdev)
617 {
618 	struct virtiovf_migration_file *migf = virtvdev->saving_migf;
619 	u32 ctx_size;
620 	int ret;
621 
622 	if (migf->state == VIRTIOVF_MIGF_STATE_ERROR)
623 		return -ENODEV;
624 
625 	ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
626 				VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id,
627 				VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
628 				&ctx_size);
629 	if (ret)
630 		goto err;
631 
632 	if (!ctx_size) {
633 		ret = -EINVAL;
634 		goto err;
635 	}
636 
637 	ret = virtiovf_read_device_context_chunk(migf, ctx_size);
638 	if (ret)
639 		goto err;
640 
641 	migf->state = VIRTIOVF_MIGF_STATE_COMPLETE;
642 	return 0;
643 
644 err:
645 	migf->state = VIRTIOVF_MIGF_STATE_ERROR;
646 	return ret;
647 }
648 
649 static struct virtiovf_migration_file *
650 virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev,
651 			      bool pre_copy)
652 {
653 	struct virtiovf_migration_file *migf;
654 	u32 ctx_size;
655 	u32 obj_id;
656 	int ret;
657 
658 	migf = kzalloc_obj(*migf, GFP_KERNEL_ACCOUNT);
659 	if (!migf)
660 		return ERR_PTR(-ENOMEM);
661 
662 	migf->filp = anon_inode_getfile("virtiovf_mig", &virtiovf_save_fops, migf,
663 					O_RDONLY);
664 	if (IS_ERR(migf->filp)) {
665 		ret = PTR_ERR(migf->filp);
666 		kfree(migf);
667 		return ERR_PTR(ret);
668 	}
669 
670 	stream_open(migf->filp->f_inode, migf->filp);
671 	mutex_init(&migf->lock);
672 	INIT_LIST_HEAD(&migf->buf_list);
673 	INIT_LIST_HEAD(&migf->avail_list);
674 	mutex_init(&migf->list_lock);
675 	migf->virtvdev = virtvdev;
676 
677 	lockdep_assert_held(&virtvdev->state_mutex);
678 	ret = virtiovf_pci_alloc_obj_id(virtvdev, VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET,
679 					&obj_id);
680 	if (ret)
681 		goto out;
682 
683 	migf->obj_id = obj_id;
684 	/* Mark as having a valid obj id which can be even 0 */
685 	migf->has_obj_id = true;
686 	ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
687 				VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id,
688 				VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
689 				&ctx_size);
690 	if (ret)
691 		goto out_clean;
692 
693 	if (!ctx_size) {
694 		ret = -EINVAL;
695 		goto out_clean;
696 	}
697 
698 	ret = virtiovf_read_device_context_chunk(migf, ctx_size);
699 	if (ret)
700 		goto out_clean;
701 
702 	if (pre_copy) {
703 		migf->pre_copy_initial_bytes = migf->max_pos;
704 		/* Arbitrarily set the pre-copy rate limit to 1-second intervals */
705 		ratelimit_state_init(&migf->pre_copy_rl_state, 1 * HZ, 1);
706 		/* Prevent any rate messages upon its usage */
707 		ratelimit_set_flags(&migf->pre_copy_rl_state,
708 				    RATELIMIT_MSG_ON_RELEASE);
709 		migf->state = VIRTIOVF_MIGF_STATE_PRECOPY;
710 	} else {
711 		migf->state = VIRTIOVF_MIGF_STATE_COMPLETE;
712 	}
713 
714 	return migf;
715 
716 out_clean:
717 	virtiovf_clean_migf_resources(migf);
718 out:
719 	fput(migf->filp);
720 	return ERR_PTR(ret);
721 }
722 
723 /*
724  * Set the required object header at the beginning of the buffer.
725  * The actual device parts data will be written post of the header offset.
726  */
727 static int virtiovf_set_obj_cmd_header(struct virtiovf_data_buffer *vhca_buf)
728 {
729 	struct virtio_admin_cmd_resource_obj_cmd_hdr obj_hdr = {};
730 	struct page *page;
731 	u8 *to_buff;
732 
733 	obj_hdr.type = cpu_to_le16(VIRTIO_RESOURCE_OBJ_DEV_PARTS);
734 	obj_hdr.id = cpu_to_le32(vhca_buf->migf->obj_id);
735 	page = virtiovf_get_migration_page(vhca_buf, 0);
736 	if (!page)
737 		return -EINVAL;
738 	to_buff = kmap_local_page(page);
739 	memcpy(to_buff, &obj_hdr, sizeof(obj_hdr));
740 	kunmap_local(to_buff);
741 
742 	/* Mark the buffer as including the header object data */
743 	vhca_buf->include_header_object = 1;
744 	return 0;
745 }
746 
747 static int
748 virtiovf_append_page_to_mig_buf(struct virtiovf_data_buffer *vhca_buf,
749 				const char __user **buf, size_t *len,
750 				loff_t *pos, ssize_t *done)
751 {
752 	unsigned long offset;
753 	size_t page_offset;
754 	struct page *page;
755 	size_t page_len;
756 	u8 *to_buff;
757 	int ret;
758 
759 	offset = *pos - vhca_buf->start_pos;
760 
761 	if (vhca_buf->include_header_object)
762 		/* The buffer holds the object header, update the offset accordingly */
763 		offset += sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr);
764 
765 	page_offset = offset % PAGE_SIZE;
766 
767 	page = virtiovf_get_migration_page(vhca_buf, offset - page_offset);
768 	if (!page)
769 		return -EINVAL;
770 
771 	page_len = min_t(size_t, *len, PAGE_SIZE - page_offset);
772 	to_buff = kmap_local_page(page);
773 	ret = copy_from_user(to_buff + page_offset, *buf, page_len);
774 	kunmap_local(to_buff);
775 	if (ret)
776 		return -EFAULT;
777 
778 	*pos += page_len;
779 	*done += page_len;
780 	*buf += page_len;
781 	*len -= page_len;
782 	vhca_buf->length += page_len;
783 	return 0;
784 }
785 
786 static ssize_t
787 virtiovf_resume_read_chunk(struct virtiovf_migration_file *migf,
788 			   struct virtiovf_data_buffer *vhca_buf,
789 			   size_t chunk_size, const char __user **buf,
790 			   size_t *len, loff_t *pos, ssize_t *done,
791 			   bool *has_work)
792 {
793 	size_t copy_len, to_copy;
794 	int ret;
795 
796 	to_copy = min_t(size_t, *len, chunk_size - vhca_buf->length);
797 	copy_len = to_copy;
798 	while (to_copy) {
799 		ret = virtiovf_append_page_to_mig_buf(vhca_buf, buf, &to_copy,
800 						      pos, done);
801 		if (ret)
802 			return ret;
803 	}
804 
805 	*len -= copy_len;
806 	if (vhca_buf->length == chunk_size) {
807 		migf->load_state = VIRTIOVF_LOAD_STATE_LOAD_CHUNK;
808 		migf->max_pos += chunk_size;
809 		*has_work = true;
810 	}
811 
812 	return 0;
813 }
814 
815 static int
816 virtiovf_resume_read_header_data(struct virtiovf_migration_file *migf,
817 				 struct virtiovf_data_buffer *vhca_buf,
818 				 const char __user **buf, size_t *len,
819 				 loff_t *pos, ssize_t *done)
820 {
821 	size_t copy_len, to_copy;
822 	size_t required_data;
823 	int ret;
824 
825 	required_data = migf->record_size - vhca_buf->length;
826 	to_copy = min_t(size_t, *len, required_data);
827 	copy_len = to_copy;
828 	while (to_copy) {
829 		ret = virtiovf_append_page_to_mig_buf(vhca_buf, buf, &to_copy,
830 						      pos, done);
831 		if (ret)
832 			return ret;
833 	}
834 
835 	*len -= copy_len;
836 	if (vhca_buf->length == migf->record_size) {
837 		switch (migf->record_tag) {
838 		default:
839 			/* Optional tag */
840 			break;
841 		}
842 
843 		migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER;
844 		migf->max_pos += migf->record_size;
845 		vhca_buf->length = 0;
846 	}
847 
848 	return 0;
849 }
850 
851 static int
852 virtiovf_resume_read_header(struct virtiovf_migration_file *migf,
853 			    struct virtiovf_data_buffer *vhca_buf,
854 			    const char __user **buf,
855 			    size_t *len, loff_t *pos,
856 			    ssize_t *done, bool *has_work)
857 {
858 	struct page *page;
859 	size_t copy_len;
860 	u8 *to_buff;
861 	int ret;
862 
863 	copy_len = min_t(size_t, *len,
864 		sizeof(struct virtiovf_migration_header) - vhca_buf->length);
865 	page = virtiovf_get_migration_page(vhca_buf, 0);
866 	if (!page)
867 		return -EINVAL;
868 	to_buff = kmap_local_page(page);
869 	ret = copy_from_user(to_buff + vhca_buf->length, *buf, copy_len);
870 	if (ret) {
871 		ret = -EFAULT;
872 		goto end;
873 	}
874 
875 	*buf += copy_len;
876 	*pos += copy_len;
877 	*done += copy_len;
878 	*len -= copy_len;
879 	vhca_buf->length += copy_len;
880 	if (vhca_buf->length == sizeof(struct virtiovf_migration_header)) {
881 		u64 record_size;
882 		u32 flags;
883 
884 		record_size = le64_to_cpup((__le64 *)to_buff);
885 		if (record_size > MAX_LOAD_SIZE) {
886 			ret = -ENOMEM;
887 			goto end;
888 		}
889 
890 		migf->record_size = record_size;
891 		flags = le32_to_cpup((__le32 *)(to_buff +
892 			    offsetof(struct virtiovf_migration_header, flags)));
893 		migf->record_tag = le32_to_cpup((__le32 *)(to_buff +
894 			    offsetof(struct virtiovf_migration_header, tag)));
895 		switch (migf->record_tag) {
896 		case VIRTIOVF_MIGF_HEADER_TAG_DEVICE_DATA:
897 			migf->load_state = VIRTIOVF_LOAD_STATE_PREP_CHUNK;
898 			break;
899 		default:
900 			if (!(flags & VIRTIOVF_MIGF_HEADER_FLAGS_TAG_OPTIONAL)) {
901 				ret = -EOPNOTSUPP;
902 				goto end;
903 			}
904 			/* We may read and skip this optional record data */
905 			migf->load_state = VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA;
906 		}
907 
908 		migf->max_pos += vhca_buf->length;
909 		vhca_buf->length = 0;
910 		*has_work = true;
911 	}
912 end:
913 	kunmap_local(to_buff);
914 	return ret;
915 }
916 
917 static ssize_t virtiovf_resume_write(struct file *filp, const char __user *buf,
918 				     size_t len, loff_t *pos)
919 {
920 	struct virtiovf_migration_file *migf = filp->private_data;
921 	struct virtiovf_data_buffer *vhca_buf = migf->buf;
922 	struct virtiovf_data_buffer *vhca_buf_header = migf->buf_header;
923 	unsigned int orig_length;
924 	bool has_work = false;
925 	ssize_t done = 0;
926 	int ret = 0;
927 
928 	if (pos)
929 		return -ESPIPE;
930 
931 	pos = &filp->f_pos;
932 	if (*pos < vhca_buf->start_pos)
933 		return -EINVAL;
934 
935 	mutex_lock(&migf->virtvdev->state_mutex);
936 	mutex_lock(&migf->lock);
937 	if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
938 		done = -ENODEV;
939 		goto out_unlock;
940 	}
941 
942 	while (len || has_work) {
943 		has_work = false;
944 		switch (migf->load_state) {
945 		case VIRTIOVF_LOAD_STATE_READ_HEADER:
946 			ret = virtiovf_resume_read_header(migf, vhca_buf_header, &buf,
947 							  &len, pos, &done, &has_work);
948 			if (ret)
949 				goto out_unlock;
950 			break;
951 		case VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA:
952 			if (vhca_buf_header->allocated_length < migf->record_size) {
953 				virtiovf_free_data_buffer(vhca_buf_header);
954 
955 				migf->buf_header = virtiovf_alloc_data_buffer(migf,
956 						migf->record_size);
957 				if (IS_ERR(migf->buf_header)) {
958 					ret = PTR_ERR(migf->buf_header);
959 					migf->buf_header = NULL;
960 					goto out_unlock;
961 				}
962 
963 				vhca_buf_header = migf->buf_header;
964 			}
965 
966 			vhca_buf_header->start_pos = migf->max_pos;
967 			migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER_DATA;
968 			break;
969 		case VIRTIOVF_LOAD_STATE_READ_HEADER_DATA:
970 			ret = virtiovf_resume_read_header_data(migf, vhca_buf_header,
971 							       &buf, &len, pos, &done);
972 			if (ret)
973 				goto out_unlock;
974 			break;
975 		case VIRTIOVF_LOAD_STATE_PREP_CHUNK:
976 		{
977 			u32 cmd_size = migf->record_size +
978 				sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr);
979 
980 			/*
981 			 * The DMA map/unmap is managed in virtio layer, we just need to extend
982 			 * the SG pages to hold the extra required chunk data.
983 			 */
984 			if (vhca_buf->allocated_length < cmd_size) {
985 				ret = virtiovf_add_migration_pages(vhca_buf,
986 					DIV_ROUND_UP_ULL(cmd_size - vhca_buf->allocated_length,
987 							 PAGE_SIZE));
988 				if (ret)
989 					goto out_unlock;
990 			}
991 
992 			vhca_buf->start_pos = migf->max_pos;
993 			migf->load_state = VIRTIOVF_LOAD_STATE_READ_CHUNK;
994 			break;
995 		}
996 		case VIRTIOVF_LOAD_STATE_READ_CHUNK:
997 			ret = virtiovf_resume_read_chunk(migf, vhca_buf, migf->record_size,
998 							 &buf, &len, pos, &done, &has_work);
999 			if (ret)
1000 				goto out_unlock;
1001 			break;
1002 		case VIRTIOVF_LOAD_STATE_LOAD_CHUNK:
1003 			/* Mark the last SG entry and set its length */
1004 			sg_mark_end(vhca_buf->last_offset_sg);
1005 			orig_length = vhca_buf->last_offset_sg->length;
1006 			/* Length should include the resource object command header */
1007 			vhca_buf->last_offset_sg->length = vhca_buf->length +
1008 					sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr) -
1009 					vhca_buf->last_offset;
1010 			ret = virtio_pci_admin_dev_parts_set(migf->virtvdev->core_device.pdev,
1011 							     vhca_buf->table.sgt.sgl);
1012 			/* Restore the original SG data */
1013 			vhca_buf->last_offset_sg->length = orig_length;
1014 			sg_unmark_end(vhca_buf->last_offset_sg);
1015 			if (ret)
1016 				goto out_unlock;
1017 			migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER;
1018 			/* be ready for reading the next chunk */
1019 			vhca_buf->length = 0;
1020 			break;
1021 		default:
1022 			break;
1023 		}
1024 	}
1025 
1026 out_unlock:
1027 	if (ret)
1028 		migf->state = VIRTIOVF_MIGF_STATE_ERROR;
1029 	mutex_unlock(&migf->lock);
1030 	virtiovf_state_mutex_unlock(migf->virtvdev);
1031 	return ret ? ret : done;
1032 }
1033 
1034 static const struct file_operations virtiovf_resume_fops = {
1035 	.owner = THIS_MODULE,
1036 	.write = virtiovf_resume_write,
1037 	.release = virtiovf_release_file,
1038 };
1039 
1040 static struct virtiovf_migration_file *
1041 virtiovf_pci_resume_device_data(struct virtiovf_pci_core_device *virtvdev)
1042 {
1043 	struct virtiovf_migration_file *migf;
1044 	struct virtiovf_data_buffer *buf;
1045 	u32 obj_id;
1046 	int ret;
1047 
1048 	migf = kzalloc_obj(*migf, GFP_KERNEL_ACCOUNT);
1049 	if (!migf)
1050 		return ERR_PTR(-ENOMEM);
1051 
1052 	migf->filp = anon_inode_getfile("virtiovf_mig", &virtiovf_resume_fops, migf,
1053 					O_WRONLY);
1054 	if (IS_ERR(migf->filp)) {
1055 		ret = PTR_ERR(migf->filp);
1056 		kfree(migf);
1057 		return ERR_PTR(ret);
1058 	}
1059 
1060 	stream_open(migf->filp->f_inode, migf->filp);
1061 	mutex_init(&migf->lock);
1062 	INIT_LIST_HEAD(&migf->buf_list);
1063 	INIT_LIST_HEAD(&migf->avail_list);
1064 	mutex_init(&migf->list_lock);
1065 
1066 	buf = virtiovf_alloc_data_buffer(migf, VIRTIOVF_TARGET_INITIAL_BUF_SIZE);
1067 	if (IS_ERR(buf)) {
1068 		ret = PTR_ERR(buf);
1069 		goto out;
1070 	}
1071 
1072 	migf->buf = buf;
1073 
1074 	buf = virtiovf_alloc_data_buffer(migf,
1075 		sizeof(struct virtiovf_migration_header));
1076 	if (IS_ERR(buf)) {
1077 		ret = PTR_ERR(buf);
1078 		goto out_clean;
1079 	}
1080 
1081 	migf->buf_header = buf;
1082 	migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER;
1083 
1084 	migf->virtvdev = virtvdev;
1085 	ret = virtiovf_pci_alloc_obj_id(virtvdev, VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_SET,
1086 					&obj_id);
1087 	if (ret)
1088 		goto out_clean;
1089 
1090 	migf->obj_id = obj_id;
1091 	/* Mark as having a valid obj id which can be even 0 */
1092 	migf->has_obj_id = true;
1093 	ret = virtiovf_set_obj_cmd_header(migf->buf);
1094 	if (ret)
1095 		goto out_clean;
1096 
1097 	return migf;
1098 
1099 out_clean:
1100 	virtiovf_clean_migf_resources(migf);
1101 out:
1102 	fput(migf->filp);
1103 	return ERR_PTR(ret);
1104 }
1105 
1106 static struct file *
1107 virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev,
1108 				      u32 new)
1109 {
1110 	u32 cur = virtvdev->mig_state;
1111 	int ret;
1112 
1113 	if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) {
1114 		/* NOP */
1115 		return NULL;
1116 	}
1117 
1118 	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
1119 		/* NOP */
1120 		return NULL;
1121 	}
1122 
1123 	if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) ||
1124 	    (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
1125 		ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev,
1126 						BIT(VIRTIO_ADMIN_CMD_DEV_MODE_F_STOPPED));
1127 		if (ret)
1128 			return ERR_PTR(ret);
1129 		return NULL;
1130 	}
1131 
1132 	if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) ||
1133 	    (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY)) {
1134 		ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev, 0);
1135 		if (ret)
1136 			return ERR_PTR(ret);
1137 		return NULL;
1138 	}
1139 
1140 	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
1141 		struct virtiovf_migration_file *migf;
1142 
1143 		migf = virtiovf_pci_save_device_data(virtvdev, false);
1144 		if (IS_ERR(migf))
1145 			return ERR_CAST(migf);
1146 		get_file(migf->filp);
1147 		virtvdev->saving_migf = migf;
1148 		return migf->filp;
1149 	}
1150 
1151 	if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) ||
1152 	    (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) ||
1153 	    (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P)) {
1154 		virtiovf_disable_fds(virtvdev);
1155 		return NULL;
1156 	}
1157 
1158 	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
1159 		struct virtiovf_migration_file *migf;
1160 
1161 		migf = virtiovf_pci_resume_device_data(virtvdev);
1162 		if (IS_ERR(migf))
1163 			return ERR_CAST(migf);
1164 		get_file(migf->filp);
1165 		virtvdev->resuming_migf = migf;
1166 		return migf->filp;
1167 	}
1168 
1169 	if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
1170 		virtiovf_disable_fds(virtvdev);
1171 		return NULL;
1172 	}
1173 
1174 	if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) ||
1175 	    (cur == VFIO_DEVICE_STATE_RUNNING_P2P &&
1176 	     new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
1177 		struct virtiovf_migration_file *migf;
1178 
1179 		migf = virtiovf_pci_save_device_data(virtvdev, true);
1180 		if (IS_ERR(migf))
1181 			return ERR_CAST(migf);
1182 		get_file(migf->filp);
1183 		virtvdev->saving_migf = migf;
1184 		return migf->filp;
1185 	}
1186 
1187 	if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY) {
1188 		ret = virtiovf_pci_save_device_final_data(virtvdev);
1189 		return ret ? ERR_PTR(ret) : NULL;
1190 	}
1191 
1192 	/*
1193 	 * vfio_mig_get_next_state() does not use arcs other than the above
1194 	 */
1195 	WARN_ON(true);
1196 	return ERR_PTR(-EINVAL);
1197 }
1198 
1199 static struct file *
1200 virtiovf_pci_set_device_state(struct vfio_device *vdev,
1201 			      enum vfio_device_mig_state new_state)
1202 {
1203 	struct virtiovf_pci_core_device *virtvdev = container_of(
1204 		vdev, struct virtiovf_pci_core_device, core_device.vdev);
1205 	enum vfio_device_mig_state next_state;
1206 	struct file *res = NULL;
1207 	int ret;
1208 
1209 	mutex_lock(&virtvdev->state_mutex);
1210 	while (new_state != virtvdev->mig_state) {
1211 		ret = vfio_mig_get_next_state(vdev, virtvdev->mig_state,
1212 					      new_state, &next_state);
1213 		if (ret) {
1214 			res = ERR_PTR(ret);
1215 			break;
1216 		}
1217 		res = virtiovf_pci_step_device_state_locked(virtvdev, next_state);
1218 		if (IS_ERR(res))
1219 			break;
1220 		virtvdev->mig_state = next_state;
1221 		if (WARN_ON(res && new_state != virtvdev->mig_state)) {
1222 			fput(res);
1223 			res = ERR_PTR(-EINVAL);
1224 			break;
1225 		}
1226 	}
1227 	virtiovf_state_mutex_unlock(virtvdev);
1228 	return res;
1229 }
1230 
1231 static int virtiovf_pci_get_device_state(struct vfio_device *vdev,
1232 				       enum vfio_device_mig_state *curr_state)
1233 {
1234 	struct virtiovf_pci_core_device *virtvdev = container_of(
1235 		vdev, struct virtiovf_pci_core_device, core_device.vdev);
1236 
1237 	mutex_lock(&virtvdev->state_mutex);
1238 	*curr_state = virtvdev->mig_state;
1239 	virtiovf_state_mutex_unlock(virtvdev);
1240 	return 0;
1241 }
1242 
1243 static int virtiovf_pci_get_data_size(struct vfio_device *vdev,
1244 				      unsigned long *stop_copy_length)
1245 {
1246 	struct virtiovf_pci_core_device *virtvdev = container_of(
1247 		vdev, struct virtiovf_pci_core_device, core_device.vdev);
1248 	bool obj_id_exists;
1249 	u32 res_size;
1250 	u32 obj_id;
1251 	int ret;
1252 
1253 	mutex_lock(&virtvdev->state_mutex);
1254 	obj_id_exists = virtvdev->saving_migf && virtvdev->saving_migf->has_obj_id;
1255 	if (!obj_id_exists) {
1256 		ret = virtiovf_pci_alloc_obj_id(virtvdev,
1257 						VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET,
1258 						&obj_id);
1259 		if (ret)
1260 			goto end;
1261 	} else {
1262 		obj_id = virtvdev->saving_migf->obj_id;
1263 	}
1264 
1265 	ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
1266 				VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id,
1267 				VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
1268 				&res_size);
1269 	if (!ret)
1270 		*stop_copy_length = res_size;
1271 
1272 	/*
1273 	 * We can't leave this obj_id alive if didn't exist before, otherwise, it might
1274 	 * stay alive, even without an active migration flow (e.g. migration was cancelled)
1275 	 */
1276 	if (!obj_id_exists)
1277 		virtiovf_pci_free_obj_id(virtvdev, obj_id);
1278 end:
1279 	virtiovf_state_mutex_unlock(virtvdev);
1280 	return ret;
1281 }
1282 
1283 static const struct vfio_migration_ops virtvdev_pci_mig_ops = {
1284 	.migration_set_state = virtiovf_pci_set_device_state,
1285 	.migration_get_state = virtiovf_pci_get_device_state,
1286 	.migration_get_data_size = virtiovf_pci_get_data_size,
1287 };
1288 
1289 void virtiovf_set_migratable(struct virtiovf_pci_core_device *virtvdev)
1290 {
1291 	virtvdev->migrate_cap = 1;
1292 	mutex_init(&virtvdev->state_mutex);
1293 	spin_lock_init(&virtvdev->reset_lock);
1294 	virtvdev->core_device.vdev.migration_flags =
1295 		VFIO_MIGRATION_STOP_COPY |
1296 		VFIO_MIGRATION_P2P |
1297 		VFIO_MIGRATION_PRE_COPY;
1298 	virtvdev->core_device.vdev.mig_ops = &virtvdev_pci_mig_ops;
1299 }
1300 
1301 void virtiovf_open_migration(struct virtiovf_pci_core_device *virtvdev)
1302 {
1303 	if (!virtvdev->migrate_cap)
1304 		return;
1305 
1306 	virtvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
1307 }
1308 
1309 void virtiovf_close_migration(struct virtiovf_pci_core_device *virtvdev)
1310 {
1311 	if (!virtvdev->migrate_cap)
1312 		return;
1313 
1314 	virtiovf_disable_fds(virtvdev);
1315 }
1316