// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
 */

#include <linux/device.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/vfio_pci_core.h>
#include <linux/virtio_pci.h>
#include <linux/virtio_net.h>
#include <linux/virtio_pci_admin.h>
#include <linux/anon_inodes.h>

#include "common.h"

/* Device specification max parts size */
#define MAX_LOAD_SIZE (BIT_ULL(BITS_PER_TYPE \
	(((struct virtio_admin_cmd_dev_parts_metadata_result *)0)->parts_size.size)) - 1)

/* Initial target buffer size */
#define VIRTIOVF_TARGET_INITIAL_BUF_SIZE SZ_1M

static int
virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
				   u32 ctx_size);

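/*
 * Return the page backing @offset within @buf. Accesses are expected to be
 * sequential, so the last visited scatterlist entry is cached in
 * last_offset_sg/sg_last_entry/last_offset; the walk restarts from the head
 * of the scatter-gather table only when @offset moves backwards.
 */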
static struct page *
virtiovf_get_migration_page(struct virtiovf_data_buffer *buf,
			    unsigned long offset)
{
	unsigned long cur_offset = 0;
	struct scatterlist *sg;
	unsigned int i;

	/* All accesses are sequential */
	if (offset < buf->last_offset || !buf->last_offset_sg) {
		buf->last_offset = 0;
		buf->last_offset_sg = buf->table.sgt.sgl;
		buf->sg_last_entry = 0;
	}

	cur_offset = buf->last_offset;

	for_each_sg(buf->last_offset_sg, sg,
		    buf->table.sgt.orig_nents - buf->sg_last_entry, i) {
		if (offset < sg->length + cur_offset) {
			buf->last_offset_sg = sg;
			buf->sg_last_entry += i;
			buf->last_offset = cur_offset;
			return sg_page(sg) + (offset - cur_offset) / PAGE_SIZE;
		}
		cur_offset += sg->length;
	}
	return NULL;
}

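/*
 * Grow @buf by @npages pages, allocated in bulk and appended to the buffer's
 * scatter-gather append table. On failure, pages from the failed bulk append
 * are freed here; any pages already in the table are released by
 * virtiovf_free_data_buffer().
 */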
static int virtiovf_add_migration_pages(struct virtiovf_data_buffer *buf,
					unsigned int npages)
{
	unsigned int to_alloc = npages;
	struct page **page_list;
	unsigned long filled;
	unsigned int to_fill;
	int ret;
	int i;

	to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
	page_list = kvzalloc_objs(*page_list, to_fill, GFP_KERNEL_ACCOUNT);
	if (!page_list)
		return -ENOMEM;

	do {
		filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, to_fill,
					  page_list);
		if (!filled) {
			ret = -ENOMEM;
			goto err;
		}
		to_alloc -= filled;
		ret = sg_alloc_append_table_from_pages(&buf->table, page_list,
			filled, 0, filled << PAGE_SHIFT, UINT_MAX,
			SG_MAX_SINGLE_ALLOC, GFP_KERNEL_ACCOUNT);

		if (ret)
			goto err_append;
		buf->allocated_length += filled * PAGE_SIZE;
		/* clean input for another bulk allocation */
		memset(page_list, 0, filled * sizeof(*page_list));
		to_fill = min_t(unsigned int, to_alloc,
				PAGE_SIZE / sizeof(*page_list));
	} while (to_alloc > 0);

	kvfree(page_list);
	return 0;

err_append:
	for (i = filled - 1; i >= 0; i--)
		__free_page(page_list[i]);
err:
	kvfree(page_list);
	return ret;
}

static void virtiovf_free_data_buffer(struct virtiovf_data_buffer *buf)
{
	struct sg_page_iter sg_iter;

	/* Undo alloc_pages_bulk() */
	for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0)
		__free_page(sg_page_iter_page(&sg_iter));
	sg_free_append_table(&buf->table);
	kfree(buf);
}

static struct virtiovf_data_buffer *
virtiovf_alloc_data_buffer(struct virtiovf_migration_file *migf, size_t length)
{
	struct virtiovf_data_buffer *buf;
	int ret;

	buf = kzalloc_obj(*buf, GFP_KERNEL_ACCOUNT);
	if (!buf)
		return ERR_PTR(-ENOMEM);

	ret = virtiovf_add_migration_pages(buf,
				DIV_ROUND_UP_ULL(length, PAGE_SIZE));
	if (ret)
		goto end;

	buf->migf = migf;
	return buf;
end:
	virtiovf_free_data_buffer(buf);
	return ERR_PTR(ret);
}

static void virtiovf_put_data_buffer(struct virtiovf_data_buffer *buf)
{
	spin_lock_irq(&buf->migf->list_lock);
	list_add_tail(&buf->buf_elm, &buf->migf->avail_list);
	spin_unlock_irq(&buf->migf->list_lock);
}

static int
virtiovf_pci_alloc_obj_id(struct virtiovf_pci_core_device *virtvdev, u8 type,
			  u32 *obj_id)
{
	return virtio_pci_admin_obj_create(virtvdev->core_device.pdev,
					   VIRTIO_RESOURCE_OBJ_DEV_PARTS, type, obj_id);
}

static void
virtiovf_pci_free_obj_id(struct virtiovf_pci_core_device *virtvdev, u32 obj_id)
{
	virtio_pci_admin_obj_destroy(virtvdev->core_device.pdev,
			VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id);
}

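/*
 * Fetch a buffer whose allocated length is at least @length, preferably from
 * the migration file's avail_list. Smaller cached buffers encountered on the
 * way are moved to a local free_list and released outside the spin lock. If
 * no suitable buffer is cached, a new one is allocated.
 */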
static struct virtiovf_data_buffer *
virtiovf_get_data_buffer(struct virtiovf_migration_file *migf, size_t length)
{
	struct virtiovf_data_buffer *buf, *temp_buf;
	struct list_head free_list;

	INIT_LIST_HEAD(&free_list);

	spin_lock_irq(&migf->list_lock);
	list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) {
		list_del_init(&buf->buf_elm);
		if (buf->allocated_length >= length) {
			spin_unlock_irq(&migf->list_lock);
			goto found;
		}
		/*
		 * Prevent holding redundant buffers. Put them on a local free
		 * list and free them at the end, outside the spin lock
		 * (&migf->list_lock), to minimize the time it is held.
		 */
		list_add(&buf->buf_elm, &free_list);
	}
	spin_unlock_irq(&migf->list_lock);
	buf = virtiovf_alloc_data_buffer(migf, length);

found:
	while ((temp_buf = list_first_entry_or_null(&free_list,
				struct virtiovf_data_buffer, buf_elm))) {
		list_del(&temp_buf->buf_elm);
		virtiovf_free_data_buffer(temp_buf);
	}

	return buf;
}

static void virtiovf_clean_migf_resources(struct virtiovf_migration_file *migf)
{
	struct virtiovf_data_buffer *entry;

	if (migf->buf) {
		virtiovf_free_data_buffer(migf->buf);
		migf->buf = NULL;
	}

	if (migf->buf_header) {
		virtiovf_free_data_buffer(migf->buf_header);
		migf->buf_header = NULL;
	}

	list_splice(&migf->avail_list, &migf->buf_list);

	while ((entry = list_first_entry_or_null(&migf->buf_list,
				struct virtiovf_data_buffer, buf_elm))) {
		list_del(&entry->buf_elm);
		virtiovf_free_data_buffer(entry);
	}

	if (migf->has_obj_id)
		virtiovf_pci_free_obj_id(migf->virtvdev, migf->obj_id);
}

static void virtiovf_disable_fd(struct virtiovf_migration_file *migf)
{
	mutex_lock(&migf->lock);
	migf->state = VIRTIOVF_MIGF_STATE_ERROR;
	migf->filp->f_pos = 0;
	mutex_unlock(&migf->lock);
}

static void virtiovf_disable_fds(struct virtiovf_pci_core_device *virtvdev)
{
	if (virtvdev->resuming_migf) {
		virtiovf_disable_fd(virtvdev->resuming_migf);
		virtiovf_clean_migf_resources(virtvdev->resuming_migf);
		fput(virtvdev->resuming_migf->filp);
		virtvdev->resuming_migf = NULL;
	}
	if (virtvdev->saving_migf) {
		virtiovf_disable_fd(virtvdev->saving_migf);
		virtiovf_clean_migf_resources(virtvdev->saving_migf);
		fput(virtvdev->saving_migf->filp);
		virtvdev->saving_migf = NULL;
	}
}

/*
 * This function is called in all state_mutex unlock cases to
 * handle a 'deferred_reset', if one exists.
 */
static void virtiovf_state_mutex_unlock(struct virtiovf_pci_core_device *virtvdev)
{
again:
	spin_lock(&virtvdev->reset_lock);
	if (virtvdev->deferred_reset) {
		virtvdev->deferred_reset = false;
		spin_unlock(&virtvdev->reset_lock);
		virtvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
		virtiovf_disable_fds(virtvdev);
		goto again;
	}
	mutex_unlock(&virtvdev->state_mutex);
	spin_unlock(&virtvdev->reset_lock);
}

void virtiovf_migration_reset_done(struct pci_dev *pdev)
{
	struct virtiovf_pci_core_device *virtvdev = dev_get_drvdata(&pdev->dev);

	if (!virtvdev->migrate_cap)
		return;

	/*
	 * As the higher VFIO layers are holding locks across reset and using
	 * those same locks with the mm_lock, we need to prevent an ABBA
	 * deadlock with the state_mutex and mm_lock.
	 * In case the state_mutex was taken already, we defer the cleanup work
	 * to the unlock flow of the other running context.
	 */
	spin_lock(&virtvdev->reset_lock);
	virtvdev->deferred_reset = true;
	if (!mutex_trylock(&virtvdev->state_mutex)) {
		spin_unlock(&virtvdev->reset_lock);
		return;
	}
	spin_unlock(&virtvdev->reset_lock);
	virtiovf_state_mutex_unlock(virtvdev);
}

static int virtiovf_release_file(struct inode *inode, struct file *filp)
{
	struct virtiovf_migration_file *migf = filp->private_data;

	virtiovf_disable_fd(migf);
	mutex_destroy(&migf->lock);
	kfree(migf);
	return 0;
}

static struct virtiovf_data_buffer *
virtiovf_get_data_buff_from_pos(struct virtiovf_migration_file *migf,
				loff_t pos, bool *end_of_data)
{
	struct virtiovf_data_buffer *buf;
	bool found = false;

	*end_of_data = false;
	spin_lock_irq(&migf->list_lock);
	if (list_empty(&migf->buf_list)) {
		*end_of_data = true;
		goto end;
	}

	buf = list_first_entry(&migf->buf_list, struct virtiovf_data_buffer,
			       buf_elm);
	if (pos >= buf->start_pos &&
	    pos < buf->start_pos + buf->length) {
		found = true;
		goto end;
	}

	/*
	 * As we use a stream-based FD, the data is always expected to be in
	 * the first chunk.
	 */
	migf->state = VIRTIOVF_MIGF_STATE_ERROR;

end:
	spin_unlock_irq(&migf->list_lock);
	return found ? buf : NULL;
}

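/*
 * Copy up to *@len bytes from @vhca_buf to user space, page by page, mapping
 * each page with kmap_local_page(). Once the buffer has been fully consumed
 * (*@pos passed its end), it is moved back to the avail_list for reuse.
 */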
static ssize_t virtiovf_buf_read(struct virtiovf_data_buffer *vhca_buf,
				 char __user **buf, size_t *len, loff_t *pos)
{
	unsigned long offset;
	ssize_t done = 0;
	size_t copy_len;

	copy_len = min_t(size_t,
			 vhca_buf->start_pos + vhca_buf->length - *pos, *len);
	while (copy_len) {
		size_t page_offset;
		struct page *page;
		size_t page_len;
		u8 *from_buff;
		int ret;

		offset = *pos - vhca_buf->start_pos;
		page_offset = offset % PAGE_SIZE;
		offset -= page_offset;
		page = virtiovf_get_migration_page(vhca_buf, offset);
		if (!page)
			return -EINVAL;
		page_len = min_t(size_t, copy_len, PAGE_SIZE - page_offset);
		from_buff = kmap_local_page(page);
		ret = copy_to_user(*buf, from_buff + page_offset, page_len);
		kunmap_local(from_buff);
		if (ret)
			return -EFAULT;
		*pos += page_len;
		*len -= page_len;
		*buf += page_len;
		done += page_len;
		copy_len -= page_len;
	}

	if (*pos >= vhca_buf->start_pos + vhca_buf->length) {
		spin_lock_irq(&vhca_buf->migf->list_lock);
		list_del_init(&vhca_buf->buf_elm);
		list_add_tail(&vhca_buf->buf_elm, &vhca_buf->migf->avail_list);
		spin_unlock_irq(&vhca_buf->migf->list_lock);
	}

	return done;
}

static ssize_t virtiovf_save_read(struct file *filp, char __user *buf, size_t len,
				  loff_t *pos)
{
	struct virtiovf_migration_file *migf = filp->private_data;
	struct virtiovf_data_buffer *vhca_buf;
	bool first_loop_call = true;
	bool end_of_data;
	ssize_t done = 0;

	if (pos)
		return -ESPIPE;
	pos = &filp->f_pos;

	mutex_lock(&migf->lock);
	if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
		done = -ENODEV;
		goto out_unlock;
	}

	while (len) {
		ssize_t count;

		vhca_buf = virtiovf_get_data_buff_from_pos(migf, *pos, &end_of_data);
		if (first_loop_call) {
			first_loop_call = false;
			/* Temporary end of file as part of PRE_COPY */
			if (end_of_data && migf->state == VIRTIOVF_MIGF_STATE_PRECOPY) {
				done = -ENOMSG;
				goto out_unlock;
			}
			if (end_of_data && migf->state != VIRTIOVF_MIGF_STATE_COMPLETE) {
				done = -EINVAL;
				goto out_unlock;
			}
		}

		if (end_of_data)
			goto out_unlock;

		if (!vhca_buf) {
			done = -EINVAL;
			goto out_unlock;
		}

		count = virtiovf_buf_read(vhca_buf, &buf, &len, pos);
		if (count < 0) {
			done = count;
			goto out_unlock;
		}
		done += count;
	}

out_unlock:
	mutex_unlock(&migf->lock);
	return done;
}

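/*
 * Precopy info handler for the saving FD. initial_bytes reflects the device
 * context captured when pre-copy started and not yet read by user space;
 * dirty_bytes reflects data queued beyond that, plus any new context size
 * reported by the device. Querying the device for its current context size
 * is rate limited, as the data is not expected to change rapidly. Once the
 * stream was fully consumed and the device reports a non-zero size, a fresh
 * context chunk is read and queued.
 */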
static long virtiovf_precopy_ioctl(struct file *filp, unsigned int cmd,
				   unsigned long arg)
{
	struct virtiovf_migration_file *migf = filp->private_data;
	struct virtiovf_pci_core_device *virtvdev = migf->virtvdev;
	struct vfio_precopy_info info = {};
	loff_t *pos = &filp->f_pos;
	bool end_of_data = false;
	u32 ctx_size = 0;
	int ret;

	ret = vfio_check_precopy_ioctl(&virtvdev->core_device.vdev, cmd, arg,
				       &info);
	if (ret)
		return ret;

	mutex_lock(&virtvdev->state_mutex);
	if (virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY &&
	    virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) {
		ret = -EINVAL;
		goto err_state_unlock;
	}

	/*
	 * The virtio specification does not include a PRE_COPY concept.
	 * Since we can expect the data to remain the same for a certain period,
	 * we use a rate limiter mechanism before making a call to the device.
	 */
	if (__ratelimit(&migf->pre_copy_rl_state)) {
		ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
					VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id,
					VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
					&ctx_size);
		if (ret)
			goto err_state_unlock;
	}

	mutex_lock(&migf->lock);
	if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
		ret = -ENODEV;
		goto err_migf_unlock;
	}

	if (migf->pre_copy_initial_bytes > *pos) {
		info.initial_bytes = migf->pre_copy_initial_bytes - *pos;
	} else {
		info.dirty_bytes = migf->max_pos - *pos;
		if (!info.dirty_bytes)
			end_of_data = true;
		info.dirty_bytes += ctx_size;
	}

	if (!end_of_data || !ctx_size) {
		mutex_unlock(&migf->lock);
		goto done;
	}

	mutex_unlock(&migf->lock);
	/*
	 * We finished transferring the current state and the device has a
	 * dirty state, read a new state.
	 */
	ret = virtiovf_read_device_context_chunk(migf, ctx_size);
	if (ret)
		/*
		 * The machine is running, and the context size could grow, so
		 * there is no reason to mark the device state as
		 * VIRTIOVF_MIGF_STATE_ERROR.
		 */
		goto err_state_unlock;

done:
	virtiovf_state_mutex_unlock(virtvdev);
	if (copy_to_user((void __user *)arg, &info,
			 offsetofend(struct vfio_precopy_info, dirty_bytes)))
		return -EFAULT;
	return 0;

err_migf_unlock:
	mutex_unlock(&migf->lock);
err_state_unlock:
	virtiovf_state_mutex_unlock(virtvdev);
	return ret;
}

static const struct file_operations virtiovf_save_fops = {
	.owner = THIS_MODULE,
	.read = virtiovf_save_read,
	.unlocked_ioctl = virtiovf_precopy_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.release = virtiovf_release_file,
};

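/*
 * The migration stream is a sequence of records, each made of a
 * struct virtiovf_migration_header followed by 'record_size' bytes of data:
 *
 *   +------------------------------------+------------------------+
 *   | header: record_size | flags | tag  | record data ...        |
 *   +------------------------------------+------------------------+
 *
 * Fill @header_buf with such a header, describing a device-data record of
 * @data_size bytes, and queue it on the migration file's buf_list.
 */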
static int
virtiovf_add_buf_header(struct virtiovf_data_buffer *header_buf,
			u32 data_size)
{
	struct virtiovf_migration_file *migf = header_buf->migf;
	struct virtiovf_migration_header header = {};
	struct page *page;
	u8 *to_buff;

	header.record_size = cpu_to_le64(data_size);
	header.flags = cpu_to_le32(VIRTIOVF_MIGF_HEADER_FLAGS_TAG_MANDATORY);
	header.tag = cpu_to_le32(VIRTIOVF_MIGF_HEADER_TAG_DEVICE_DATA);
	page = virtiovf_get_migration_page(header_buf, 0);
	if (!page)
		return -EINVAL;
	to_buff = kmap_local_page(page);
	memcpy(to_buff, &header, sizeof(header));
	kunmap_local(to_buff);
	header_buf->length = sizeof(header);
	header_buf->start_pos = header_buf->migf->max_pos;
	migf->max_pos += header_buf->length;
	spin_lock_irq(&migf->list_lock);
	list_add_tail(&header_buf->buf_elm, &migf->buf_list);
	spin_unlock_irq(&migf->list_lock);
	return 0;
}

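/*
 * Read one device context chunk of @ctx_size bytes from the device into a
 * suitably sized data buffer and queue it, preceded by a matching header
 * record, on the migration file's buf_list for user space to read.
 */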
static int
virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
				   u32 ctx_size)
{
	struct virtiovf_data_buffer *header_buf;
	struct virtiovf_data_buffer *buf;
	bool unmark_end = false;
	struct scatterlist *sg;
	unsigned int i;
	u32 res_size;
	int nent;
	int ret;

	buf = virtiovf_get_data_buffer(migf, ctx_size);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	/* Find the total count of SG entries which satisfies the size */
	nent = sg_nents_for_len(buf->table.sgt.sgl, ctx_size);
	if (nent <= 0) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Iterate to that SG entry and mark it as last (if it's not already)
	 * to let the underlying layers iterate only up to that entry.
	 */
	for_each_sg(buf->table.sgt.sgl, sg, nent - 1, i)
		;

	if (!sg_is_last(sg)) {
		unmark_end = true;
		sg_mark_end(sg);
	}

	ret = virtio_pci_admin_dev_parts_get(migf->virtvdev->core_device.pdev,
					     VIRTIO_RESOURCE_OBJ_DEV_PARTS,
					     migf->obj_id,
					     VIRTIO_ADMIN_CMD_DEV_PARTS_GET_TYPE_ALL,
					     buf->table.sgt.sgl, &res_size);
	/* Restore the original SG mark end */
	if (unmark_end)
		sg_unmark_end(sg);
	if (ret)
		goto out;

	buf->length = res_size;
	header_buf = virtiovf_get_data_buffer(migf,
				sizeof(struct virtiovf_migration_header));
	if (IS_ERR(header_buf)) {
		ret = PTR_ERR(header_buf);
		goto out;
	}

	ret = virtiovf_add_buf_header(header_buf, res_size);
	if (ret)
		goto out_header;

	buf->start_pos = buf->migf->max_pos;
	migf->max_pos += buf->length;
	spin_lock_irq(&migf->list_lock);
	list_add_tail(&buf->buf_elm, &migf->buf_list);
	spin_unlock_irq(&migf->list_lock);
	return 0;

out_header:
	virtiovf_put_data_buffer(header_buf);
out:
	virtiovf_put_data_buffer(buf);
	return ret;
}

static int
virtiovf_pci_save_device_final_data(struct virtiovf_pci_core_device *virtvdev)
{
	struct virtiovf_migration_file *migf = virtvdev->saving_migf;
	u32 ctx_size;
	int ret;

	if (migf->state == VIRTIOVF_MIGF_STATE_ERROR)
		return -ENODEV;

	ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
				VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id,
				VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
				&ctx_size);
	if (ret)
		goto err;

	if (!ctx_size) {
		ret = -EINVAL;
		goto err;
	}

	ret = virtiovf_read_device_context_chunk(migf, ctx_size);
	if (ret)
		goto err;

	migf->state = VIRTIOVF_MIGF_STATE_COMPLETE;
	return 0;

err:
	migf->state = VIRTIOVF_MIGF_STATE_ERROR;
	return ret;
}

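/*
 * Create the saving side migration file: allocate a DEV_PARTS 'get' object
 * on the device, read the initial device context into the file's buffer
 * list, and return an anon inode file that user space reads the stream from.
 * For PRE_COPY, the bytes captured here become pre_copy_initial_bytes and
 * further chunks may follow while the device is still running.
 */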
static struct virtiovf_migration_file *
virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev,
			      bool pre_copy)
{
	struct virtiovf_migration_file *migf;
	u32 ctx_size;
	u32 obj_id;
	int ret;

	migf = kzalloc_obj(*migf, GFP_KERNEL_ACCOUNT);
	if (!migf)
		return ERR_PTR(-ENOMEM);

	migf->filp = anon_inode_getfile("virtiovf_mig", &virtiovf_save_fops, migf,
					O_RDONLY);
	if (IS_ERR(migf->filp)) {
		ret = PTR_ERR(migf->filp);
		kfree(migf);
		return ERR_PTR(ret);
	}

	stream_open(migf->filp->f_inode, migf->filp);
	mutex_init(&migf->lock);
	INIT_LIST_HEAD(&migf->buf_list);
	INIT_LIST_HEAD(&migf->avail_list);
	spin_lock_init(&migf->list_lock);
	migf->virtvdev = virtvdev;

	lockdep_assert_held(&virtvdev->state_mutex);
	ret = virtiovf_pci_alloc_obj_id(virtvdev, VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET,
					&obj_id);
	if (ret)
		goto out;

	migf->obj_id = obj_id;
	/* Mark as having a valid obj id, which can even be 0 */
	migf->has_obj_id = true;
	ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
				VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id,
				VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
				&ctx_size);
	if (ret)
		goto out_clean;

	if (!ctx_size) {
		ret = -EINVAL;
		goto out_clean;
	}

	ret = virtiovf_read_device_context_chunk(migf, ctx_size);
	if (ret)
		goto out_clean;

	if (pre_copy) {
		migf->pre_copy_initial_bytes = migf->max_pos;
		/* Arbitrarily set the pre-copy rate limit to 1-second intervals */
		ratelimit_state_init(&migf->pre_copy_rl_state, 1 * HZ, 1);
		/* Prevent any rate messages upon its usage */
		ratelimit_set_flags(&migf->pre_copy_rl_state,
				    RATELIMIT_MSG_ON_RELEASE);
		migf->state = VIRTIOVF_MIGF_STATE_PRECOPY;
	} else {
		migf->state = VIRTIOVF_MIGF_STATE_COMPLETE;
	}

	return migf;

out_clean:
	virtiovf_clean_migf_resources(migf);
out:
	fput(migf->filp);
	return ERR_PTR(ret);
}

/*
 * Set the required object header at the beginning of the buffer.
 * The actual device parts data will be written after the header.
 */
static int virtiovf_set_obj_cmd_header(struct virtiovf_data_buffer *vhca_buf)
{
	struct virtio_admin_cmd_resource_obj_cmd_hdr obj_hdr = {};
	struct page *page;
	u8 *to_buff;

	obj_hdr.type = cpu_to_le16(VIRTIO_RESOURCE_OBJ_DEV_PARTS);
	obj_hdr.id = cpu_to_le32(vhca_buf->migf->obj_id);
	page = virtiovf_get_migration_page(vhca_buf, 0);
	if (!page)
		return -EINVAL;
	to_buff = kmap_local_page(page);
	memcpy(to_buff, &obj_hdr, sizeof(obj_hdr));
	kunmap_local(to_buff);

	/* Mark the buffer as including the header object data */
	vhca_buf->include_header_object = 1;
	return 0;
}

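/*
 * Copy user data into @vhca_buf at stream position *@pos, one page at a
 * time. When the buffer carries the resource object command header, the
 * destination offset is shifted by the header size so the device parts data
 * lands right after it.
 */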
static int
virtiovf_append_page_to_mig_buf(struct virtiovf_data_buffer *vhca_buf,
				const char __user **buf, size_t *len,
				loff_t *pos, ssize_t *done)
{
	unsigned long offset;
	size_t page_offset;
	struct page *page;
	size_t page_len;
	u8 *to_buff;
	int ret;

	offset = *pos - vhca_buf->start_pos;

	if (vhca_buf->include_header_object)
		/* The buffer holds the object header, update the offset accordingly */
		offset += sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr);

	page_offset = offset % PAGE_SIZE;

	page = virtiovf_get_migration_page(vhca_buf, offset - page_offset);
	if (!page)
		return -EINVAL;

	page_len = min_t(size_t, *len, PAGE_SIZE - page_offset);
	to_buff = kmap_local_page(page);
	ret = copy_from_user(to_buff + page_offset, *buf, page_len);
	kunmap_local(to_buff);
	if (ret)
		return -EFAULT;

	*pos += page_len;
	*done += page_len;
	*buf += page_len;
	*len -= page_len;
	vhca_buf->length += page_len;
	return 0;
}

static ssize_t
virtiovf_resume_read_chunk(struct virtiovf_migration_file *migf,
			   struct virtiovf_data_buffer *vhca_buf,
			   size_t chunk_size, const char __user **buf,
			   size_t *len, loff_t *pos, ssize_t *done,
			   bool *has_work)
{
	size_t copy_len, to_copy;
	int ret;

	to_copy = min_t(size_t, *len, chunk_size - vhca_buf->length);
	copy_len = to_copy;
	while (to_copy) {
		ret = virtiovf_append_page_to_mig_buf(vhca_buf, buf, &to_copy,
						      pos, done);
		if (ret)
			return ret;
	}

	*len -= copy_len;
	if (vhca_buf->length == chunk_size) {
		migf->load_state = VIRTIOVF_LOAD_STATE_LOAD_CHUNK;
		migf->max_pos += chunk_size;
		*has_work = true;
	}

	return 0;
}

static int
virtiovf_resume_read_header_data(struct virtiovf_migration_file *migf,
				 struct virtiovf_data_buffer *vhca_buf,
				 const char __user **buf, size_t *len,
				 loff_t *pos, ssize_t *done)
{
	size_t copy_len, to_copy;
	size_t required_data;
	int ret;

	required_data = migf->record_size - vhca_buf->length;
	to_copy = min_t(size_t, *len, required_data);
	copy_len = to_copy;
	while (to_copy) {
		ret = virtiovf_append_page_to_mig_buf(vhca_buf, buf, &to_copy,
						      pos, done);
		if (ret)
			return ret;
	}

	*len -= copy_len;
	if (vhca_buf->length == migf->record_size) {
		switch (migf->record_tag) {
		default:
			/* Optional tag */
			break;
		}

		migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER;
		migf->max_pos += migf->record_size;
		vhca_buf->length = 0;
	}

	return 0;
}

static int
virtiovf_resume_read_header(struct virtiovf_migration_file *migf,
			    struct virtiovf_data_buffer *vhca_buf,
			    const char __user **buf,
			    size_t *len, loff_t *pos,
			    ssize_t *done, bool *has_work)
{
	struct page *page;
	size_t copy_len;
	u8 *to_buff;
	int ret;

	copy_len = min_t(size_t, *len,
		sizeof(struct virtiovf_migration_header) - vhca_buf->length);
	page = virtiovf_get_migration_page(vhca_buf, 0);
	if (!page)
		return -EINVAL;
	to_buff = kmap_local_page(page);
	ret = copy_from_user(to_buff + vhca_buf->length, *buf, copy_len);
	if (ret) {
		ret = -EFAULT;
		goto end;
	}

	*buf += copy_len;
	*pos += copy_len;
	*done += copy_len;
	*len -= copy_len;
	vhca_buf->length += copy_len;
	if (vhca_buf->length == sizeof(struct virtiovf_migration_header)) {
		u64 record_size;
		u32 flags;

		record_size = le64_to_cpup((__le64 *)to_buff);
		if (record_size > MAX_LOAD_SIZE) {
			ret = -ENOMEM;
			goto end;
		}

		migf->record_size = record_size;
		flags = le32_to_cpup((__le32 *)(to_buff +
			    offsetof(struct virtiovf_migration_header, flags)));
		migf->record_tag = le32_to_cpup((__le32 *)(to_buff +
			    offsetof(struct virtiovf_migration_header, tag)));
		switch (migf->record_tag) {
		case VIRTIOVF_MIGF_HEADER_TAG_DEVICE_DATA:
			migf->load_state = VIRTIOVF_LOAD_STATE_PREP_CHUNK;
			break;
		default:
			if (!(flags & VIRTIOVF_MIGF_HEADER_FLAGS_TAG_OPTIONAL)) {
				ret = -EOPNOTSUPP;
				goto end;
			}
			/* We may read and skip this optional record data */
			migf->load_state = VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA;
		}

		migf->max_pos += vhca_buf->length;
		vhca_buf->length = 0;
		*has_work = true;
	}
end:
	kunmap_local(to_buff);
	return ret;
}

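/*
 * The resume FD write handler runs a state machine over migf->load_state:
 *
 *   READ_HEADER      - accumulate a struct virtiovf_migration_header
 *   PREP_HEADER_DATA - size buf_header for an optional record's data
 *   READ_HEADER_DATA - consume (and discard) an optional record
 *   PREP_CHUNK       - grow migf->buf for the incoming device data
 *   READ_CHUNK       - accumulate record_size bytes of device data
 *   LOAD_CHUNK       - push the chunk to the device over the admin queue
 *
 * 'has_work' forces another loop iteration once a state completed, even if
 * no user bytes are left, so a fully buffered chunk is loaded without
 * waiting for another write() call.
 */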
static ssize_t virtiovf_resume_write(struct file *filp, const char __user *buf,
				     size_t len, loff_t *pos)
{
	struct virtiovf_migration_file *migf = filp->private_data;
	struct virtiovf_data_buffer *vhca_buf = migf->buf;
	struct virtiovf_data_buffer *vhca_buf_header = migf->buf_header;
	unsigned int orig_length;
	bool has_work = false;
	ssize_t done = 0;
	int ret = 0;

	if (pos)
		return -ESPIPE;

	pos = &filp->f_pos;
	if (*pos < vhca_buf->start_pos)
		return -EINVAL;

	mutex_lock(&migf->virtvdev->state_mutex);
	mutex_lock(&migf->lock);
	if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
		done = -ENODEV;
		goto out_unlock;
	}

	while (len || has_work) {
		has_work = false;
		switch (migf->load_state) {
		case VIRTIOVF_LOAD_STATE_READ_HEADER:
			ret = virtiovf_resume_read_header(migf, vhca_buf_header, &buf,
							  &len, pos, &done, &has_work);
			if (ret)
				goto out_unlock;
			break;
		case VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA:
			if (vhca_buf_header->allocated_length < migf->record_size) {
				virtiovf_free_data_buffer(vhca_buf_header);

				migf->buf_header = virtiovf_alloc_data_buffer(migf,
						migf->record_size);
				if (IS_ERR(migf->buf_header)) {
					ret = PTR_ERR(migf->buf_header);
					migf->buf_header = NULL;
					goto out_unlock;
				}

				vhca_buf_header = migf->buf_header;
			}

			vhca_buf_header->start_pos = migf->max_pos;
			migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER_DATA;
			break;
		case VIRTIOVF_LOAD_STATE_READ_HEADER_DATA:
			ret = virtiovf_resume_read_header_data(migf, vhca_buf_header,
							       &buf, &len, pos, &done);
			if (ret)
				goto out_unlock;
			break;
		case VIRTIOVF_LOAD_STATE_PREP_CHUNK:
		{
			u32 cmd_size = migf->record_size +
				sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr);

			/*
			 * The DMA map/unmap is managed by the virtio layer; we just need
			 * to extend the SG pages to hold the extra required chunk data.
			 */
			if (vhca_buf->allocated_length < cmd_size) {
				ret = virtiovf_add_migration_pages(vhca_buf,
					DIV_ROUND_UP_ULL(cmd_size - vhca_buf->allocated_length,
							 PAGE_SIZE));
				if (ret)
					goto out_unlock;
			}

			vhca_buf->start_pos = migf->max_pos;
			migf->load_state = VIRTIOVF_LOAD_STATE_READ_CHUNK;
			break;
		}
		case VIRTIOVF_LOAD_STATE_READ_CHUNK:
			ret = virtiovf_resume_read_chunk(migf, vhca_buf, migf->record_size,
							 &buf, &len, pos, &done, &has_work);
			if (ret)
				goto out_unlock;
			break;
		case VIRTIOVF_LOAD_STATE_LOAD_CHUNK:
			/* Mark the last SG entry and set its length */
			sg_mark_end(vhca_buf->last_offset_sg);
			orig_length = vhca_buf->last_offset_sg->length;
			/* Length should include the resource object command header */
			vhca_buf->last_offset_sg->length = vhca_buf->length +
					sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr) -
					vhca_buf->last_offset;
			ret = virtio_pci_admin_dev_parts_set(migf->virtvdev->core_device.pdev,
							     vhca_buf->table.sgt.sgl);
			/* Restore the original SG data */
			vhca_buf->last_offset_sg->length = orig_length;
			sg_unmark_end(vhca_buf->last_offset_sg);
			if (ret)
				goto out_unlock;
			migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER;
			/* Be ready for reading the next chunk */
			vhca_buf->length = 0;
			break;
		default:
			break;
		}
	}

out_unlock:
	if (ret)
		migf->state = VIRTIOVF_MIGF_STATE_ERROR;
	mutex_unlock(&migf->lock);
	virtiovf_state_mutex_unlock(migf->virtvdev);
	return ret ? ret : done;
}

static const struct file_operations virtiovf_resume_fops = {
	.owner = THIS_MODULE,
	.write = virtiovf_resume_write,
	.release = virtiovf_release_file,
};

static struct virtiovf_migration_file *
virtiovf_pci_resume_device_data(struct virtiovf_pci_core_device *virtvdev)
{
	struct virtiovf_migration_file *migf;
	struct virtiovf_data_buffer *buf;
	u32 obj_id;
	int ret;

	migf = kzalloc_obj(*migf, GFP_KERNEL_ACCOUNT);
	if (!migf)
		return ERR_PTR(-ENOMEM);

	migf->filp = anon_inode_getfile("virtiovf_mig", &virtiovf_resume_fops, migf,
					O_WRONLY);
	if (IS_ERR(migf->filp)) {
		ret = PTR_ERR(migf->filp);
		kfree(migf);
		return ERR_PTR(ret);
	}

	stream_open(migf->filp->f_inode, migf->filp);
	mutex_init(&migf->lock);
	INIT_LIST_HEAD(&migf->buf_list);
	INIT_LIST_HEAD(&migf->avail_list);
	spin_lock_init(&migf->list_lock);

	buf = virtiovf_alloc_data_buffer(migf, VIRTIOVF_TARGET_INITIAL_BUF_SIZE);
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		goto out;
	}

	migf->buf = buf;

	buf = virtiovf_alloc_data_buffer(migf,
		sizeof(struct virtiovf_migration_header));
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		goto out_clean;
	}

	migf->buf_header = buf;
	migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER;

	migf->virtvdev = virtvdev;
	ret = virtiovf_pci_alloc_obj_id(virtvdev, VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_SET,
					&obj_id);
	if (ret)
		goto out_clean;

	migf->obj_id = obj_id;
	/* Mark as having a valid obj id, which can even be 0 */
	migf->has_obj_id = true;
	ret = virtiovf_set_obj_cmd_header(migf->buf);
	if (ret)
		goto out_clean;

	return migf;

out_clean:
	virtiovf_clean_migf_resources(migf);
out:
	fput(migf->filp);
	return ERR_PTR(ret);
}

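/*
 * Execute a single migration state transition arc, as produced by
 * vfio_mig_get_next_state(). Arcs between the RUNNING and RUNNING_P2P
 * states (and their PRE_COPY equivalents) toggle the device's STOPPED mode
 * through the admin queue; the save/resume arcs create the corresponding
 * migration file and return its FD; the remaining arcs tear the migration
 * files down, flush the final device context, or are a NOP.
 */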
static struct file *
virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev,
				      u32 new)
{
	u32 cur = virtvdev->mig_state;
	int ret;

	if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) {
		/* NOP */
		return NULL;
	}

	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
		/* NOP */
		return NULL;
	}

	if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) ||
	    (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
		ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev,
						BIT(VIRTIO_ADMIN_CMD_DEV_MODE_F_STOPPED));
		if (ret)
			return ERR_PTR(ret);
		return NULL;
	}

	if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) ||
	    (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY)) {
		ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev, 0);
		if (ret)
			return ERR_PTR(ret);
		return NULL;
	}

	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
		struct virtiovf_migration_file *migf;

		migf = virtiovf_pci_save_device_data(virtvdev, false);
		if (IS_ERR(migf))
			return ERR_CAST(migf);
		get_file(migf->filp);
		virtvdev->saving_migf = migf;
		return migf->filp;
	}

	if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) ||
	    (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) ||
	    (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P)) {
		virtiovf_disable_fds(virtvdev);
		return NULL;
	}

	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
		struct virtiovf_migration_file *migf;

		migf = virtiovf_pci_resume_device_data(virtvdev);
		if (IS_ERR(migf))
			return ERR_CAST(migf);
		get_file(migf->filp);
		virtvdev->resuming_migf = migf;
		return migf->filp;
	}

	if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
		virtiovf_disable_fds(virtvdev);
		return NULL;
	}

	if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) ||
	    (cur == VFIO_DEVICE_STATE_RUNNING_P2P &&
	     new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
		struct virtiovf_migration_file *migf;

		migf = virtiovf_pci_save_device_data(virtvdev, true);
		if (IS_ERR(migf))
			return ERR_CAST(migf);
		get_file(migf->filp);
		virtvdev->saving_migf = migf;
		return migf->filp;
	}

	if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY) {
		ret = virtiovf_pci_save_device_final_data(virtvdev);
		return ret ? ERR_PTR(ret) : NULL;
	}

	/*
	 * vfio_mig_get_next_state() does not use arcs other than the above
	 */
	WARN_ON(true);
	return ERR_PTR(-EINVAL);
}

static struct file *
virtiovf_pci_set_device_state(struct vfio_device *vdev,
			      enum vfio_device_mig_state new_state)
{
	struct virtiovf_pci_core_device *virtvdev = container_of(
		vdev, struct virtiovf_pci_core_device, core_device.vdev);
	enum vfio_device_mig_state next_state;
	struct file *res = NULL;
	int ret;

	mutex_lock(&virtvdev->state_mutex);
	while (new_state != virtvdev->mig_state) {
		ret = vfio_mig_get_next_state(vdev, virtvdev->mig_state,
					      new_state, &next_state);
		if (ret) {
			res = ERR_PTR(ret);
			break;
		}
		res = virtiovf_pci_step_device_state_locked(virtvdev, next_state);
		if (IS_ERR(res))
			break;
		virtvdev->mig_state = next_state;
		if (WARN_ON(res && new_state != virtvdev->mig_state)) {
			fput(res);
			res = ERR_PTR(-EINVAL);
			break;
		}
	}
	virtiovf_state_mutex_unlock(virtvdev);
	return res;
}

static int virtiovf_pci_get_device_state(struct vfio_device *vdev,
					 enum vfio_device_mig_state *curr_state)
{
	struct virtiovf_pci_core_device *virtvdev = container_of(
		vdev, struct virtiovf_pci_core_device, core_device.vdev);

	mutex_lock(&virtvdev->state_mutex);
	*curr_state = virtvdev->mig_state;
	virtiovf_state_mutex_unlock(virtvdev);
	return 0;
}

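/*
 * Report the expected STOP_COPY data size. If no saving migration file with
 * a valid object id exists, a temporary DEV_PARTS 'get' object is created
 * just for the metadata query and destroyed right after, so it cannot leak
 * past a cancelled migration.
 */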
static int virtiovf_pci_get_data_size(struct vfio_device *vdev,
				      unsigned long *stop_copy_length)
{
	struct virtiovf_pci_core_device *virtvdev = container_of(
		vdev, struct virtiovf_pci_core_device, core_device.vdev);
	bool obj_id_exists;
	u32 res_size;
	u32 obj_id;
	int ret;

	mutex_lock(&virtvdev->state_mutex);
	obj_id_exists = virtvdev->saving_migf && virtvdev->saving_migf->has_obj_id;
	if (!obj_id_exists) {
		ret = virtiovf_pci_alloc_obj_id(virtvdev,
						VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET,
						&obj_id);
		if (ret)
			goto end;
	} else {
		obj_id = virtvdev->saving_migf->obj_id;
	}

	ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
				VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id,
				VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
				&res_size);
	if (!ret)
		*stop_copy_length = res_size;

	/*
	 * We can't leave this obj_id alive if it didn't exist before;
	 * otherwise it might stay alive, even without an active migration
	 * flow (e.g. if migration was cancelled).
	 */
	if (!obj_id_exists)
		virtiovf_pci_free_obj_id(virtvdev, obj_id);
end:
	virtiovf_state_mutex_unlock(virtvdev);
	return ret;
}

static const struct vfio_migration_ops virtvdev_pci_mig_ops = {
	.migration_set_state = virtiovf_pci_set_device_state,
	.migration_get_state = virtiovf_pci_get_device_state,
	.migration_get_data_size = virtiovf_pci_get_data_size,
};

void virtiovf_set_migratable(struct virtiovf_pci_core_device *virtvdev)
{
	virtvdev->migrate_cap = 1;
	mutex_init(&virtvdev->state_mutex);
	spin_lock_init(&virtvdev->reset_lock);
	virtvdev->core_device.vdev.migration_flags =
		VFIO_MIGRATION_STOP_COPY |
		VFIO_MIGRATION_P2P |
		VFIO_MIGRATION_PRE_COPY;
	virtvdev->core_device.vdev.mig_ops = &virtvdev_pci_mig_ops;
}

void virtiovf_open_migration(struct virtiovf_pci_core_device *virtvdev)
{
	if (!virtvdev->migrate_cap)
		return;

	virtvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
}

void virtiovf_close_migration(struct virtiovf_pci_core_device *virtvdev)
{
	if (!virtvdev->migrate_cap)
		return;

	virtiovf_disable_fds(virtvdev);
}
1332