xref: /linux/drivers/vfio/pci/virtio/migrate.c (revision 8804d970fab45726b3c7cd7f240b31122aa94219)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
4  */
5 
6 #include <linux/device.h>
7 #include <linux/module.h>
8 #include <linux/mutex.h>
9 #include <linux/pci.h>
10 #include <linux/pm_runtime.h>
11 #include <linux/types.h>
12 #include <linux/uaccess.h>
13 #include <linux/vfio.h>
14 #include <linux/vfio_pci_core.h>
15 #include <linux/virtio_pci.h>
16 #include <linux/virtio_net.h>
17 #include <linux/virtio_pci_admin.h>
18 #include <linux/anon_inodes.h>
19 
20 #include "common.h"
21 
22 /* Device specification max parts size */
23 #define MAX_LOAD_SIZE (BIT_ULL(BITS_PER_TYPE \
24 	(((struct virtio_admin_cmd_dev_parts_metadata_result *)0)->parts_size.size)) - 1)
25 
26 /* Initial target buffer size */
27 #define VIRTIOVF_TARGET_INITIAL_BUF_SIZE SZ_1M
28 
29 static int
30 virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
31 				   u32 ctx_size);
32 
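/*
 * Translate a byte offset within the migration buffer into its backing page.
 * Accesses are sequential, so the last scatterlist position is cached and the
 * walk resumes from there; the cache is reset whenever the offset moves
 * backwards.
 */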
33 static struct page *
34 virtiovf_get_migration_page(struct virtiovf_data_buffer *buf,
35 			    unsigned long offset)
36 {
37 	unsigned long cur_offset = 0;
38 	struct scatterlist *sg;
39 	unsigned int i;
40 
41 	/* All accesses are sequential */
42 	if (offset < buf->last_offset || !buf->last_offset_sg) {
43 		buf->last_offset = 0;
44 		buf->last_offset_sg = buf->table.sgt.sgl;
45 		buf->sg_last_entry = 0;
46 	}
47 
48 	cur_offset = buf->last_offset;
49 
50 	for_each_sg(buf->last_offset_sg, sg,
51 		    buf->table.sgt.orig_nents - buf->sg_last_entry, i) {
52 		if (offset < sg->length + cur_offset) {
53 			buf->last_offset_sg = sg;
54 			buf->sg_last_entry += i;
55 			buf->last_offset = cur_offset;
56 			return sg_page(sg) + (offset - cur_offset) / PAGE_SIZE;
57 		}
58 		cur_offset += sg->length;
59 	}
60 	return NULL;
61 }
62 
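/*
 * Grow the buffer by 'npages' pages: allocate pages in bulk, one PAGE_SIZE
 * worth of page pointers at a time, append them to the buffer's scatter-gather
 * append table and account for them in allocated_length.
 */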
63 static int virtiovf_add_migration_pages(struct virtiovf_data_buffer *buf,
64 					unsigned int npages)
65 {
66 	unsigned int to_alloc = npages;
67 	struct page **page_list;
68 	unsigned long filled;
69 	unsigned int to_fill;
70 	int ret;
71 	int i;
72 
73 	to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
74 	page_list = kvcalloc(to_fill, sizeof(*page_list), GFP_KERNEL_ACCOUNT);
75 	if (!page_list)
76 		return -ENOMEM;
77 
78 	do {
79 		filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, to_fill,
80 					  page_list);
81 		if (!filled) {
82 			ret = -ENOMEM;
83 			goto err;
84 		}
85 		to_alloc -= filled;
86 		ret = sg_alloc_append_table_from_pages(&buf->table, page_list,
87 			filled, 0, filled << PAGE_SHIFT, UINT_MAX,
88 			SG_MAX_SINGLE_ALLOC, GFP_KERNEL_ACCOUNT);
89 
90 		if (ret)
91 			goto err_append;
92 		buf->allocated_length += filled * PAGE_SIZE;
93 		/* clean input for another bulk allocation */
94 		memset(page_list, 0, filled * sizeof(*page_list));
95 		to_fill = min_t(unsigned int, to_alloc,
96 				PAGE_SIZE / sizeof(*page_list));
97 	} while (to_alloc > 0);
98 
99 	kvfree(page_list);
100 	return 0;
101 
102 err_append:
103 	for (i = filled - 1; i >= 0; i--)
104 		__free_page(page_list[i]);
105 err:
106 	kvfree(page_list);
107 	return ret;
108 }
109 
110 static void virtiovf_free_data_buffer(struct virtiovf_data_buffer *buf)
111 {
112 	struct sg_page_iter sg_iter;
113 
114 	/* Undo alloc_pages_bulk() */
115 	for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0)
116 		__free_page(sg_page_iter_page(&sg_iter));
117 	sg_free_append_table(&buf->table);
118 	kfree(buf);
119 }
120 
121 static struct virtiovf_data_buffer *
122 virtiovf_alloc_data_buffer(struct virtiovf_migration_file *migf, size_t length)
123 {
124 	struct virtiovf_data_buffer *buf;
125 	int ret;
126 
127 	buf = kzalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
128 	if (!buf)
129 		return ERR_PTR(-ENOMEM);
130 
131 	ret = virtiovf_add_migration_pages(buf,
132 				DIV_ROUND_UP_ULL(length, PAGE_SIZE));
133 	if (ret)
134 		goto end;
135 
136 	buf->migf = migf;
137 	return buf;
138 end:
139 	virtiovf_free_data_buffer(buf);
140 	return ERR_PTR(ret);
141 }
142 
143 static void virtiovf_put_data_buffer(struct virtiovf_data_buffer *buf)
144 {
145 	spin_lock_irq(&buf->migf->list_lock);
146 	list_add_tail(&buf->buf_elm, &buf->migf->avail_list);
147 	spin_unlock_irq(&buf->migf->list_lock);
148 }
149 
150 static int
151 virtiovf_pci_alloc_obj_id(struct virtiovf_pci_core_device *virtvdev, u8 type,
152 			  u32 *obj_id)
153 {
154 	return virtio_pci_admin_obj_create(virtvdev->core_device.pdev,
155 					   VIRTIO_RESOURCE_OBJ_DEV_PARTS, type, obj_id);
156 }
157 
158 static void
159 virtiovf_pci_free_obj_id(struct virtiovf_pci_core_device *virtvdev, u32 obj_id)
160 {
161 	virtio_pci_admin_obj_destroy(virtvdev->core_device.pdev,
162 			VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id);
163 }
164 
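/*
 * Return a buffer of at least 'length' bytes: reuse one from the available
 * list when a large enough buffer exists, otherwise allocate a fresh one.
 * Too-small buffers inspected on the way are freed to avoid hoarding memory.
 */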
165 static struct virtiovf_data_buffer *
166 virtiovf_get_data_buffer(struct virtiovf_migration_file *migf, size_t length)
167 {
168 	struct virtiovf_data_buffer *buf, *temp_buf;
169 	struct list_head free_list;
170 
171 	INIT_LIST_HEAD(&free_list);
172 
173 	spin_lock_irq(&migf->list_lock);
174 	list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) {
175 		list_del_init(&buf->buf_elm);
176 		if (buf->allocated_length >= length) {
177 			spin_unlock_irq(&migf->list_lock);
178 			goto found;
179 		}
180 		/*
181 		 * Prevent holding redundant buffers. Put them on a free
182 		 * list and release them at the end, outside of the spin lock
183 		 * (&migf->list_lock), to minimize the lock's scope.
184 		 */
185 		list_add(&buf->buf_elm, &free_list);
186 	}
187 	spin_unlock_irq(&migf->list_lock);
188 	buf = virtiovf_alloc_data_buffer(migf, length);
189 
190 found:
191 	while ((temp_buf = list_first_entry_or_null(&free_list,
192 				struct virtiovf_data_buffer, buf_elm))) {
193 		list_del(&temp_buf->buf_elm);
194 		virtiovf_free_data_buffer(temp_buf);
195 	}
196 
197 	return buf;
198 }
199 
200 static void virtiovf_clean_migf_resources(struct virtiovf_migration_file *migf)
201 {
202 	struct virtiovf_data_buffer *entry;
203 
204 	if (migf->buf) {
205 		virtiovf_free_data_buffer(migf->buf);
206 		migf->buf = NULL;
207 	}
208 
209 	if (migf->buf_header) {
210 		virtiovf_free_data_buffer(migf->buf_header);
211 		migf->buf_header = NULL;
212 	}
213 
214 	list_splice(&migf->avail_list, &migf->buf_list);
215 
216 	while ((entry = list_first_entry_or_null(&migf->buf_list,
217 				struct virtiovf_data_buffer, buf_elm))) {
218 		list_del(&entry->buf_elm);
219 		virtiovf_free_data_buffer(entry);
220 	}
221 
222 	if (migf->has_obj_id)
223 		virtiovf_pci_free_obj_id(migf->virtvdev, migf->obj_id);
224 }
225 
226 static void virtiovf_disable_fd(struct virtiovf_migration_file *migf)
227 {
228 	mutex_lock(&migf->lock);
229 	migf->state = VIRTIOVF_MIGF_STATE_ERROR;
230 	migf->filp->f_pos = 0;
231 	mutex_unlock(&migf->lock);
232 }
233 
234 static void virtiovf_disable_fds(struct virtiovf_pci_core_device *virtvdev)
235 {
236 	if (virtvdev->resuming_migf) {
237 		virtiovf_disable_fd(virtvdev->resuming_migf);
238 		virtiovf_clean_migf_resources(virtvdev->resuming_migf);
239 		fput(virtvdev->resuming_migf->filp);
240 		virtvdev->resuming_migf = NULL;
241 	}
242 	if (virtvdev->saving_migf) {
243 		virtiovf_disable_fd(virtvdev->saving_migf);
244 		virtiovf_clean_migf_resources(virtvdev->saving_migf);
245 		fput(virtvdev->saving_migf->filp);
246 		virtvdev->saving_migf = NULL;
247 	}
248 }
249 
250 /*
251  * This function is called in all state_mutex unlock cases to
252  * handle a 'deferred_reset' if one exists.
253  */
254 static void virtiovf_state_mutex_unlock(struct virtiovf_pci_core_device *virtvdev)
255 {
256 again:
257 	spin_lock(&virtvdev->reset_lock);
258 	if (virtvdev->deferred_reset) {
259 		virtvdev->deferred_reset = false;
260 		spin_unlock(&virtvdev->reset_lock);
261 		virtvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
262 		virtiovf_disable_fds(virtvdev);
263 		goto again;
264 	}
265 	mutex_unlock(&virtvdev->state_mutex);
266 	spin_unlock(&virtvdev->reset_lock);
267 }
268 
269 void virtiovf_migration_reset_done(struct pci_dev *pdev)
270 {
271 	struct virtiovf_pci_core_device *virtvdev = dev_get_drvdata(&pdev->dev);
272 
273 	if (!virtvdev->migrate_cap)
274 		return;
275 
276 	/*
277 	 * As the higher VFIO layers are holding locks across reset and using
278 	 * those same locks with the mm_lock, we need to prevent an ABBA
279 	 * deadlock between the state_mutex and the mm_lock.
280 	 * In case the state_mutex was already taken, we defer the cleanup work
281 	 * to the unlock flow of the other running context.
282 	 */
283 	spin_lock(&virtvdev->reset_lock);
284 	virtvdev->deferred_reset = true;
285 	if (!mutex_trylock(&virtvdev->state_mutex)) {
286 		spin_unlock(&virtvdev->reset_lock);
287 		return;
288 	}
289 	spin_unlock(&virtvdev->reset_lock);
290 	virtiovf_state_mutex_unlock(virtvdev);
291 }
292 
293 static int virtiovf_release_file(struct inode *inode, struct file *filp)
294 {
295 	struct virtiovf_migration_file *migf = filp->private_data;
296 
297 	virtiovf_disable_fd(migf);
298 	mutex_destroy(&migf->lock);
299 	kfree(migf);
300 	return 0;
301 }
302 
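/*
 * Return the buffer that covers stream position 'pos'. An empty list sets
 * *end_of_data; a position outside the first buffer is treated as a stream
 * error since the FD is consumed strictly in order.
 */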
303 static struct virtiovf_data_buffer *
304 virtiovf_get_data_buff_from_pos(struct virtiovf_migration_file *migf,
305 				loff_t pos, bool *end_of_data)
306 {
307 	struct virtiovf_data_buffer *buf;
308 	bool found = false;
309 
310 	*end_of_data = false;
311 	spin_lock_irq(&migf->list_lock);
312 	if (list_empty(&migf->buf_list)) {
313 		*end_of_data = true;
314 		goto end;
315 	}
316 
317 	buf = list_first_entry(&migf->buf_list, struct virtiovf_data_buffer,
318 			       buf_elm);
319 	if (pos >= buf->start_pos &&
320 	    pos < buf->start_pos + buf->length) {
321 		found = true;
322 		goto end;
323 	}
324 
325 	/*
326 	 * As this is a stream-based FD, the data is always expected to be in
327 	 * the first chunk.
328 	 */
329 	migf->state = VIRTIOVF_MIGF_STATE_ERROR;
330 
331 end:
332 	spin_unlock_irq(&migf->list_lock);
333 	return found ? buf : NULL;
334 }
335 
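/*
 * Copy up to '*len' bytes of the buffer's payload to userspace, page by page,
 * advancing the stream position. Once the buffer has been fully consumed it
 * is moved to the available list for reuse.
 */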
336 static ssize_t virtiovf_buf_read(struct virtiovf_data_buffer *vhca_buf,
337 				 char __user **buf, size_t *len, loff_t *pos)
338 {
339 	unsigned long offset;
340 	ssize_t done = 0;
341 	size_t copy_len;
342 
343 	copy_len = min_t(size_t,
344 			 vhca_buf->start_pos + vhca_buf->length - *pos, *len);
345 	while (copy_len) {
346 		size_t page_offset;
347 		struct page *page;
348 		size_t page_len;
349 		u8 *from_buff;
350 		int ret;
351 
352 		offset = *pos - vhca_buf->start_pos;
353 		page_offset = offset % PAGE_SIZE;
354 		offset -= page_offset;
355 		page = virtiovf_get_migration_page(vhca_buf, offset);
356 		if (!page)
357 			return -EINVAL;
358 		page_len = min_t(size_t, copy_len, PAGE_SIZE - page_offset);
359 		from_buff = kmap_local_page(page);
360 		ret = copy_to_user(*buf, from_buff + page_offset, page_len);
361 		kunmap_local(from_buff);
362 		if (ret)
363 			return -EFAULT;
364 		*pos += page_len;
365 		*len -= page_len;
366 		*buf += page_len;
367 		done += page_len;
368 		copy_len -= page_len;
369 	}
370 
371 	if (*pos >= vhca_buf->start_pos + vhca_buf->length) {
372 		spin_lock_irq(&vhca_buf->migf->list_lock);
373 		list_del_init(&vhca_buf->buf_elm);
374 		list_add_tail(&vhca_buf->buf_elm, &vhca_buf->migf->avail_list);
375 		spin_unlock_irq(&vhca_buf->migf->list_lock);
376 	}
377 
378 	return done;
379 }
380 
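/*
 * read() handler of the saving FD. In PRE_COPY, running out of data is only a
 * temporary end of file and is reported as -ENOMSG so userspace may retry
 * later; once the state is COMPLETE, end of data is a regular EOF.
 */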
381 static ssize_t virtiovf_save_read(struct file *filp, char __user *buf, size_t len,
382 				  loff_t *pos)
383 {
384 	struct virtiovf_migration_file *migf = filp->private_data;
385 	struct virtiovf_data_buffer *vhca_buf;
386 	bool first_loop_call = true;
387 	bool end_of_data;
388 	ssize_t done = 0;
389 
390 	if (pos)
391 		return -ESPIPE;
392 	pos = &filp->f_pos;
393 
394 	mutex_lock(&migf->lock);
395 	if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
396 		done = -ENODEV;
397 		goto out_unlock;
398 	}
399 
400 	while (len) {
401 		ssize_t count;
402 
403 		vhca_buf = virtiovf_get_data_buff_from_pos(migf, *pos, &end_of_data);
404 		if (first_loop_call) {
405 			first_loop_call = false;
406 			/* Temporary end of file as part of PRE_COPY */
407 			if (end_of_data && migf->state == VIRTIOVF_MIGF_STATE_PRECOPY) {
408 				done = -ENOMSG;
409 				goto out_unlock;
410 			}
411 			if (end_of_data && migf->state != VIRTIOVF_MIGF_STATE_COMPLETE) {
412 				done = -EINVAL;
413 				goto out_unlock;
414 			}
415 		}
416 
417 		if (end_of_data)
418 			goto out_unlock;
419 
420 		if (!vhca_buf) {
421 			done = -EINVAL;
422 			goto out_unlock;
423 		}
424 
425 		count = virtiovf_buf_read(vhca_buf, &buf, &len, pos);
426 		if (count < 0) {
427 			done = count;
428 			goto out_unlock;
429 		}
430 		done += count;
431 	}
432 
433 out_unlock:
434 	mutex_unlock(&migf->lock);
435 	return done;
436 }
437 
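/*
 * VFIO_MIG_GET_PRECOPY_INFO handler: report how much of the already-read
 * context is still pending (initial_bytes) and how much additional context
 * the device currently reports (dirty_bytes). When the queued data was fully
 * consumed and the device reports more, a new context chunk is read.
 */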
438 static long virtiovf_precopy_ioctl(struct file *filp, unsigned int cmd,
439 				   unsigned long arg)
440 {
441 	struct virtiovf_migration_file *migf = filp->private_data;
442 	struct virtiovf_pci_core_device *virtvdev = migf->virtvdev;
443 	struct vfio_precopy_info info = {};
444 	loff_t *pos = &filp->f_pos;
445 	bool end_of_data = false;
446 	unsigned long minsz;
447 	u32 ctx_size = 0;
448 	int ret;
449 
450 	if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
451 		return -ENOTTY;
452 
453 	minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
454 	if (copy_from_user(&info, (void __user *)arg, minsz))
455 		return -EFAULT;
456 
457 	if (info.argsz < minsz)
458 		return -EINVAL;
459 
460 	mutex_lock(&virtvdev->state_mutex);
461 	if (virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY &&
462 	    virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) {
463 		ret = -EINVAL;
464 		goto err_state_unlock;
465 	}
466 
467 	/*
468 	 * The virtio specification does not include a PRE_COPY concept.
469 	 * Since we can expect the data to remain the same for a certain period,
470 	 * we use a rate limiter mechanism before making a call to the device.
471 	 */
472 	if (__ratelimit(&migf->pre_copy_rl_state)) {
473 
474 		ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
475 					VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id,
476 					VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
477 					&ctx_size);
478 		if (ret)
479 			goto err_state_unlock;
480 	}
481 
482 	mutex_lock(&migf->lock);
483 	if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
484 		ret = -ENODEV;
485 		goto err_migf_unlock;
486 	}
487 
488 	if (migf->pre_copy_initial_bytes > *pos) {
489 		info.initial_bytes = migf->pre_copy_initial_bytes - *pos;
490 	} else {
491 		info.dirty_bytes = migf->max_pos - *pos;
492 		if (!info.dirty_bytes)
493 			end_of_data = true;
494 		info.dirty_bytes += ctx_size;
495 	}
496 
497 	if (!end_of_data || !ctx_size) {
498 		mutex_unlock(&migf->lock);
499 		goto done;
500 	}
501 
502 	mutex_unlock(&migf->lock);
503 	/*
504 	 * We finished transferring the current state and the device has a
505 	 * dirty state, read a new state.
506 	 */
507 	ret = virtiovf_read_device_context_chunk(migf, ctx_size);
508 	if (ret)
509 		/*
510 		 * The machine is running and the context size could grow, so there is
511 		 * no reason to mark the device state as VIRTIOVF_MIGF_STATE_ERROR.
512 		 */
513 		goto err_state_unlock;
514 
515 done:
516 	virtiovf_state_mutex_unlock(virtvdev);
517 	if (copy_to_user((void __user *)arg, &info, minsz))
518 		return -EFAULT;
519 	return 0;
520 
521 err_migf_unlock:
522 	mutex_unlock(&migf->lock);
523 err_state_unlock:
524 	virtiovf_state_mutex_unlock(virtvdev);
525 	return ret;
526 }
527 
528 static const struct file_operations virtiovf_save_fops = {
529 	.owner = THIS_MODULE,
530 	.read = virtiovf_save_read,
531 	.unlocked_ioctl = virtiovf_precopy_ioctl,
532 	.compat_ioctl = compat_ptr_ioctl,
533 	.release = virtiovf_release_file,
534 };
535 
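/*
 * Fill a header buffer with the migration record header (size, flags, tag)
 * that precedes each device-context chunk in the saved stream, and queue it
 * on the buffer list.
 */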
536 static int
537 virtiovf_add_buf_header(struct virtiovf_data_buffer *header_buf,
538 			u32 data_size)
539 {
540 	struct virtiovf_migration_file *migf = header_buf->migf;
541 	struct virtiovf_migration_header header = {};
542 	struct page *page;
543 	u8 *to_buff;
544 
545 	header.record_size = cpu_to_le64(data_size);
546 	header.flags = cpu_to_le32(VIRTIOVF_MIGF_HEADER_FLAGS_TAG_MANDATORY);
547 	header.tag = cpu_to_le32(VIRTIOVF_MIGF_HEADER_TAG_DEVICE_DATA);
548 	page = virtiovf_get_migration_page(header_buf, 0);
549 	if (!page)
550 		return -EINVAL;
551 	to_buff = kmap_local_page(page);
552 	memcpy(to_buff, &header, sizeof(header));
553 	kunmap_local(to_buff);
554 	header_buf->length = sizeof(header);
555 	header_buf->start_pos = header_buf->migf->max_pos;
556 	migf->max_pos += header_buf->length;
557 	spin_lock_irq(&migf->list_lock);
558 	list_add_tail(&header_buf->buf_elm, &migf->buf_list);
559 	spin_unlock_irq(&migf->list_lock);
560 	return 0;
561 }
562 
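/*
 * Read one device-context chunk from the device into a migration buffer.
 * The scatterlist is temporarily terminated at the entry that covers
 * 'ctx_size' so the admin command walks only the required pages, and a
 * record header describing the chunk is queued ahead of the data.
 */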
563 static int
564 virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
565 				   u32 ctx_size)
566 {
567 	struct virtiovf_data_buffer *header_buf;
568 	struct virtiovf_data_buffer *buf;
569 	bool unmark_end = false;
570 	struct scatterlist *sg;
571 	unsigned int i;
572 	u32 res_size;
573 	int nent;
574 	int ret;
575 
576 	buf = virtiovf_get_data_buffer(migf, ctx_size);
577 	if (IS_ERR(buf))
578 		return PTR_ERR(buf);
579 
580 	/* Find the total count of SG entries which satisfies the size */
581 	nent = sg_nents_for_len(buf->table.sgt.sgl, ctx_size);
582 	if (nent <= 0) {
583 		ret = -EINVAL;
584 		goto out;
585 	}
586 
587 	/*
588 	 * Iterate to that SG entry and mark it as last (if it's not already)
589 	 * to let the underlying layers iterate only up to that entry.
590 	 */
591 	for_each_sg(buf->table.sgt.sgl, sg, nent - 1, i)
592 		;
593 
594 	if (!sg_is_last(sg)) {
595 		unmark_end = true;
596 		sg_mark_end(sg);
597 	}
598 
599 	ret = virtio_pci_admin_dev_parts_get(migf->virtvdev->core_device.pdev,
600 					     VIRTIO_RESOURCE_OBJ_DEV_PARTS,
601 					     migf->obj_id,
602 					     VIRTIO_ADMIN_CMD_DEV_PARTS_GET_TYPE_ALL,
603 					     buf->table.sgt.sgl, &res_size);
604 	/* Restore the original SG mark end */
605 	if (unmark_end)
606 		sg_unmark_end(sg);
607 	if (ret)
608 		goto out;
609 
610 	buf->length = res_size;
611 	header_buf = virtiovf_get_data_buffer(migf,
612 				sizeof(struct virtiovf_migration_header));
613 	if (IS_ERR(header_buf)) {
614 		ret = PTR_ERR(header_buf);
615 		goto out;
616 	}
617 
618 	ret = virtiovf_add_buf_header(header_buf, res_size);
619 	if (ret)
620 		goto out_header;
621 
622 	buf->start_pos = buf->migf->max_pos;
623 	migf->max_pos += buf->length;
624 	spin_lock_irq(&migf->list_lock);
625 	list_add_tail(&buf->buf_elm, &migf->buf_list);
626 	spin_unlock_irq(&migf->list_lock);
627 	return 0;
628 
629 out_header:
630 	virtiovf_put_data_buffer(header_buf);
631 out:
632 	virtiovf_put_data_buffer(buf);
633 	return ret;
634 }
635 
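/*
 * Read the final device context when entering STOP_COPY: the device is
 * already stopped, so the remaining context size is queried once, read as the
 * last chunk, and the stream is marked COMPLETE.
 */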
636 static int
637 virtiovf_pci_save_device_final_data(struct virtiovf_pci_core_device *virtvdev)
638 {
639 	struct virtiovf_migration_file *migf = virtvdev->saving_migf;
640 	u32 ctx_size;
641 	int ret;
642 
643 	if (migf->state == VIRTIOVF_MIGF_STATE_ERROR)
644 		return -ENODEV;
645 
646 	ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
647 				VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id,
648 				VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
649 				&ctx_size);
650 	if (ret)
651 		goto err;
652 
653 	if (!ctx_size) {
654 		ret = -EINVAL;
655 		goto err;
656 	}
657 
658 	ret = virtiovf_read_device_context_chunk(migf, ctx_size);
659 	if (ret)
660 		goto err;
661 
662 	migf->state = VIRTIOVF_MIGF_STATE_COMPLETE;
663 	return 0;
664 
665 err:
666 	migf->state = VIRTIOVF_MIGF_STATE_ERROR;
667 	return ret;
668 }
669 
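/*
 * Create the saving migration file: allocate a device-parts GET object, query
 * the current context size and read the first chunk. In PRE_COPY, the amount
 * read so far is recorded as pre_copy_initial_bytes and a one-second rate
 * limiter throttles further size queries to the device.
 */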
670 static struct virtiovf_migration_file *
671 virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev,
672 			      bool pre_copy)
673 {
674 	struct virtiovf_migration_file *migf;
675 	u32 ctx_size;
676 	u32 obj_id;
677 	int ret;
678 
679 	migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
680 	if (!migf)
681 		return ERR_PTR(-ENOMEM);
682 
683 	migf->filp = anon_inode_getfile("virtiovf_mig", &virtiovf_save_fops, migf,
684 					O_RDONLY);
685 	if (IS_ERR(migf->filp)) {
686 		ret = PTR_ERR(migf->filp);
687 		kfree(migf);
688 		return ERR_PTR(ret);
689 	}
690 
691 	stream_open(migf->filp->f_inode, migf->filp);
692 	mutex_init(&migf->lock);
693 	INIT_LIST_HEAD(&migf->buf_list);
694 	INIT_LIST_HEAD(&migf->avail_list);
695 	spin_lock_init(&migf->list_lock);
696 	migf->virtvdev = virtvdev;
697 
698 	lockdep_assert_held(&virtvdev->state_mutex);
699 	ret = virtiovf_pci_alloc_obj_id(virtvdev, VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET,
700 					&obj_id);
701 	if (ret)
702 		goto out;
703 
704 	migf->obj_id = obj_id;
705 	/* Mark as having a valid obj_id, which may even be 0 */
706 	migf->has_obj_id = true;
707 	ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
708 				VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id,
709 				VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
710 				&ctx_size);
711 	if (ret)
712 		goto out_clean;
713 
714 	if (!ctx_size) {
715 		ret = -EINVAL;
716 		goto out_clean;
717 	}
718 
719 	ret = virtiovf_read_device_context_chunk(migf, ctx_size);
720 	if (ret)
721 		goto out_clean;
722 
723 	if (pre_copy) {
724 		migf->pre_copy_initial_bytes = migf->max_pos;
725 		/* Arbitrarily set the pre-copy rate limit to 1-second intervals */
726 		ratelimit_state_init(&migf->pre_copy_rl_state, 1 * HZ, 1);
727 		/* Prevent any ratelimit messages while it is in use */
728 		ratelimit_set_flags(&migf->pre_copy_rl_state,
729 				    RATELIMIT_MSG_ON_RELEASE);
730 		migf->state = VIRTIOVF_MIGF_STATE_PRECOPY;
731 	} else {
732 		migf->state = VIRTIOVF_MIGF_STATE_COMPLETE;
733 	}
734 
735 	return migf;
736 
737 out_clean:
738 	virtiovf_clean_migf_resources(migf);
739 out:
740 	fput(migf->filp);
741 	return ERR_PTR(ret);
742 }
743 
744 /*
745  * Set the required object header at the beginning of the buffer.
746  * The actual device parts data will be written after the header.
747  */
748 static int virtiovf_set_obj_cmd_header(struct virtiovf_data_buffer *vhca_buf)
749 {
750 	struct virtio_admin_cmd_resource_obj_cmd_hdr obj_hdr = {};
751 	struct page *page;
752 	u8 *to_buff;
753 
754 	obj_hdr.type = cpu_to_le16(VIRTIO_RESOURCE_OBJ_DEV_PARTS);
755 	obj_hdr.id = cpu_to_le32(vhca_buf->migf->obj_id);
756 	page = virtiovf_get_migration_page(vhca_buf, 0);
757 	if (!page)
758 		return -EINVAL;
759 	to_buff = kmap_local_page(page);
760 	memcpy(to_buff, &obj_hdr, sizeof(obj_hdr));
761 	kunmap_local(to_buff);
762 
763 	/* Mark the buffer as including the header object data */
764 	vhca_buf->include_header_object = 1;
765 	return 0;
766 }
767 
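/*
 * Copy at most one page of userspace data into the resume buffer at the
 * current stream position, skipping over the resource-object command header
 * that occupies the start of the buffer when present.
 */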
768 static int
769 virtiovf_append_page_to_mig_buf(struct virtiovf_data_buffer *vhca_buf,
770 				const char __user **buf, size_t *len,
771 				loff_t *pos, ssize_t *done)
772 {
773 	unsigned long offset;
774 	size_t page_offset;
775 	struct page *page;
776 	size_t page_len;
777 	u8 *to_buff;
778 	int ret;
779 
780 	offset = *pos - vhca_buf->start_pos;
781 
782 	if (vhca_buf->include_header_object)
783 		/* The buffer holds the object header, update the offset accordingly */
784 		offset += sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr);
785 
786 	page_offset = offset % PAGE_SIZE;
787 
788 	page = virtiovf_get_migration_page(vhca_buf, offset - page_offset);
789 	if (!page)
790 		return -EINVAL;
791 
792 	page_len = min_t(size_t, *len, PAGE_SIZE - page_offset);
793 	to_buff = kmap_local_page(page);
794 	ret = copy_from_user(to_buff + page_offset, *buf, page_len);
795 	kunmap_local(to_buff);
796 	if (ret)
797 		return -EFAULT;
798 
799 	*pos += page_len;
800 	*done += page_len;
801 	*buf += page_len;
802 	*len -= page_len;
803 	vhca_buf->length += page_len;
804 	return 0;
805 }
806 
807 static ssize_t
808 virtiovf_resume_read_chunk(struct virtiovf_migration_file *migf,
809 			   struct virtiovf_data_buffer *vhca_buf,
810 			   size_t chunk_size, const char __user **buf,
811 			   size_t *len, loff_t *pos, ssize_t *done,
812 			   bool *has_work)
813 {
814 	size_t copy_len, to_copy;
815 	int ret;
816 
817 	to_copy = min_t(size_t, *len, chunk_size - vhca_buf->length);
818 	copy_len = to_copy;
819 	while (to_copy) {
820 		ret = virtiovf_append_page_to_mig_buf(vhca_buf, buf, &to_copy,
821 						      pos, done);
822 		if (ret)
823 			return ret;
824 	}
825 
826 	*len -= copy_len;
827 	if (vhca_buf->length == chunk_size) {
828 		migf->load_state = VIRTIOVF_LOAD_STATE_LOAD_CHUNK;
829 		migf->max_pos += chunk_size;
830 		*has_work = true;
831 	}
832 
833 	return 0;
834 }
835 
836 static int
837 virtiovf_resume_read_header_data(struct virtiovf_migration_file *migf,
838 				 struct virtiovf_data_buffer *vhca_buf,
839 				 const char __user **buf, size_t *len,
840 				 loff_t *pos, ssize_t *done)
841 {
842 	size_t copy_len, to_copy;
843 	size_t required_data;
844 	int ret;
845 
846 	required_data = migf->record_size - vhca_buf->length;
847 	to_copy = min_t(size_t, *len, required_data);
848 	copy_len = to_copy;
849 	while (to_copy) {
850 		ret = virtiovf_append_page_to_mig_buf(vhca_buf, buf, &to_copy,
851 						      pos, done);
852 		if (ret)
853 			return ret;
854 	}
855 
856 	*len -= copy_len;
857 	if (vhca_buf->length == migf->record_size) {
858 		switch (migf->record_tag) {
859 		default:
860 			/* Optional tag */
861 			break;
862 		}
863 
864 		migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER;
865 		migf->max_pos += migf->record_size;
866 		vhca_buf->length = 0;
867 	}
868 
869 	return 0;
870 }
871 
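/*
 * Accumulate the fixed-size record header from userspace; once complete,
 * parse the record size and tag and select the next load state. Unknown
 * mandatory tags fail the load, while unknown optional tags are skipped.
 */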
872 static int
873 virtiovf_resume_read_header(struct virtiovf_migration_file *migf,
874 			    struct virtiovf_data_buffer *vhca_buf,
875 			    const char __user **buf,
876 			    size_t *len, loff_t *pos,
877 			    ssize_t *done, bool *has_work)
878 {
879 	struct page *page;
880 	size_t copy_len;
881 	u8 *to_buff;
882 	int ret;
883 
884 	copy_len = min_t(size_t, *len,
885 		sizeof(struct virtiovf_migration_header) - vhca_buf->length);
886 	page = virtiovf_get_migration_page(vhca_buf, 0);
887 	if (!page)
888 		return -EINVAL;
889 	to_buff = kmap_local_page(page);
890 	ret = copy_from_user(to_buff + vhca_buf->length, *buf, copy_len);
891 	if (ret) {
892 		ret = -EFAULT;
893 		goto end;
894 	}
895 
896 	*buf += copy_len;
897 	*pos += copy_len;
898 	*done += copy_len;
899 	*len -= copy_len;
900 	vhca_buf->length += copy_len;
901 	if (vhca_buf->length == sizeof(struct virtiovf_migration_header)) {
902 		u64 record_size;
903 		u32 flags;
904 
905 		record_size = le64_to_cpup((__le64 *)to_buff);
906 		if (record_size > MAX_LOAD_SIZE) {
907 			ret = -ENOMEM;
908 			goto end;
909 		}
910 
911 		migf->record_size = record_size;
912 		flags = le32_to_cpup((__le32 *)(to_buff +
913 			    offsetof(struct virtiovf_migration_header, flags)));
914 		migf->record_tag = le32_to_cpup((__le32 *)(to_buff +
915 			    offsetof(struct virtiovf_migration_header, tag)));
916 		switch (migf->record_tag) {
917 		case VIRTIOVF_MIGF_HEADER_TAG_DEVICE_DATA:
918 			migf->load_state = VIRTIOVF_LOAD_STATE_PREP_CHUNK;
919 			break;
920 		default:
921 			if (!(flags & VIRTIOVF_MIGF_HEADER_FLAGS_TAG_OPTIONAL)) {
922 				ret = -EOPNOTSUPP;
923 				goto end;
924 			}
925 			/* We may read and skip this optional record data */
926 			migf->load_state = VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA;
927 		}
928 
929 		migf->max_pos += vhca_buf->length;
930 		vhca_buf->length = 0;
931 		*has_work = true;
932 	}
933 end:
934 	kunmap_local(to_buff);
935 	return ret;
936 }
937 
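/*
 * write() handler of the resuming FD. A small state machine parses the
 * incoming stream: read a record header, optionally skip unknown optional
 * records, accumulate a device-context chunk, and finally push the chunk to
 * the device through a device-parts SET admin command.
 */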
938 static ssize_t virtiovf_resume_write(struct file *filp, const char __user *buf,
939 				     size_t len, loff_t *pos)
940 {
941 	struct virtiovf_migration_file *migf = filp->private_data;
942 	struct virtiovf_data_buffer *vhca_buf = migf->buf;
943 	struct virtiovf_data_buffer *vhca_buf_header = migf->buf_header;
944 	unsigned int orig_length;
945 	bool has_work = false;
946 	ssize_t done = 0;
947 	int ret = 0;
948 
949 	if (pos)
950 		return -ESPIPE;
951 
952 	pos = &filp->f_pos;
953 	if (*pos < vhca_buf->start_pos)
954 		return -EINVAL;
955 
956 	mutex_lock(&migf->virtvdev->state_mutex);
957 	mutex_lock(&migf->lock);
958 	if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
959 		done = -ENODEV;
960 		goto out_unlock;
961 	}
962 
963 	while (len || has_work) {
964 		has_work = false;
965 		switch (migf->load_state) {
966 		case VIRTIOVF_LOAD_STATE_READ_HEADER:
967 			ret = virtiovf_resume_read_header(migf, vhca_buf_header, &buf,
968 							  &len, pos, &done, &has_work);
969 			if (ret)
970 				goto out_unlock;
971 			break;
972 		case VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA:
973 			if (vhca_buf_header->allocated_length < migf->record_size) {
974 				virtiovf_free_data_buffer(vhca_buf_header);
975 
976 				migf->buf_header = virtiovf_alloc_data_buffer(migf,
977 						migf->record_size);
978 				if (IS_ERR(migf->buf_header)) {
979 					ret = PTR_ERR(migf->buf_header);
980 					migf->buf_header = NULL;
981 					goto out_unlock;
982 				}
983 
984 				vhca_buf_header = migf->buf_header;
985 			}
986 
987 			vhca_buf_header->start_pos = migf->max_pos;
988 			migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER_DATA;
989 			break;
990 		case VIRTIOVF_LOAD_STATE_READ_HEADER_DATA:
991 			ret = virtiovf_resume_read_header_data(migf, vhca_buf_header,
992 							       &buf, &len, pos, &done);
993 			if (ret)
994 				goto out_unlock;
995 			break;
996 		case VIRTIOVF_LOAD_STATE_PREP_CHUNK:
997 		{
998 			u32 cmd_size = migf->record_size +
999 				sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr);
1000 
1001 			/*
1002 			 * The DMA map/unmap is managed in the virtio layer; we just need to extend
1003 			 * the SG pages to hold the extra required chunk data.
1004 			 */
1005 			if (vhca_buf->allocated_length < cmd_size) {
1006 				ret = virtiovf_add_migration_pages(vhca_buf,
1007 					DIV_ROUND_UP_ULL(cmd_size - vhca_buf->allocated_length,
1008 							 PAGE_SIZE));
1009 				if (ret)
1010 					goto out_unlock;
1011 			}
1012 
1013 			vhca_buf->start_pos = migf->max_pos;
1014 			migf->load_state = VIRTIOVF_LOAD_STATE_READ_CHUNK;
1015 			break;
1016 		}
1017 		case VIRTIOVF_LOAD_STATE_READ_CHUNK:
1018 			ret = virtiovf_resume_read_chunk(migf, vhca_buf, migf->record_size,
1019 							 &buf, &len, pos, &done, &has_work);
1020 			if (ret)
1021 				goto out_unlock;
1022 			break;
1023 		case VIRTIOVF_LOAD_STATE_LOAD_CHUNK:
1024 			/* Mark the last SG entry and set its length */
1025 			sg_mark_end(vhca_buf->last_offset_sg);
1026 			orig_length = vhca_buf->last_offset_sg->length;
1027 			/* Length should include the resource object command header */
1028 			vhca_buf->last_offset_sg->length = vhca_buf->length +
1029 					sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr) -
1030 					vhca_buf->last_offset;
1031 			ret = virtio_pci_admin_dev_parts_set(migf->virtvdev->core_device.pdev,
1032 							     vhca_buf->table.sgt.sgl);
1033 			/* Restore the original SG data */
1034 			vhca_buf->last_offset_sg->length = orig_length;
1035 			sg_unmark_end(vhca_buf->last_offset_sg);
1036 			if (ret)
1037 				goto out_unlock;
1038 			migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER;
1039 			/* Be ready to read the next chunk */
1040 			vhca_buf->length = 0;
1041 			break;
1042 		default:
1043 			break;
1044 		}
1045 	}
1046 
1047 out_unlock:
1048 	if (ret)
1049 		migf->state = VIRTIOVF_MIGF_STATE_ERROR;
1050 	mutex_unlock(&migf->lock);
1051 	virtiovf_state_mutex_unlock(migf->virtvdev);
1052 	return ret ? ret : done;
1053 }
1054 
1055 static const struct file_operations virtiovf_resume_fops = {
1056 	.owner = THIS_MODULE,
1057 	.write = virtiovf_resume_write,
1058 	.release = virtiovf_release_file,
1059 };
1060 
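/*
 * Create the resuming migration file: preallocate an initial data buffer and
 * a header buffer, allocate a device-parts SET object and prepend the
 * resource-object command header to the data buffer.
 */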
1061 static struct virtiovf_migration_file *
1062 virtiovf_pci_resume_device_data(struct virtiovf_pci_core_device *virtvdev)
1063 {
1064 	struct virtiovf_migration_file *migf;
1065 	struct virtiovf_data_buffer *buf;
1066 	u32 obj_id;
1067 	int ret;
1068 
1069 	migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
1070 	if (!migf)
1071 		return ERR_PTR(-ENOMEM);
1072 
1073 	migf->filp = anon_inode_getfile("virtiovf_mig", &virtiovf_resume_fops, migf,
1074 					O_WRONLY);
1075 	if (IS_ERR(migf->filp)) {
1076 		ret = PTR_ERR(migf->filp);
1077 		kfree(migf);
1078 		return ERR_PTR(ret);
1079 	}
1080 
1081 	stream_open(migf->filp->f_inode, migf->filp);
1082 	mutex_init(&migf->lock);
1083 	INIT_LIST_HEAD(&migf->buf_list);
1084 	INIT_LIST_HEAD(&migf->avail_list);
1085 	spin_lock_init(&migf->list_lock);
1086 
1087 	buf = virtiovf_alloc_data_buffer(migf, VIRTIOVF_TARGET_INITIAL_BUF_SIZE);
1088 	if (IS_ERR(buf)) {
1089 		ret = PTR_ERR(buf);
1090 		goto out;
1091 	}
1092 
1093 	migf->buf = buf;
1094 
1095 	buf = virtiovf_alloc_data_buffer(migf,
1096 		sizeof(struct virtiovf_migration_header));
1097 	if (IS_ERR(buf)) {
1098 		ret = PTR_ERR(buf);
1099 		goto out_clean;
1100 	}
1101 
1102 	migf->buf_header = buf;
1103 	migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER;
1104 
1105 	migf->virtvdev = virtvdev;
1106 	ret = virtiovf_pci_alloc_obj_id(virtvdev, VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_SET,
1107 					&obj_id);
1108 	if (ret)
1109 		goto out_clean;
1110 
1111 	migf->obj_id = obj_id;
1112 	/* Mark as having a valid obj_id, which may even be 0 */
1113 	migf->has_obj_id = true;
1114 	ret = virtiovf_set_obj_cmd_header(migf->buf);
1115 	if (ret)
1116 		goto out_clean;
1117 
1118 	return migf;
1119 
1120 out_clean:
1121 	virtiovf_clean_migf_resources(migf);
1122 out:
1123 	fput(migf->filp);
1124 	return ERR_PTR(ret);
1125 }
1126 
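/*
 * Execute a single arc of the VFIO migration state machine. STOP and
 * RUNNING_P2P are equivalent for this device: the P2P states are entered by
 * setting the STOPPED admin mode. The saving/resuming migration files are
 * created on the STOP_COPY, PRE_COPY and RESUMING arcs and released when
 * moving back to a running or stopped state.
 */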
1127 static struct file *
1128 virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev,
1129 				      u32 new)
1130 {
1131 	u32 cur = virtvdev->mig_state;
1132 	int ret;
1133 
1134 	if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) {
1135 		/* NOP */
1136 		return NULL;
1137 	}
1138 
1139 	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
1140 		/* NOP */
1141 		return NULL;
1142 	}
1143 
1144 	if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) ||
1145 	    (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
1146 		ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev,
1147 						BIT(VIRTIO_ADMIN_CMD_DEV_MODE_F_STOPPED));
1148 		if (ret)
1149 			return ERR_PTR(ret);
1150 		return NULL;
1151 	}
1152 
1153 	if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) ||
1154 	    (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY)) {
1155 		ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev, 0);
1156 		if (ret)
1157 			return ERR_PTR(ret);
1158 		return NULL;
1159 	}
1160 
1161 	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
1162 		struct virtiovf_migration_file *migf;
1163 
1164 		migf = virtiovf_pci_save_device_data(virtvdev, false);
1165 		if (IS_ERR(migf))
1166 			return ERR_CAST(migf);
1167 		get_file(migf->filp);
1168 		virtvdev->saving_migf = migf;
1169 		return migf->filp;
1170 	}
1171 
1172 	if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) ||
1173 	    (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) ||
1174 	    (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P)) {
1175 		virtiovf_disable_fds(virtvdev);
1176 		return NULL;
1177 	}
1178 
1179 	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
1180 		struct virtiovf_migration_file *migf;
1181 
1182 		migf = virtiovf_pci_resume_device_data(virtvdev);
1183 		if (IS_ERR(migf))
1184 			return ERR_CAST(migf);
1185 		get_file(migf->filp);
1186 		virtvdev->resuming_migf = migf;
1187 		return migf->filp;
1188 	}
1189 
1190 	if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
1191 		virtiovf_disable_fds(virtvdev);
1192 		return NULL;
1193 	}
1194 
1195 	if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) ||
1196 	    (cur == VFIO_DEVICE_STATE_RUNNING_P2P &&
1197 	     new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
1198 		struct virtiovf_migration_file *migf;
1199 
1200 		migf = virtiovf_pci_save_device_data(virtvdev, true);
1201 		if (IS_ERR(migf))
1202 			return ERR_CAST(migf);
1203 		get_file(migf->filp);
1204 		virtvdev->saving_migf = migf;
1205 		return migf->filp;
1206 	}
1207 
1208 	if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY) {
1209 		ret = virtiovf_pci_save_device_final_data(virtvdev);
1210 		return ret ? ERR_PTR(ret) : NULL;
1211 	}
1212 
1213 	/*
1214 	 * vfio_mig_get_next_state() does not use arcs other than the above
1215 	 */
1216 	WARN_ON(true);
1217 	return ERR_PTR(-EINVAL);
1218 }
1219 
1220 static struct file *
1221 virtiovf_pci_set_device_state(struct vfio_device *vdev,
1222 			      enum vfio_device_mig_state new_state)
1223 {
1224 	struct virtiovf_pci_core_device *virtvdev = container_of(
1225 		vdev, struct virtiovf_pci_core_device, core_device.vdev);
1226 	enum vfio_device_mig_state next_state;
1227 	struct file *res = NULL;
1228 	int ret;
1229 
1230 	mutex_lock(&virtvdev->state_mutex);
1231 	while (new_state != virtvdev->mig_state) {
1232 		ret = vfio_mig_get_next_state(vdev, virtvdev->mig_state,
1233 					      new_state, &next_state);
1234 		if (ret) {
1235 			res = ERR_PTR(ret);
1236 			break;
1237 		}
1238 		res = virtiovf_pci_step_device_state_locked(virtvdev, next_state);
1239 		if (IS_ERR(res))
1240 			break;
1241 		virtvdev->mig_state = next_state;
1242 		if (WARN_ON(res && new_state != virtvdev->mig_state)) {
1243 			fput(res);
1244 			res = ERR_PTR(-EINVAL);
1245 			break;
1246 		}
1247 	}
1248 	virtiovf_state_mutex_unlock(virtvdev);
1249 	return res;
1250 }
1251 
1252 static int virtiovf_pci_get_device_state(struct vfio_device *vdev,
1253 				       enum vfio_device_mig_state *curr_state)
1254 {
1255 	struct virtiovf_pci_core_device *virtvdev = container_of(
1256 		vdev, struct virtiovf_pci_core_device, core_device.vdev);
1257 
1258 	mutex_lock(&virtvdev->state_mutex);
1259 	*curr_state = virtvdev->mig_state;
1260 	virtiovf_state_mutex_unlock(virtvdev);
1261 	return 0;
1262 }
1263 
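/*
 * Estimate the STOP_COPY data size by querying the device-parts metadata.
 * If no saving object exists yet, a temporary GET object is created for the
 * query and destroyed right afterwards.
 */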
1264 static int virtiovf_pci_get_data_size(struct vfio_device *vdev,
1265 				      unsigned long *stop_copy_length)
1266 {
1267 	struct virtiovf_pci_core_device *virtvdev = container_of(
1268 		vdev, struct virtiovf_pci_core_device, core_device.vdev);
1269 	bool obj_id_exists;
1270 	u32 res_size;
1271 	u32 obj_id;
1272 	int ret;
1273 
1274 	mutex_lock(&virtvdev->state_mutex);
1275 	obj_id_exists = virtvdev->saving_migf && virtvdev->saving_migf->has_obj_id;
1276 	if (!obj_id_exists) {
1277 		ret = virtiovf_pci_alloc_obj_id(virtvdev,
1278 						VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET,
1279 						&obj_id);
1280 		if (ret)
1281 			goto end;
1282 	} else {
1283 		obj_id = virtvdev->saving_migf->obj_id;
1284 	}
1285 
1286 	ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
1287 				VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id,
1288 				VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
1289 				&res_size);
1290 	if (!ret)
1291 		*stop_copy_length = res_size;
1292 
1293 	/*
1294 	 * We can't leave this obj_id alive if it didn't exist before; otherwise it might
1295 	 * stay alive even without an active migration flow (e.g. the migration was cancelled).
1296 	 */
1297 	if (!obj_id_exists)
1298 		virtiovf_pci_free_obj_id(virtvdev, obj_id);
1299 end:
1300 	virtiovf_state_mutex_unlock(virtvdev);
1301 	return ret;
1302 }
1303 
1304 static const struct vfio_migration_ops virtvdev_pci_mig_ops = {
1305 	.migration_set_state = virtiovf_pci_set_device_state,
1306 	.migration_get_state = virtiovf_pci_get_device_state,
1307 	.migration_get_data_size = virtiovf_pci_get_data_size,
1308 };
1309 
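/*
 * Advertise migration support (STOP_COPY, P2P and PRE_COPY) and hook up the
 * migration ops on the core vfio device.
 */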
1310 void virtiovf_set_migratable(struct virtiovf_pci_core_device *virtvdev)
1311 {
1312 	virtvdev->migrate_cap = 1;
1313 	mutex_init(&virtvdev->state_mutex);
1314 	spin_lock_init(&virtvdev->reset_lock);
1315 	virtvdev->core_device.vdev.migration_flags =
1316 		VFIO_MIGRATION_STOP_COPY |
1317 		VFIO_MIGRATION_P2P |
1318 		VFIO_MIGRATION_PRE_COPY;
1319 	virtvdev->core_device.vdev.mig_ops = &virtvdev_pci_mig_ops;
1320 }
1321 
1322 void virtiovf_open_migration(struct virtiovf_pci_core_device *virtvdev)
1323 {
1324 	if (!virtvdev->migrate_cap)
1325 		return;
1326 
1327 	virtvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
1328 }
1329 
1330 void virtiovf_close_migration(struct virtiovf_pci_core_device *virtvdev)
1331 {
1332 	if (!virtvdev->migrate_cap)
1333 		return;
1334 
1335 	virtiovf_disable_fds(virtvdev);
1336 }
1337