1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
4 */
5
6 #include <linux/device.h>
7 #include <linux/module.h>
8 #include <linux/mutex.h>
9 #include <linux/pci.h>
10 #include <linux/pm_runtime.h>
11 #include <linux/types.h>
12 #include <linux/uaccess.h>
13 #include <linux/vfio.h>
14 #include <linux/vfio_pci_core.h>
15 #include <linux/virtio_pci.h>
16 #include <linux/virtio_net.h>
17 #include <linux/virtio_pci_admin.h>
18 #include <linux/anon_inodes.h>
19
20 #include "common.h"
21
22 /* Device specification max parts size */
23 #define MAX_LOAD_SIZE (BIT_ULL(BITS_PER_TYPE \
24 (((struct virtio_admin_cmd_dev_parts_metadata_result *)0)->parts_size.size)) - 1)
25
26 /* Initial target buffer size */
27 #define VIRTIOVF_TARGET_INITIAL_BUF_SIZE SZ_1M
28
29 static int
30 virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
31 u32 ctx_size);
32
/*
 * Translate a byte @offset within the migration data buffer into the page
 * that backs it.
 *
 * The buffer's pages live in an SG append table; a linear walk from the head
 * on every call would be O(n) per lookup, so the last visited SG entry and
 * its base offset are cached in @buf.  Callers access the buffer
 * sequentially, which makes the cached entry the common-case starting point.
 *
 * Returns the backing page, or NULL if @offset is beyond the buffer.
 */
static struct page *
virtiovf_get_migration_page(struct virtiovf_data_buffer *buf,
			    unsigned long offset)
{
	unsigned long cur_offset = 0;
	struct scatterlist *sg;
	unsigned int i;

	/* All accesses are sequential */
	if (offset < buf->last_offset || !buf->last_offset_sg) {
		/* Going backwards (or first use): restart from the list head */
		buf->last_offset = 0;
		buf->last_offset_sg = buf->table.sgt.sgl;
		buf->sg_last_entry = 0;
	}

	cur_offset = buf->last_offset;

	for_each_sg(buf->last_offset_sg, sg,
		    buf->table.sgt.orig_nents - buf->sg_last_entry, i) {
		if (offset < sg->length + cur_offset) {
			/* Cache this entry for the next (sequential) lookup */
			buf->last_offset_sg = sg;
			buf->sg_last_entry += i;
			buf->last_offset = cur_offset;
			return sg_page(sg) + (offset - cur_offset) / PAGE_SIZE;
		}
		cur_offset += sg->length;
	}
	return NULL;
}
62
/*
 * Grow @buf by @npages pages, appending them to its SG append table and
 * increasing buf->allocated_length accordingly.
 *
 * Pages are obtained in batches with alloc_pages_bulk() through a scratch
 * page array capped at one page worth of pointers.  On failure the pages of
 * the failing batch are freed here; pages from earlier batches already sit
 * in the SG table and are released by virtiovf_free_data_buffer().
 *
 * Returns 0 on success or a negative errno.
 */
static int virtiovf_add_migration_pages(struct virtiovf_data_buffer *buf,
					unsigned int npages)
{
	unsigned int to_alloc = npages;
	struct page **page_list;
	unsigned long filled;
	unsigned int to_fill;
	int ret;
	int i;

	/* Batch size is limited by the one-page scratch pointer array */
	to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
	page_list = kvcalloc(to_fill, sizeof(*page_list), GFP_KERNEL_ACCOUNT);
	if (!page_list)
		return -ENOMEM;

	do {
		filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, to_fill,
					  page_list);
		if (!filled) {
			ret = -ENOMEM;
			goto err;
		}
		to_alloc -= filled;
		ret = sg_alloc_append_table_from_pages(&buf->table, page_list,
			filled, 0, filled << PAGE_SHIFT, UINT_MAX,
			SG_MAX_SINGLE_ALLOC, GFP_KERNEL_ACCOUNT);

		if (ret)
			goto err_append;
		buf->allocated_length += filled * PAGE_SIZE;
		/* clean input for another bulk allocation */
		memset(page_list, 0, filled * sizeof(*page_list));
		to_fill = min_t(unsigned int, to_alloc,
				PAGE_SIZE / sizeof(*page_list));
	} while (to_alloc > 0);

	kvfree(page_list);
	return 0;

err_append:
	/* Only the current batch; earlier batches are owned by buf->table */
	for (i = filled - 1; i >= 0; i--)
		__free_page(page_list[i]);
err:
	kvfree(page_list);
	return ret;
}
109
virtiovf_free_data_buffer(struct virtiovf_data_buffer * buf)110 static void virtiovf_free_data_buffer(struct virtiovf_data_buffer *buf)
111 {
112 struct sg_page_iter sg_iter;
113
114 /* Undo alloc_pages_bulk() */
115 for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0)
116 __free_page(sg_page_iter_page(&sg_iter));
117 sg_free_append_table(&buf->table);
118 kfree(buf);
119 }
120
121 static struct virtiovf_data_buffer *
virtiovf_alloc_data_buffer(struct virtiovf_migration_file * migf,size_t length)122 virtiovf_alloc_data_buffer(struct virtiovf_migration_file *migf, size_t length)
123 {
124 struct virtiovf_data_buffer *buf;
125 int ret;
126
127 buf = kzalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
128 if (!buf)
129 return ERR_PTR(-ENOMEM);
130
131 ret = virtiovf_add_migration_pages(buf,
132 DIV_ROUND_UP_ULL(length, PAGE_SIZE));
133 if (ret)
134 goto end;
135
136 buf->migf = migf;
137 return buf;
138 end:
139 virtiovf_free_data_buffer(buf);
140 return ERR_PTR(ret);
141 }
142
/*
 * Return @buf to its file's available list for reuse instead of freeing it.
 * The avail_list is protected by migf->list_lock (IRQ-disabling flavor).
 */
static void virtiovf_put_data_buffer(struct virtiovf_data_buffer *buf)
{
	spin_lock_irq(&buf->migf->list_lock);
	list_add_tail(&buf->buf_elm, &buf->migf->avail_list);
	spin_unlock_irq(&buf->migf->list_lock);
}
149
/*
 * Create a device-parts resource object of the given @type on the device and
 * return its id via @obj_id.  Thin wrapper over the virtio admin queue API.
 */
static int
virtiovf_pci_alloc_obj_id(struct virtiovf_pci_core_device *virtvdev, u8 type,
			  u32 *obj_id)
{
	return virtio_pci_admin_obj_create(virtvdev->core_device.pdev,
					   VIRTIO_RESOURCE_OBJ_DEV_PARTS, type, obj_id);
}
157
/* Destroy a previously created device-parts resource object @obj_id. */
static void
virtiovf_pci_free_obj_id(struct virtiovf_pci_core_device *virtvdev, u32 obj_id)
{
	virtio_pci_admin_obj_destroy(virtvdev->core_device.pdev,
				     VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id);
}
164
/*
 * Get a data buffer of at least @length bytes: reuse the first large-enough
 * buffer from migf->avail_list, or allocate a fresh one.
 *
 * Undersized buffers encountered while scanning are removed and freed (they
 * would otherwise accumulate), but the actual freeing is deferred to after
 * the spinlock is dropped.
 *
 * Returns a buffer or an ERR_PTR() from the allocation path.
 */
static struct virtiovf_data_buffer *
virtiovf_get_data_buffer(struct virtiovf_migration_file *migf, size_t length)
{
	struct virtiovf_data_buffer *buf, *temp_buf;
	struct list_head free_list;

	INIT_LIST_HEAD(&free_list);

	spin_lock_irq(&migf->list_lock);
	list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) {
		list_del_init(&buf->buf_elm);
		if (buf->allocated_length >= length) {
			spin_unlock_irq(&migf->list_lock);
			goto found;
		}
		/*
		 * Prevent holding redundant buffers. Put in a free
		 * list and call at the end not under the spin lock
		 * (&migf->list_lock) to minimize its scope usage.
		 */
		list_add(&buf->buf_elm, &free_list);
	}
	spin_unlock_irq(&migf->list_lock);
	buf = virtiovf_alloc_data_buffer(migf, length);

found:
	/* Free the rejected (too small) buffers outside the lock */
	while ((temp_buf = list_first_entry_or_null(&free_list,
				struct virtiovf_data_buffer, buf_elm))) {
		list_del(&temp_buf->buf_elm);
		virtiovf_free_data_buffer(temp_buf);
	}

	return buf;
}
199
virtiovf_clean_migf_resources(struct virtiovf_migration_file * migf)200 static void virtiovf_clean_migf_resources(struct virtiovf_migration_file *migf)
201 {
202 struct virtiovf_data_buffer *entry;
203
204 if (migf->buf) {
205 virtiovf_free_data_buffer(migf->buf);
206 migf->buf = NULL;
207 }
208
209 if (migf->buf_header) {
210 virtiovf_free_data_buffer(migf->buf_header);
211 migf->buf_header = NULL;
212 }
213
214 list_splice(&migf->avail_list, &migf->buf_list);
215
216 while ((entry = list_first_entry_or_null(&migf->buf_list,
217 struct virtiovf_data_buffer, buf_elm))) {
218 list_del(&entry->buf_elm);
219 virtiovf_free_data_buffer(entry);
220 }
221
222 if (migf->has_obj_id)
223 virtiovf_pci_free_obj_id(migf->virtvdev, migf->obj_id);
224 }
225
/*
 * Mark a migration file as unusable: force the ERROR state so subsequent
 * read/write/ioctl callers bail out, and rewind the file position.
 */
static void virtiovf_disable_fd(struct virtiovf_migration_file *migf)
{
	mutex_lock(&migf->lock);
	migf->state = VIRTIOVF_MIGF_STATE_ERROR;
	migf->filp->f_pos = 0;
	mutex_unlock(&migf->lock);
}
233
virtiovf_disable_fds(struct virtiovf_pci_core_device * virtvdev)234 static void virtiovf_disable_fds(struct virtiovf_pci_core_device *virtvdev)
235 {
236 if (virtvdev->resuming_migf) {
237 virtiovf_disable_fd(virtvdev->resuming_migf);
238 virtiovf_clean_migf_resources(virtvdev->resuming_migf);
239 fput(virtvdev->resuming_migf->filp);
240 virtvdev->resuming_migf = NULL;
241 }
242 if (virtvdev->saving_migf) {
243 virtiovf_disable_fd(virtvdev->saving_migf);
244 virtiovf_clean_migf_resources(virtvdev->saving_migf);
245 fput(virtvdev->saving_migf->filp);
246 virtvdev->saving_migf = NULL;
247 }
248 }
249
/*
 * This function is called in all state_mutex unlock cases to
 * handle a 'deferred_reset' if exists.
 */
static void virtiovf_state_mutex_unlock(struct virtiovf_pci_core_device *virtvdev)
{
again:
	spin_lock(&virtvdev->reset_lock);
	if (virtvdev->deferred_reset) {
		/*
		 * A reset arrived while state_mutex was held: perform the
		 * reset work here (state back to RUNNING, fds disabled) and
		 * loop in case another reset raced in meanwhile.
		 */
		virtvdev->deferred_reset = false;
		spin_unlock(&virtvdev->reset_lock);
		virtvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
		virtiovf_disable_fds(virtvdev);
		goto again;
	}
	/*
	 * Release state_mutex while still holding reset_lock so a concurrent
	 * reset cannot set deferred_reset between our check and the unlock.
	 */
	mutex_unlock(&virtvdev->state_mutex);
	spin_unlock(&virtvdev->reset_lock);
}
268
/*
 * PCI reset-done hook for migration-capable VFs: request that the migration
 * state be reset to RUNNING, either directly (if state_mutex is free) or
 * deferred to whichever context currently holds it.
 */
void virtiovf_migration_reset_done(struct pci_dev *pdev)
{
	struct virtiovf_pci_core_device *virtvdev = dev_get_drvdata(&pdev->dev);

	if (!virtvdev->migrate_cap)
		return;

	/*
	 * As the higher VFIO layers are holding locks across reset and using
	 * those same locks with the mm_lock we need to prevent ABBA deadlock
	 * with the state_mutex and mm_lock.
	 * In case the state_mutex was taken already we defer the cleanup work
	 * to the unlock flow of the other running context.
	 */
	spin_lock(&virtvdev->reset_lock);
	virtvdev->deferred_reset = true;
	if (!mutex_trylock(&virtvdev->state_mutex)) {
		/* The mutex holder will act on deferred_reset at unlock time */
		spin_unlock(&virtvdev->reset_lock);
		return;
	}
	spin_unlock(&virtvdev->reset_lock);
	/* We got the mutex: this performs the deferred reset and unlocks */
	virtiovf_state_mutex_unlock(virtvdev);
}
292
/*
 * ->release() for both save and resume migration files: invalidate the file
 * and free its structure.  Buffer/obj-id cleanup happens separately via
 * virtiovf_clean_migf_resources() on the disable paths.
 */
static int virtiovf_release_file(struct inode *inode, struct file *filp)
{
	struct virtiovf_migration_file *migf = filp->private_data;

	virtiovf_disable_fd(migf);
	mutex_destroy(&migf->lock);
	kfree(migf);
	return 0;
}
302
/*
 * Find the buffer that covers stream position @pos.
 *
 * The FD is stream-based, so @pos must fall inside the FIRST buffer on
 * buf_list; anything else indicates a non-sequential access and moves the
 * file into the ERROR state.  *end_of_data is set when the list is empty.
 *
 * Returns the matching buffer or NULL.
 */
static struct virtiovf_data_buffer *
virtiovf_get_data_buff_from_pos(struct virtiovf_migration_file *migf,
				loff_t pos, bool *end_of_data)
{
	struct virtiovf_data_buffer *buf;
	bool found = false;

	*end_of_data = false;
	spin_lock_irq(&migf->list_lock);
	if (list_empty(&migf->buf_list)) {
		*end_of_data = true;
		goto end;
	}

	buf = list_first_entry(&migf->buf_list, struct virtiovf_data_buffer,
			       buf_elm);
	if (pos >= buf->start_pos &&
	    pos < buf->start_pos + buf->length) {
		found = true;
		goto end;
	}

	/*
	 * As we use a stream based FD we may expect having the data always
	 * on first chunk
	 */
	migf->state = VIRTIOVF_MIGF_STATE_ERROR;

end:
	spin_unlock_irq(&migf->list_lock);
	return found ? buf : NULL;
}
335
/*
 * Copy as much of @vhca_buf as fits in the user buffer, page by page,
 * advancing *buf, *len and *pos.  A fully consumed buffer is moved from
 * buf_list to avail_list for reuse.
 *
 * Returns the number of bytes copied or a negative errno.
 */
static ssize_t virtiovf_buf_read(struct virtiovf_data_buffer *vhca_buf,
				 char __user **buf, size_t *len, loff_t *pos)
{
	unsigned long offset;
	ssize_t done = 0;
	size_t copy_len;

	/* Bytes remaining in this buffer, capped by the user request */
	copy_len = min_t(size_t,
			 vhca_buf->start_pos + vhca_buf->length - *pos, *len);
	while (copy_len) {
		size_t page_offset;
		struct page *page;
		size_t page_len;
		u8 *from_buff;
		int ret;

		offset = *pos - vhca_buf->start_pos;
		page_offset = offset % PAGE_SIZE;
		offset -= page_offset;
		page = virtiovf_get_migration_page(vhca_buf, offset);
		if (!page)
			return -EINVAL;
		/* Never cross a page boundary in a single copy */
		page_len = min_t(size_t, copy_len, PAGE_SIZE - page_offset);
		from_buff = kmap_local_page(page);
		ret = copy_to_user(*buf, from_buff + page_offset, page_len);
		kunmap_local(from_buff);
		if (ret)
			return -EFAULT;
		*pos += page_len;
		*len -= page_len;
		*buf += page_len;
		done += page_len;
		copy_len -= page_len;
	}

	/* Buffer fully consumed: recycle it onto the available list */
	if (*pos >= vhca_buf->start_pos + vhca_buf->length) {
		spin_lock_irq(&vhca_buf->migf->list_lock);
		list_del_init(&vhca_buf->buf_elm);
		list_add_tail(&vhca_buf->buf_elm, &vhca_buf->migf->avail_list);
		spin_unlock_irq(&vhca_buf->migf->list_lock);
	}

	return done;
}
380
/*
 * ->read() of the save migration file: stream queued device-state buffers
 * out to user space.
 *
 * Only stream access is allowed (pread() returns -ESPIPE).  Reaching the end
 * of currently available data on the first iteration returns -ENOMSG during
 * PRE_COPY (temporary EOF) or -EINVAL if the save is not COMPLETE.
 */
static ssize_t virtiovf_save_read(struct file *filp, char __user *buf, size_t len,
				  loff_t *pos)
{
	struct virtiovf_migration_file *migf = filp->private_data;
	struct virtiovf_data_buffer *vhca_buf;
	bool first_loop_call = true;
	bool end_of_data;
	ssize_t done = 0;

	if (pos)
		return -ESPIPE;
	pos = &filp->f_pos;

	mutex_lock(&migf->lock);
	if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
		done = -ENODEV;
		goto out_unlock;
	}

	while (len) {
		ssize_t count;

		vhca_buf = virtiovf_get_data_buff_from_pos(migf, *pos, &end_of_data);
		if (first_loop_call) {
			first_loop_call = false;
			/* Temporary end of file as part of PRE_COPY */
			if (end_of_data && migf->state == VIRTIOVF_MIGF_STATE_PRECOPY) {
				done = -ENOMSG;
				goto out_unlock;
			}
			if (end_of_data && migf->state != VIRTIOVF_MIGF_STATE_COMPLETE) {
				done = -EINVAL;
				goto out_unlock;
			}
		}

		/* Later iterations: EOF just returns what was copied so far */
		if (end_of_data)
			goto out_unlock;

		if (!vhca_buf) {
			done = -EINVAL;
			goto out_unlock;
		}

		count = virtiovf_buf_read(vhca_buf, &buf, &len, pos);
		if (count < 0) {
			done = count;
			goto out_unlock;
		}
		done += count;
	}

out_unlock:
	mutex_unlock(&migf->lock);
	return done;
}
437
/*
 * VFIO_MIG_GET_PRECOPY_INFO ioctl on the save file: report how many initial
 * and dirty bytes remain to be read during PRE_COPY, and opportunistically
 * fetch a fresh device-context chunk when the current one is exhausted and
 * the device reports more data.
 */
static long virtiovf_precopy_ioctl(struct file *filp, unsigned int cmd,
				   unsigned long arg)
{
	struct virtiovf_migration_file *migf = filp->private_data;
	struct virtiovf_pci_core_device *virtvdev = migf->virtvdev;
	struct vfio_precopy_info info = {};
	loff_t *pos = &filp->f_pos;
	bool end_of_data = false;
	unsigned long minsz;
	u32 ctx_size = 0;
	int ret;

	if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
		return -ENOTTY;

	minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
	if (copy_from_user(&info, (void __user *)arg, minsz))
		return -EFAULT;

	if (info.argsz < minsz)
		return -EINVAL;

	mutex_lock(&virtvdev->state_mutex);
	if (virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY &&
	    virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) {
		ret = -EINVAL;
		goto err_state_unlock;
	}

	/*
	 * The virtio specification does not include a PRE_COPY concept.
	 * Since we can expect the data to remain the same for a certain period,
	 * we use a rate limiter mechanism before making a call to the device.
	 */
	if (__ratelimit(&migf->pre_copy_rl_state)) {

		/* Query the current device-context size (dirty estimate) */
		ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
					VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id,
					VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
					&ctx_size);
		if (ret)
			goto err_state_unlock;
	}

	mutex_lock(&migf->lock);
	if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
		ret = -ENODEV;
		goto err_migf_unlock;
	}

	if (migf->pre_copy_initial_bytes > *pos) {
		/* Still inside the initial snapshot taken at save start */
		info.initial_bytes = migf->pre_copy_initial_bytes - *pos;
	} else {
		info.dirty_bytes = migf->max_pos - *pos;
		if (!info.dirty_bytes)
			end_of_data = true;
		info.dirty_bytes += ctx_size;
	}

	if (!end_of_data || !ctx_size) {
		mutex_unlock(&migf->lock);
		goto done;
	}

	mutex_unlock(&migf->lock);
	/*
	 * We finished transferring the current state and the device has a
	 * dirty state, read a new state.
	 */
	ret = virtiovf_read_device_context_chunk(migf, ctx_size);
	if (ret)
		/*
		 * The machine is running, and context size could be grow, so no reason to mark
		 * the device state as VIRTIOVF_MIGF_STATE_ERROR.
		 */
		goto err_state_unlock;

done:
	virtiovf_state_mutex_unlock(virtvdev);
	if (copy_to_user((void __user *)arg, &info, minsz))
		return -EFAULT;
	return 0;

err_migf_unlock:
	mutex_unlock(&migf->lock);
err_state_unlock:
	virtiovf_state_mutex_unlock(virtvdev);
	return ret;
}
527
/* File operations for the read-only (save direction) migration file */
static const struct file_operations virtiovf_save_fops = {
	.owner = THIS_MODULE,
	.read = virtiovf_save_read,
	.unlocked_ioctl = virtiovf_precopy_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.release = virtiovf_release_file,
};
535
/*
 * Fill @header_buf with a mandatory DEVICE_DATA record header announcing a
 * following chunk of @data_size bytes, and queue it on the file's buf_list
 * so it is streamed to user space before the chunk itself.
 *
 * Returns 0 on success or -EINVAL if the header page cannot be resolved.
 */
static int
virtiovf_add_buf_header(struct virtiovf_data_buffer *header_buf,
			u32 data_size)
{
	struct virtiovf_migration_file *migf = header_buf->migf;
	struct virtiovf_migration_header header = {};
	struct page *page;
	u8 *to_buff;

	header.record_size = cpu_to_le64(data_size);
	header.flags = cpu_to_le32(VIRTIOVF_MIGF_HEADER_FLAGS_TAG_MANDATORY);
	header.tag = cpu_to_le32(VIRTIOVF_MIGF_HEADER_TAG_DEVICE_DATA);
	/* The header always fits in the buffer's first page */
	page = virtiovf_get_migration_page(header_buf, 0);
	if (!page)
		return -EINVAL;
	to_buff = kmap_local_page(page);
	memcpy(to_buff, &header, sizeof(header));
	kunmap_local(to_buff);
	header_buf->length = sizeof(header);
	header_buf->start_pos = header_buf->migf->max_pos;
	migf->max_pos += header_buf->length;
	spin_lock_irq(&migf->list_lock);
	list_add_tail(&header_buf->buf_elm, &migf->buf_list);
	spin_unlock_irq(&migf->list_lock);
	return 0;
}
562
563 static int
virtiovf_read_device_context_chunk(struct virtiovf_migration_file * migf,u32 ctx_size)564 virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
565 u32 ctx_size)
566 {
567 struct virtiovf_data_buffer *header_buf;
568 struct virtiovf_data_buffer *buf;
569 bool unmark_end = false;
570 struct scatterlist *sg;
571 unsigned int i;
572 u32 res_size;
573 int nent;
574 int ret;
575
576 buf = virtiovf_get_data_buffer(migf, ctx_size);
577 if (IS_ERR(buf))
578 return PTR_ERR(buf);
579
580 /* Find the total count of SG entries which satisfies the size */
581 nent = sg_nents_for_len(buf->table.sgt.sgl, ctx_size);
582 if (nent <= 0) {
583 ret = -EINVAL;
584 goto out;
585 }
586
587 /*
588 * Iterate to that SG entry and mark it as last (if it's not already)
589 * to let underlay layers iterate only till that entry.
590 */
591 for_each_sg(buf->table.sgt.sgl, sg, nent - 1, i)
592 ;
593
594 if (!sg_is_last(sg)) {
595 unmark_end = true;
596 sg_mark_end(sg);
597 }
598
599 ret = virtio_pci_admin_dev_parts_get(migf->virtvdev->core_device.pdev,
600 VIRTIO_RESOURCE_OBJ_DEV_PARTS,
601 migf->obj_id,
602 VIRTIO_ADMIN_CMD_DEV_PARTS_GET_TYPE_ALL,
603 buf->table.sgt.sgl, &res_size);
604 /* Restore the original SG mark end */
605 if (unmark_end)
606 sg_unmark_end(sg);
607 if (ret)
608 goto out;
609
610 buf->length = res_size;
611 header_buf = virtiovf_get_data_buffer(migf,
612 sizeof(struct virtiovf_migration_header));
613 if (IS_ERR(header_buf)) {
614 ret = PTR_ERR(header_buf);
615 goto out;
616 }
617
618 ret = virtiovf_add_buf_header(header_buf, res_size);
619 if (ret)
620 goto out_header;
621
622 buf->start_pos = buf->migf->max_pos;
623 migf->max_pos += buf->length;
624 spin_lock(&migf->list_lock);
625 list_add_tail(&buf->buf_elm, &migf->buf_list);
626 spin_unlock_irq(&migf->list_lock);
627 return 0;
628
629 out_header:
630 virtiovf_put_data_buffer(header_buf);
631 out:
632 virtiovf_put_data_buffer(buf);
633 return ret;
634 }
635
/*
 * Fetch the final device context after the device has been stopped and mark
 * the save file COMPLETE.  Any failure here is fatal for the migration, so
 * the file is moved to the ERROR state.
 */
static int
virtiovf_pci_save_device_final_data(struct virtiovf_pci_core_device *virtvdev)
{
	struct virtiovf_migration_file *migf = virtvdev->saving_migf;
	u32 ctx_size;
	int ret;

	if (migf->state == VIRTIOVF_MIGF_STATE_ERROR)
		return -ENODEV;

	ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
				VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id,
				VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
				&ctx_size);
	if (ret)
		goto err;

	/* A stopped device must still report some context to save */
	if (!ctx_size) {
		ret = -EINVAL;
		goto err;
	}

	ret = virtiovf_read_device_context_chunk(migf, ctx_size);
	if (ret)
		goto err;

	migf->state = VIRTIOVF_MIGF_STATE_COMPLETE;
	return 0;

err:
	migf->state = VIRTIOVF_MIGF_STATE_ERROR;
	return ret;
}
669
/*
 * Create the saving-direction migration file: allocate the anon fd, create
 * the device GET parts object, and read the first device-context chunk.
 *
 * When @pre_copy is set the file enters the PRECOPY state with a rate
 * limiter that throttles device metadata queries from the precopy ioctl;
 * otherwise the single chunk already read completes the save.
 *
 * Returns the file or an ERR_PTR().  On error the fput() triggers
 * virtiovf_release_file(), which frees @migf, so resources are cleaned
 * before the reference is dropped.
 */
static struct virtiovf_migration_file *
virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev,
			      bool pre_copy)
{
	struct virtiovf_migration_file *migf;
	u32 ctx_size;
	u32 obj_id;
	int ret;

	migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
	if (!migf)
		return ERR_PTR(-ENOMEM);

	migf->filp = anon_inode_getfile("virtiovf_mig", &virtiovf_save_fops, migf,
					O_RDONLY);
	if (IS_ERR(migf->filp)) {
		ret = PTR_ERR(migf->filp);
		kfree(migf);
		return ERR_PTR(ret);
	}

	stream_open(migf->filp->f_inode, migf->filp);
	mutex_init(&migf->lock);
	INIT_LIST_HEAD(&migf->buf_list);
	INIT_LIST_HEAD(&migf->avail_list);
	spin_lock_init(&migf->list_lock);
	migf->virtvdev = virtvdev;

	lockdep_assert_held(&virtvdev->state_mutex);
	ret = virtiovf_pci_alloc_obj_id(virtvdev, VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET,
					&obj_id);
	if (ret)
		goto out;

	migf->obj_id = obj_id;
	/* Mark as having a valid obj id which can be even 0 */
	migf->has_obj_id = true;
	ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
				VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id,
				VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
				&ctx_size);
	if (ret)
		goto out_clean;

	if (!ctx_size) {
		ret = -EINVAL;
		goto out_clean;
	}

	ret = virtiovf_read_device_context_chunk(migf, ctx_size);
	if (ret)
		goto out_clean;

	if (pre_copy) {
		migf->pre_copy_initial_bytes = migf->max_pos;
		/* Arbitrarily set the pre-copy rate limit to 1-second intervals */
		ratelimit_state_init(&migf->pre_copy_rl_state, 1 * HZ, 1);
		/* Prevent any rate messages upon its usage */
		ratelimit_set_flags(&migf->pre_copy_rl_state,
				    RATELIMIT_MSG_ON_RELEASE);
		migf->state = VIRTIOVF_MIGF_STATE_PRECOPY;
	} else {
		migf->state = VIRTIOVF_MIGF_STATE_COMPLETE;
	}

	return migf;

out_clean:
	virtiovf_clean_migf_resources(migf);
out:
	fput(migf->filp);
	return ERR_PTR(ret);
}
743
/*
 * Set the required object header at the beginning of the buffer.
 * The actual device parts data will be written post of the header offset.
 */
static int virtiovf_set_obj_cmd_header(struct virtiovf_data_buffer *vhca_buf)
{
	struct virtio_admin_cmd_resource_obj_cmd_hdr obj_hdr = {};
	struct page *page;
	u8 *to_buff;

	obj_hdr.type = cpu_to_le16(VIRTIO_RESOURCE_OBJ_DEV_PARTS);
	obj_hdr.id = cpu_to_le32(vhca_buf->migf->obj_id);
	/* The header fits in the buffer's first page */
	page = virtiovf_get_migration_page(vhca_buf, 0);
	if (!page)
		return -EINVAL;
	to_buff = kmap_local_page(page);
	memcpy(to_buff, &obj_hdr, sizeof(obj_hdr));
	kunmap_local(to_buff);

	/* Mark the buffer as including the header object data */
	vhca_buf->include_header_object = 1;
	return 0;
}
767
/*
 * Copy up to one page worth of user data at stream position *pos into
 * @vhca_buf, advancing *buf, *len, *pos and *done and growing the buffer's
 * logical length.
 *
 * When the buffer carries the resource-object command header, user data is
 * written after it, so the in-buffer offset is shifted by the header size.
 *
 * Returns 0 on success or a negative errno.
 */
static int
virtiovf_append_page_to_mig_buf(struct virtiovf_data_buffer *vhca_buf,
				const char __user **buf, size_t *len,
				loff_t *pos, ssize_t *done)
{
	unsigned long offset;
	size_t page_offset;
	struct page *page;
	size_t page_len;
	u8 *to_buff;
	int ret;

	offset = *pos - vhca_buf->start_pos;

	if (vhca_buf->include_header_object)
		/* The buffer holds the object header, update the offset accordingly */
		offset += sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr);

	page_offset = offset % PAGE_SIZE;

	page = virtiovf_get_migration_page(vhca_buf, offset - page_offset);
	if (!page)
		return -EINVAL;

	/* Stay within the current page for this copy */
	page_len = min_t(size_t, *len, PAGE_SIZE - page_offset);
	to_buff = kmap_local_page(page);
	ret = copy_from_user(to_buff + page_offset, *buf, page_len);
	kunmap_local(to_buff);
	if (ret)
		return -EFAULT;

	*pos += page_len;
	*done += page_len;
	*buf += page_len;
	*len -= page_len;
	vhca_buf->length += page_len;
	return 0;
}
806
/*
 * Accumulate user data into the chunk buffer until @chunk_size bytes have
 * been gathered.  Once complete, advance the load state machine to
 * LOAD_CHUNK and flag *has_work so the caller processes it.
 *
 * Returns 0 on success or a negative errno from the page copy.
 */
static ssize_t
virtiovf_resume_read_chunk(struct virtiovf_migration_file *migf,
			   struct virtiovf_data_buffer *vhca_buf,
			   size_t chunk_size, const char __user **buf,
			   size_t *len, loff_t *pos, ssize_t *done,
			   bool *has_work)
{
	size_t copy_len, to_copy;
	int ret;

	to_copy = min_t(size_t, *len, chunk_size - vhca_buf->length);
	copy_len = to_copy;
	/* virtiovf_append_page_to_mig_buf() decrements to_copy each pass */
	while (to_copy) {
		ret = virtiovf_append_page_to_mig_buf(vhca_buf, buf, &to_copy,
						      pos, done);
		if (ret)
			return ret;
	}

	*len -= copy_len;
	if (vhca_buf->length == chunk_size) {
		migf->load_state = VIRTIOVF_LOAD_STATE_LOAD_CHUNK;
		migf->max_pos += chunk_size;
		*has_work = true;
	}

	return 0;
}
835
/*
 * Consume the data portion of an optional (non DEVICE_DATA) record.  The
 * data is read into @vhca_buf and then discarded — the switch on record_tag
 * currently recognizes no tag, so all optional records are skipped — before
 * returning the state machine to READ_HEADER.
 *
 * Returns 0 on success or a negative errno from the page copy.
 */
static int
virtiovf_resume_read_header_data(struct virtiovf_migration_file *migf,
				 struct virtiovf_data_buffer *vhca_buf,
				 const char __user **buf, size_t *len,
				 loff_t *pos, ssize_t *done)
{
	size_t copy_len, to_copy;
	size_t required_data;
	int ret;

	required_data = migf->record_size - vhca_buf->length;
	to_copy = min_t(size_t, *len, required_data);
	copy_len = to_copy;
	while (to_copy) {
		ret = virtiovf_append_page_to_mig_buf(vhca_buf, buf, &to_copy,
						      pos, done);
		if (ret)
			return ret;
	}

	*len -= copy_len;
	if (vhca_buf->length == migf->record_size) {
		switch (migf->record_tag) {
		default:
			/* Optional tag */
			break;
		}

		migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER;
		migf->max_pos += migf->record_size;
		vhca_buf->length = 0;
	}

	return 0;
}
871
/*
 * Accumulate and parse a virtiovf_migration_header from the user stream.
 *
 * Once the full header has been gathered, its record_size (bounded by
 * MAX_LOAD_SIZE), flags and tag are decoded.  A DEVICE_DATA tag moves the
 * state machine to PREP_CHUNK; an unknown tag is either skipped (optional
 * flag set) via PREP_HEADER_DATA or rejected with -EOPNOTSUPP.
 *
 * Returns 0 on success or a negative errno.
 */
static int
virtiovf_resume_read_header(struct virtiovf_migration_file *migf,
			    struct virtiovf_data_buffer *vhca_buf,
			    const char __user **buf,
			    size_t *len, loff_t *pos,
			    ssize_t *done, bool *has_work)
{
	struct page *page;
	size_t copy_len;
	u8 *to_buff;
	int ret;

	copy_len = min_t(size_t, *len,
			 sizeof(struct virtiovf_migration_header) - vhca_buf->length);
	/* The header buffer is small; it always lives in the first page */
	page = virtiovf_get_migration_page(vhca_buf, 0);
	if (!page)
		return -EINVAL;
	to_buff = kmap_local_page(page);
	ret = copy_from_user(to_buff + vhca_buf->length, *buf, copy_len);
	if (ret) {
		ret = -EFAULT;
		goto end;
	}

	*buf += copy_len;
	*pos += copy_len;
	*done += copy_len;
	*len -= copy_len;
	vhca_buf->length += copy_len;
	if (vhca_buf->length == sizeof(struct virtiovf_migration_header)) {
		u64 record_size;
		u32 flags;

		/* Header layout: record_size (le64), flags (le32), tag (le32) */
		record_size = le64_to_cpup((__le64 *)to_buff);
		if (record_size > MAX_LOAD_SIZE) {
			ret = -ENOMEM;
			goto end;
		}

		migf->record_size = record_size;
		flags = le32_to_cpup((__le32 *)(to_buff +
			    offsetof(struct virtiovf_migration_header, flags)));
		migf->record_tag = le32_to_cpup((__le32 *)(to_buff +
			    offsetof(struct virtiovf_migration_header, tag)));
		switch (migf->record_tag) {
		case VIRTIOVF_MIGF_HEADER_TAG_DEVICE_DATA:
			migf->load_state = VIRTIOVF_LOAD_STATE_PREP_CHUNK;
			break;
		default:
			if (!(flags & VIRTIOVF_MIGF_HEADER_FLAGS_TAG_OPTIONAL)) {
				ret = -EOPNOTSUPP;
				goto end;
			}
			/* We may read and skip this optional record data */
			migf->load_state = VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA;
		}

		migf->max_pos += vhca_buf->length;
		vhca_buf->length = 0;
		*has_work = true;
	}
end:
	kunmap_local(to_buff);
	return ret;
}
937
/*
 * ->write() of the resume migration file: drive the load state machine.
 *
 * The stream alternates record headers and record data.  DEVICE_DATA chunks
 * are staged into migf->buf (grown on demand) behind the resource-object
 * command header, then pushed to the device via the dev_parts_set admin
 * command; unknown-but-optional records are read and discarded.  Any error
 * moves the file into the ERROR state.
 */
static ssize_t virtiovf_resume_write(struct file *filp, const char __user *buf,
				     size_t len, loff_t *pos)
{
	struct virtiovf_migration_file *migf = filp->private_data;
	struct virtiovf_data_buffer *vhca_buf = migf->buf;
	struct virtiovf_data_buffer *vhca_buf_header = migf->buf_header;
	unsigned int orig_length;
	bool has_work = false;
	ssize_t done = 0;
	int ret = 0;

	if (pos)
		return -ESPIPE;

	pos = &filp->f_pos;
	if (*pos < vhca_buf->start_pos)
		return -EINVAL;

	mutex_lock(&migf->virtvdev->state_mutex);
	mutex_lock(&migf->lock);
	if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
		done = -ENODEV;
		goto out_unlock;
	}

	/* has_work lets a state complete processing even when len hits 0 */
	while (len || has_work) {
		has_work = false;
		switch (migf->load_state) {
		case VIRTIOVF_LOAD_STATE_READ_HEADER:
			ret = virtiovf_resume_read_header(migf, vhca_buf_header, &buf,
							  &len, pos, &done, &has_work);
			if (ret)
				goto out_unlock;
			break;
		case VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA:
			/* Ensure the header-data buffer can hold the record */
			if (vhca_buf_header->allocated_length < migf->record_size) {
				virtiovf_free_data_buffer(vhca_buf_header);

				migf->buf_header = virtiovf_alloc_data_buffer(migf,
						migf->record_size);
				if (IS_ERR(migf->buf_header)) {
					ret = PTR_ERR(migf->buf_header);
					migf->buf_header = NULL;
					goto out_unlock;
				}

				vhca_buf_header = migf->buf_header;
			}

			vhca_buf_header->start_pos = migf->max_pos;
			migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER_DATA;
			break;
		case VIRTIOVF_LOAD_STATE_READ_HEADER_DATA:
			ret = virtiovf_resume_read_header_data(migf, vhca_buf_header,
							&buf, &len, pos, &done);
			if (ret)
				goto out_unlock;
			break;
		case VIRTIOVF_LOAD_STATE_PREP_CHUNK:
		{
			u32 cmd_size = migf->record_size +
				sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr);

			/*
			 * The DMA map/unmap is managed in virtio layer, we just need to extend
			 * the SG pages to hold the extra required chunk data.
			 */
			if (vhca_buf->allocated_length < cmd_size) {
				ret = virtiovf_add_migration_pages(vhca_buf,
					DIV_ROUND_UP_ULL(cmd_size - vhca_buf->allocated_length,
							 PAGE_SIZE));
				if (ret)
					goto out_unlock;
			}

			vhca_buf->start_pos = migf->max_pos;
			migf->load_state = VIRTIOVF_LOAD_STATE_READ_CHUNK;
			break;
		}
		case VIRTIOVF_LOAD_STATE_READ_CHUNK:
			ret = virtiovf_resume_read_chunk(migf, vhca_buf, migf->record_size,
							 &buf, &len, pos, &done, &has_work);
			if (ret)
				goto out_unlock;
			break;
		case VIRTIOVF_LOAD_STATE_LOAD_CHUNK:
			/* Mark the last SG entry and set its length */
			sg_mark_end(vhca_buf->last_offset_sg);
			orig_length = vhca_buf->last_offset_sg->length;
			/* Length should include the resource object command header */
			vhca_buf->last_offset_sg->length = vhca_buf->length +
					sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr) -
					vhca_buf->last_offset;
			ret = virtio_pci_admin_dev_parts_set(migf->virtvdev->core_device.pdev,
							     vhca_buf->table.sgt.sgl);
			/* Restore the original SG data */
			vhca_buf->last_offset_sg->length = orig_length;
			sg_unmark_end(vhca_buf->last_offset_sg);
			if (ret)
				goto out_unlock;
			migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER;
			/* be ready for reading the next chunk */
			vhca_buf->length = 0;
			break;
		default:
			break;
		}
	}

out_unlock:
	if (ret)
		migf->state = VIRTIOVF_MIGF_STATE_ERROR;
	mutex_unlock(&migf->lock);
	virtiovf_state_mutex_unlock(migf->virtvdev);
	return ret ? ret : done;
}
1054
/*
 * File operations for the resume (device-load) file returned to user space
 * on the STOP -> RESUMING arc: a write-only stream carrying the previously
 * saved device state. Created by virtiovf_pci_resume_device_data().
 */
static const struct file_operations virtiovf_resume_fops = {
	.owner = THIS_MODULE,
	.write = virtiovf_resume_write,
	.release = virtiovf_release_file,
};
1060
/*
 * Allocate and initialize the migration file used on the resume
 * (destination) side. User space writes the saved device state into the
 * returned file; virtiovf_resume_write() parses and loads it.
 *
 * Returns the migration file on success or an ERR_PTR() on failure.
 */
static struct virtiovf_migration_file *
virtiovf_pci_resume_device_data(struct virtiovf_pci_core_device *virtvdev)
{
	struct virtiovf_migration_file *migf;
	struct virtiovf_data_buffer *buf;
	u32 obj_id;
	int ret;

	migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
	if (!migf)
		return ERR_PTR(-ENOMEM);

	/* Write-only: user space only feeds data in on the resume side */
	migf->filp = anon_inode_getfile("virtiovf_mig", &virtiovf_resume_fops, migf,
					O_WRONLY);
	if (IS_ERR(migf->filp)) {
		ret = PTR_ERR(migf->filp);
		kfree(migf);
		return ERR_PTR(ret);
	}

	/* Migration data is a byte stream, no llseek support is needed */
	stream_open(migf->filp->f_inode, migf->filp);
	mutex_init(&migf->lock);
	INIT_LIST_HEAD(&migf->buf_list);
	INIT_LIST_HEAD(&migf->avail_list);
	spin_lock_init(&migf->list_lock);

	/* Main data buffer, grown on demand while chunks are received */
	buf = virtiovf_alloc_data_buffer(migf, VIRTIOVF_TARGET_INITIAL_BUF_SIZE);
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		goto out;
	}

	migf->buf = buf;

	/* Small buffer dedicated to the per-chunk migration header */
	buf = virtiovf_alloc_data_buffer(migf,
			sizeof(struct virtiovf_migration_header));
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		goto out_clean;
	}

	migf->buf_header = buf;
	/* Parsing starts by reading the first chunk header */
	migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER;

	migf->virtvdev = virtvdev;
	/* Device-side resource object used for the "dev parts set" commands */
	ret = virtiovf_pci_alloc_obj_id(virtvdev, VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_SET,
					&obj_id);
	if (ret)
		goto out_clean;

	migf->obj_id = obj_id;
	/* Mark as having a valid obj id which can be even 0 */
	migf->has_obj_id = true;
	ret = virtiovf_set_obj_cmd_header(migf->buf);
	if (ret)
		goto out_clean;

	return migf;

out_clean:
	/* Frees the data buffers and the device object id */
	virtiovf_clean_migf_resources(migf);
out:
	/*
	 * Dropping the last file reference triggers virtiovf_release_file(),
	 * which presumably frees migf itself — no kfree() here (TODO: confirm
	 * against virtiovf_release_file(), defined elsewhere in this file).
	 */
	fput(migf->filp);
	return ERR_PTR(ret);
}
1126
1127 static struct file *
virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device * virtvdev,u32 new)1128 virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev,
1129 u32 new)
1130 {
1131 u32 cur = virtvdev->mig_state;
1132 int ret;
1133
1134 if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) {
1135 /* NOP */
1136 return NULL;
1137 }
1138
1139 if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
1140 /* NOP */
1141 return NULL;
1142 }
1143
1144 if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) ||
1145 (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
1146 ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev,
1147 BIT(VIRTIO_ADMIN_CMD_DEV_MODE_F_STOPPED));
1148 if (ret)
1149 return ERR_PTR(ret);
1150 return NULL;
1151 }
1152
1153 if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) ||
1154 (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY)) {
1155 ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev, 0);
1156 if (ret)
1157 return ERR_PTR(ret);
1158 return NULL;
1159 }
1160
1161 if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
1162 struct virtiovf_migration_file *migf;
1163
1164 migf = virtiovf_pci_save_device_data(virtvdev, false);
1165 if (IS_ERR(migf))
1166 return ERR_CAST(migf);
1167 get_file(migf->filp);
1168 virtvdev->saving_migf = migf;
1169 return migf->filp;
1170 }
1171
1172 if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) ||
1173 (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) ||
1174 (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P)) {
1175 virtiovf_disable_fds(virtvdev);
1176 return NULL;
1177 }
1178
1179 if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
1180 struct virtiovf_migration_file *migf;
1181
1182 migf = virtiovf_pci_resume_device_data(virtvdev);
1183 if (IS_ERR(migf))
1184 return ERR_CAST(migf);
1185 get_file(migf->filp);
1186 virtvdev->resuming_migf = migf;
1187 return migf->filp;
1188 }
1189
1190 if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
1191 virtiovf_disable_fds(virtvdev);
1192 return NULL;
1193 }
1194
1195 if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) ||
1196 (cur == VFIO_DEVICE_STATE_RUNNING_P2P &&
1197 new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
1198 struct virtiovf_migration_file *migf;
1199
1200 migf = virtiovf_pci_save_device_data(virtvdev, true);
1201 if (IS_ERR(migf))
1202 return ERR_CAST(migf);
1203 get_file(migf->filp);
1204 virtvdev->saving_migf = migf;
1205 return migf->filp;
1206 }
1207
1208 if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY) {
1209 ret = virtiovf_pci_save_device_final_data(virtvdev);
1210 return ret ? ERR_PTR(ret) : NULL;
1211 }
1212
1213 /*
1214 * vfio_mig_get_next_state() does not use arcs other than the above
1215 */
1216 WARN_ON(true);
1217 return ERR_PTR(-EINVAL);
1218 }
1219
1220 static struct file *
virtiovf_pci_set_device_state(struct vfio_device * vdev,enum vfio_device_mig_state new_state)1221 virtiovf_pci_set_device_state(struct vfio_device *vdev,
1222 enum vfio_device_mig_state new_state)
1223 {
1224 struct virtiovf_pci_core_device *virtvdev = container_of(
1225 vdev, struct virtiovf_pci_core_device, core_device.vdev);
1226 enum vfio_device_mig_state next_state;
1227 struct file *res = NULL;
1228 int ret;
1229
1230 mutex_lock(&virtvdev->state_mutex);
1231 while (new_state != virtvdev->mig_state) {
1232 ret = vfio_mig_get_next_state(vdev, virtvdev->mig_state,
1233 new_state, &next_state);
1234 if (ret) {
1235 res = ERR_PTR(ret);
1236 break;
1237 }
1238 res = virtiovf_pci_step_device_state_locked(virtvdev, next_state);
1239 if (IS_ERR(res))
1240 break;
1241 virtvdev->mig_state = next_state;
1242 if (WARN_ON(res && new_state != virtvdev->mig_state)) {
1243 fput(res);
1244 res = ERR_PTR(-EINVAL);
1245 break;
1246 }
1247 }
1248 virtiovf_state_mutex_unlock(virtvdev);
1249 return res;
1250 }
1251
virtiovf_pci_get_device_state(struct vfio_device * vdev,enum vfio_device_mig_state * curr_state)1252 static int virtiovf_pci_get_device_state(struct vfio_device *vdev,
1253 enum vfio_device_mig_state *curr_state)
1254 {
1255 struct virtiovf_pci_core_device *virtvdev = container_of(
1256 vdev, struct virtiovf_pci_core_device, core_device.vdev);
1257
1258 mutex_lock(&virtvdev->state_mutex);
1259 *curr_state = virtvdev->mig_state;
1260 virtiovf_state_mutex_unlock(virtvdev);
1261 return 0;
1262 }
1263
virtiovf_pci_get_data_size(struct vfio_device * vdev,unsigned long * stop_copy_length)1264 static int virtiovf_pci_get_data_size(struct vfio_device *vdev,
1265 unsigned long *stop_copy_length)
1266 {
1267 struct virtiovf_pci_core_device *virtvdev = container_of(
1268 vdev, struct virtiovf_pci_core_device, core_device.vdev);
1269 bool obj_id_exists;
1270 u32 res_size;
1271 u32 obj_id;
1272 int ret;
1273
1274 mutex_lock(&virtvdev->state_mutex);
1275 obj_id_exists = virtvdev->saving_migf && virtvdev->saving_migf->has_obj_id;
1276 if (!obj_id_exists) {
1277 ret = virtiovf_pci_alloc_obj_id(virtvdev,
1278 VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET,
1279 &obj_id);
1280 if (ret)
1281 goto end;
1282 } else {
1283 obj_id = virtvdev->saving_migf->obj_id;
1284 }
1285
1286 ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
1287 VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id,
1288 VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
1289 &res_size);
1290 if (!ret)
1291 *stop_copy_length = res_size;
1292
1293 /*
1294 * We can't leave this obj_id alive if didn't exist before, otherwise, it might
1295 * stay alive, even without an active migration flow (e.g. migration was cancelled)
1296 */
1297 if (!obj_id_exists)
1298 virtiovf_pci_free_obj_id(virtvdev, obj_id);
1299 end:
1300 virtiovf_state_mutex_unlock(virtvdev);
1301 return ret;
1302 }
1303
/* VFIO live-migration callbacks, installed by virtiovf_set_migratable() */
static const struct vfio_migration_ops virtvdev_pci_mig_ops = {
	.migration_set_state = virtiovf_pci_set_device_state,
	.migration_get_state = virtiovf_pci_get_device_state,
	.migration_get_data_size = virtiovf_pci_get_data_size,
};
1309
virtiovf_set_migratable(struct virtiovf_pci_core_device * virtvdev)1310 void virtiovf_set_migratable(struct virtiovf_pci_core_device *virtvdev)
1311 {
1312 virtvdev->migrate_cap = 1;
1313 mutex_init(&virtvdev->state_mutex);
1314 spin_lock_init(&virtvdev->reset_lock);
1315 virtvdev->core_device.vdev.migration_flags =
1316 VFIO_MIGRATION_STOP_COPY |
1317 VFIO_MIGRATION_P2P |
1318 VFIO_MIGRATION_PRE_COPY;
1319 virtvdev->core_device.vdev.mig_ops = &virtvdev_pci_mig_ops;
1320 }
1321
virtiovf_open_migration(struct virtiovf_pci_core_device * virtvdev)1322 void virtiovf_open_migration(struct virtiovf_pci_core_device *virtvdev)
1323 {
1324 if (!virtvdev->migrate_cap)
1325 return;
1326
1327 virtvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
1328 }
1329
virtiovf_close_migration(struct virtiovf_pci_core_device * virtvdev)1330 void virtiovf_close_migration(struct virtiovf_pci_core_device *virtvdev)
1331 {
1332 if (!virtvdev->migrate_cap)
1333 return;
1334
1335 virtiovf_disable_fds(virtvdev);
1336 }
1337