10bbc82e4SYishai Hadas // SPDX-License-Identifier: GPL-2.0-only 20bbc82e4SYishai Hadas /* 30bbc82e4SYishai Hadas * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved 40bbc82e4SYishai Hadas */ 50bbc82e4SYishai Hadas 60bbc82e4SYishai Hadas #include <linux/device.h> 70bbc82e4SYishai Hadas #include <linux/module.h> 80bbc82e4SYishai Hadas #include <linux/mutex.h> 90bbc82e4SYishai Hadas #include <linux/pci.h> 100bbc82e4SYishai Hadas #include <linux/pm_runtime.h> 110bbc82e4SYishai Hadas #include <linux/types.h> 120bbc82e4SYishai Hadas #include <linux/uaccess.h> 130bbc82e4SYishai Hadas #include <linux/vfio.h> 140bbc82e4SYishai Hadas #include <linux/vfio_pci_core.h> 150bbc82e4SYishai Hadas #include <linux/virtio_pci.h> 160bbc82e4SYishai Hadas #include <linux/virtio_net.h> 170bbc82e4SYishai Hadas #include <linux/virtio_pci_admin.h> 180bbc82e4SYishai Hadas #include <linux/anon_inodes.h> 190bbc82e4SYishai Hadas 200bbc82e4SYishai Hadas #include "common.h" 210bbc82e4SYishai Hadas 220bbc82e4SYishai Hadas /* Device specification max parts size */ 230bbc82e4SYishai Hadas #define MAX_LOAD_SIZE (BIT_ULL(BITS_PER_TYPE \ 240bbc82e4SYishai Hadas (((struct virtio_admin_cmd_dev_parts_metadata_result *)0)->parts_size.size)) - 1) 250bbc82e4SYishai Hadas 260bbc82e4SYishai Hadas /* Initial target buffer size */ 270bbc82e4SYishai Hadas #define VIRTIOVF_TARGET_INITIAL_BUF_SIZE SZ_1M 280bbc82e4SYishai Hadas 296cea64b1SYishai Hadas static int 306cea64b1SYishai Hadas virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf, 316cea64b1SYishai Hadas u32 ctx_size); 326cea64b1SYishai Hadas 330bbc82e4SYishai Hadas static struct page * 340bbc82e4SYishai Hadas virtiovf_get_migration_page(struct virtiovf_data_buffer *buf, 350bbc82e4SYishai Hadas unsigned long offset) 360bbc82e4SYishai Hadas { 370bbc82e4SYishai Hadas unsigned long cur_offset = 0; 380bbc82e4SYishai Hadas struct scatterlist *sg; 390bbc82e4SYishai Hadas unsigned int i; 400bbc82e4SYishai Hadas 410bbc82e4SYishai Hadas /* All accesses are sequential */ 420bbc82e4SYishai Hadas if (offset < buf->last_offset || !buf->last_offset_sg) { 430bbc82e4SYishai Hadas buf->last_offset = 0; 440bbc82e4SYishai Hadas buf->last_offset_sg = buf->table.sgt.sgl; 450bbc82e4SYishai Hadas buf->sg_last_entry = 0; 460bbc82e4SYishai Hadas } 470bbc82e4SYishai Hadas 480bbc82e4SYishai Hadas cur_offset = buf->last_offset; 490bbc82e4SYishai Hadas 500bbc82e4SYishai Hadas for_each_sg(buf->last_offset_sg, sg, 510bbc82e4SYishai Hadas buf->table.sgt.orig_nents - buf->sg_last_entry, i) { 520bbc82e4SYishai Hadas if (offset < sg->length + cur_offset) { 530bbc82e4SYishai Hadas buf->last_offset_sg = sg; 540bbc82e4SYishai Hadas buf->sg_last_entry += i; 550bbc82e4SYishai Hadas buf->last_offset = cur_offset; 560bbc82e4SYishai Hadas return nth_page(sg_page(sg), 570bbc82e4SYishai Hadas (offset - cur_offset) / PAGE_SIZE); 580bbc82e4SYishai Hadas } 590bbc82e4SYishai Hadas cur_offset += sg->length; 600bbc82e4SYishai Hadas } 610bbc82e4SYishai Hadas return NULL; 620bbc82e4SYishai Hadas } 630bbc82e4SYishai Hadas 640bbc82e4SYishai Hadas static int virtiovf_add_migration_pages(struct virtiovf_data_buffer *buf, 650bbc82e4SYishai Hadas unsigned int npages) 660bbc82e4SYishai Hadas { 670bbc82e4SYishai Hadas unsigned int to_alloc = npages; 680bbc82e4SYishai Hadas struct page **page_list; 690bbc82e4SYishai Hadas unsigned long filled; 700bbc82e4SYishai Hadas unsigned int to_fill; 710bbc82e4SYishai Hadas int ret; 720bbc82e4SYishai Hadas int i; 730bbc82e4SYishai Hadas 740bbc82e4SYishai Hadas to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list)); 750bbc82e4SYishai Hadas page_list = kvcalloc(to_fill, sizeof(*page_list), GFP_KERNEL_ACCOUNT); 760bbc82e4SYishai Hadas if (!page_list) 770bbc82e4SYishai Hadas return -ENOMEM; 780bbc82e4SYishai Hadas 790bbc82e4SYishai Hadas do { 80*6bf9b5b4SLuiz Capitulino filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, to_fill, 810bbc82e4SYishai Hadas page_list); 820bbc82e4SYishai Hadas if (!filled) { 830bbc82e4SYishai Hadas ret = -ENOMEM; 840bbc82e4SYishai Hadas goto err; 850bbc82e4SYishai Hadas } 860bbc82e4SYishai Hadas to_alloc -= filled; 870bbc82e4SYishai Hadas ret = sg_alloc_append_table_from_pages(&buf->table, page_list, 880bbc82e4SYishai Hadas filled, 0, filled << PAGE_SHIFT, UINT_MAX, 890bbc82e4SYishai Hadas SG_MAX_SINGLE_ALLOC, GFP_KERNEL_ACCOUNT); 900bbc82e4SYishai Hadas 910bbc82e4SYishai Hadas if (ret) 920bbc82e4SYishai Hadas goto err_append; 930bbc82e4SYishai Hadas buf->allocated_length += filled * PAGE_SIZE; 940bbc82e4SYishai Hadas /* clean input for another bulk allocation */ 950bbc82e4SYishai Hadas memset(page_list, 0, filled * sizeof(*page_list)); 960bbc82e4SYishai Hadas to_fill = min_t(unsigned int, to_alloc, 970bbc82e4SYishai Hadas PAGE_SIZE / sizeof(*page_list)); 980bbc82e4SYishai Hadas } while (to_alloc > 0); 990bbc82e4SYishai Hadas 1000bbc82e4SYishai Hadas kvfree(page_list); 1010bbc82e4SYishai Hadas return 0; 1020bbc82e4SYishai Hadas 1030bbc82e4SYishai Hadas err_append: 1040bbc82e4SYishai Hadas for (i = filled - 1; i >= 0; i--) 1050bbc82e4SYishai Hadas __free_page(page_list[i]); 1060bbc82e4SYishai Hadas err: 1070bbc82e4SYishai Hadas kvfree(page_list); 1080bbc82e4SYishai Hadas return ret; 1090bbc82e4SYishai Hadas } 1100bbc82e4SYishai Hadas 1110bbc82e4SYishai Hadas static void virtiovf_free_data_buffer(struct virtiovf_data_buffer *buf) 1120bbc82e4SYishai Hadas { 1130bbc82e4SYishai Hadas struct sg_page_iter sg_iter; 1140bbc82e4SYishai Hadas 115*6bf9b5b4SLuiz Capitulino /* Undo alloc_pages_bulk() */ 1160bbc82e4SYishai Hadas for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0) 1170bbc82e4SYishai Hadas __free_page(sg_page_iter_page(&sg_iter)); 1180bbc82e4SYishai Hadas sg_free_append_table(&buf->table); 1190bbc82e4SYishai Hadas kfree(buf); 1200bbc82e4SYishai Hadas } 1210bbc82e4SYishai Hadas 1220bbc82e4SYishai Hadas static struct virtiovf_data_buffer * 1230bbc82e4SYishai Hadas virtiovf_alloc_data_buffer(struct virtiovf_migration_file *migf, size_t length) 1240bbc82e4SYishai Hadas { 1250bbc82e4SYishai Hadas struct virtiovf_data_buffer *buf; 1260bbc82e4SYishai Hadas int ret; 1270bbc82e4SYishai Hadas 1280bbc82e4SYishai Hadas buf = kzalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT); 1290bbc82e4SYishai Hadas if (!buf) 1300bbc82e4SYishai Hadas return ERR_PTR(-ENOMEM); 1310bbc82e4SYishai Hadas 1320bbc82e4SYishai Hadas ret = virtiovf_add_migration_pages(buf, 1330bbc82e4SYishai Hadas DIV_ROUND_UP_ULL(length, PAGE_SIZE)); 1340bbc82e4SYishai Hadas if (ret) 1350bbc82e4SYishai Hadas goto end; 1360bbc82e4SYishai Hadas 1370bbc82e4SYishai Hadas buf->migf = migf; 1380bbc82e4SYishai Hadas return buf; 1390bbc82e4SYishai Hadas end: 1400bbc82e4SYishai Hadas virtiovf_free_data_buffer(buf); 1410bbc82e4SYishai Hadas return ERR_PTR(ret); 1420bbc82e4SYishai Hadas } 1430bbc82e4SYishai Hadas 1440bbc82e4SYishai Hadas static void virtiovf_put_data_buffer(struct virtiovf_data_buffer *buf) 1450bbc82e4SYishai Hadas { 1460bbc82e4SYishai Hadas spin_lock_irq(&buf->migf->list_lock); 1470bbc82e4SYishai Hadas list_add_tail(&buf->buf_elm, &buf->migf->avail_list); 1480bbc82e4SYishai Hadas spin_unlock_irq(&buf->migf->list_lock); 1490bbc82e4SYishai Hadas } 1500bbc82e4SYishai Hadas 1510bbc82e4SYishai Hadas static int 1520bbc82e4SYishai Hadas virtiovf_pci_alloc_obj_id(struct virtiovf_pci_core_device *virtvdev, u8 type, 1530bbc82e4SYishai Hadas u32 *obj_id) 1540bbc82e4SYishai Hadas { 1550bbc82e4SYishai Hadas return virtio_pci_admin_obj_create(virtvdev->core_device.pdev, 1560bbc82e4SYishai Hadas VIRTIO_RESOURCE_OBJ_DEV_PARTS, type, obj_id); 1570bbc82e4SYishai Hadas } 1580bbc82e4SYishai Hadas 1590bbc82e4SYishai Hadas static void 1600bbc82e4SYishai Hadas virtiovf_pci_free_obj_id(struct virtiovf_pci_core_device *virtvdev, u32 obj_id) 1610bbc82e4SYishai Hadas { 1620bbc82e4SYishai Hadas virtio_pci_admin_obj_destroy(virtvdev->core_device.pdev, 1630bbc82e4SYishai Hadas VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id); 1640bbc82e4SYishai Hadas } 1650bbc82e4SYishai Hadas 1666cea64b1SYishai Hadas static struct virtiovf_data_buffer * 1676cea64b1SYishai Hadas virtiovf_get_data_buffer(struct virtiovf_migration_file *migf, size_t length) 1686cea64b1SYishai Hadas { 1696cea64b1SYishai Hadas struct virtiovf_data_buffer *buf, *temp_buf; 1706cea64b1SYishai Hadas struct list_head free_list; 1716cea64b1SYishai Hadas 1726cea64b1SYishai Hadas INIT_LIST_HEAD(&free_list); 1736cea64b1SYishai Hadas 1746cea64b1SYishai Hadas spin_lock_irq(&migf->list_lock); 1756cea64b1SYishai Hadas list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) { 1766cea64b1SYishai Hadas list_del_init(&buf->buf_elm); 1776cea64b1SYishai Hadas if (buf->allocated_length >= length) { 1786cea64b1SYishai Hadas spin_unlock_irq(&migf->list_lock); 1796cea64b1SYishai Hadas goto found; 1806cea64b1SYishai Hadas } 1816cea64b1SYishai Hadas /* 1826cea64b1SYishai Hadas * Prevent holding redundant buffers. Put in a free 1836cea64b1SYishai Hadas * list and call at the end not under the spin lock 1846cea64b1SYishai Hadas * (&migf->list_lock) to minimize its scope usage. 1856cea64b1SYishai Hadas */ 1866cea64b1SYishai Hadas list_add(&buf->buf_elm, &free_list); 1876cea64b1SYishai Hadas } 1886cea64b1SYishai Hadas spin_unlock_irq(&migf->list_lock); 1896cea64b1SYishai Hadas buf = virtiovf_alloc_data_buffer(migf, length); 1906cea64b1SYishai Hadas 1916cea64b1SYishai Hadas found: 1926cea64b1SYishai Hadas while ((temp_buf = list_first_entry_or_null(&free_list, 1936cea64b1SYishai Hadas struct virtiovf_data_buffer, buf_elm))) { 1946cea64b1SYishai Hadas list_del(&temp_buf->buf_elm); 1956cea64b1SYishai Hadas virtiovf_free_data_buffer(temp_buf); 1966cea64b1SYishai Hadas } 1976cea64b1SYishai Hadas 1986cea64b1SYishai Hadas return buf; 1996cea64b1SYishai Hadas } 2006cea64b1SYishai Hadas 2010bbc82e4SYishai Hadas static void virtiovf_clean_migf_resources(struct virtiovf_migration_file *migf) 2020bbc82e4SYishai Hadas { 2030bbc82e4SYishai Hadas struct virtiovf_data_buffer *entry; 2040bbc82e4SYishai Hadas 2050bbc82e4SYishai Hadas if (migf->buf) { 2060bbc82e4SYishai Hadas virtiovf_free_data_buffer(migf->buf); 2070bbc82e4SYishai Hadas migf->buf = NULL; 2080bbc82e4SYishai Hadas } 2090bbc82e4SYishai Hadas 2100bbc82e4SYishai Hadas if (migf->buf_header) { 2110bbc82e4SYishai Hadas virtiovf_free_data_buffer(migf->buf_header); 2120bbc82e4SYishai Hadas migf->buf_header = NULL; 2130bbc82e4SYishai Hadas } 2140bbc82e4SYishai Hadas 2150bbc82e4SYishai Hadas list_splice(&migf->avail_list, &migf->buf_list); 2160bbc82e4SYishai Hadas 2170bbc82e4SYishai Hadas while ((entry = list_first_entry_or_null(&migf->buf_list, 2180bbc82e4SYishai Hadas struct virtiovf_data_buffer, buf_elm))) { 2190bbc82e4SYishai Hadas list_del(&entry->buf_elm); 2200bbc82e4SYishai Hadas virtiovf_free_data_buffer(entry); 2210bbc82e4SYishai Hadas } 2220bbc82e4SYishai Hadas 2230bbc82e4SYishai Hadas if (migf->has_obj_id) 2240bbc82e4SYishai Hadas virtiovf_pci_free_obj_id(migf->virtvdev, migf->obj_id); 2250bbc82e4SYishai Hadas } 2260bbc82e4SYishai Hadas 2270bbc82e4SYishai Hadas static void virtiovf_disable_fd(struct virtiovf_migration_file *migf) 2280bbc82e4SYishai Hadas { 2290bbc82e4SYishai Hadas mutex_lock(&migf->lock); 2300bbc82e4SYishai Hadas migf->state = VIRTIOVF_MIGF_STATE_ERROR; 2310bbc82e4SYishai Hadas migf->filp->f_pos = 0; 2320bbc82e4SYishai Hadas mutex_unlock(&migf->lock); 2330bbc82e4SYishai Hadas } 2340bbc82e4SYishai Hadas 2350bbc82e4SYishai Hadas static void virtiovf_disable_fds(struct virtiovf_pci_core_device *virtvdev) 2360bbc82e4SYishai Hadas { 2370bbc82e4SYishai Hadas if (virtvdev->resuming_migf) { 2380bbc82e4SYishai Hadas virtiovf_disable_fd(virtvdev->resuming_migf); 2390bbc82e4SYishai Hadas virtiovf_clean_migf_resources(virtvdev->resuming_migf); 2400bbc82e4SYishai Hadas fput(virtvdev->resuming_migf->filp); 2410bbc82e4SYishai Hadas virtvdev->resuming_migf = NULL; 2420bbc82e4SYishai Hadas } 2430bbc82e4SYishai Hadas if (virtvdev->saving_migf) { 2440bbc82e4SYishai Hadas virtiovf_disable_fd(virtvdev->saving_migf); 2450bbc82e4SYishai Hadas virtiovf_clean_migf_resources(virtvdev->saving_migf); 2460bbc82e4SYishai Hadas fput(virtvdev->saving_migf->filp); 2470bbc82e4SYishai Hadas virtvdev->saving_migf = NULL; 2480bbc82e4SYishai Hadas } 2490bbc82e4SYishai Hadas } 2500bbc82e4SYishai Hadas 2510bbc82e4SYishai Hadas /* 2520bbc82e4SYishai Hadas * This function is called in all state_mutex unlock cases to 2530bbc82e4SYishai Hadas * handle a 'deferred_reset' if exists. 2540bbc82e4SYishai Hadas */ 2550bbc82e4SYishai Hadas static void virtiovf_state_mutex_unlock(struct virtiovf_pci_core_device *virtvdev) 2560bbc82e4SYishai Hadas { 2570bbc82e4SYishai Hadas again: 2580bbc82e4SYishai Hadas spin_lock(&virtvdev->reset_lock); 2590bbc82e4SYishai Hadas if (virtvdev->deferred_reset) { 2600bbc82e4SYishai Hadas virtvdev->deferred_reset = false; 2610bbc82e4SYishai Hadas spin_unlock(&virtvdev->reset_lock); 2620bbc82e4SYishai Hadas virtvdev->mig_state = VFIO_DEVICE_STATE_RUNNING; 2630bbc82e4SYishai Hadas virtiovf_disable_fds(virtvdev); 2640bbc82e4SYishai Hadas goto again; 2650bbc82e4SYishai Hadas } 2660bbc82e4SYishai Hadas mutex_unlock(&virtvdev->state_mutex); 2670bbc82e4SYishai Hadas spin_unlock(&virtvdev->reset_lock); 2680bbc82e4SYishai Hadas } 2690bbc82e4SYishai Hadas 2700bbc82e4SYishai Hadas void virtiovf_migration_reset_done(struct pci_dev *pdev) 2710bbc82e4SYishai Hadas { 2720bbc82e4SYishai Hadas struct virtiovf_pci_core_device *virtvdev = dev_get_drvdata(&pdev->dev); 2730bbc82e4SYishai Hadas 2740bbc82e4SYishai Hadas if (!virtvdev->migrate_cap) 2750bbc82e4SYishai Hadas return; 2760bbc82e4SYishai Hadas 2770bbc82e4SYishai Hadas /* 2780bbc82e4SYishai Hadas * As the higher VFIO layers are holding locks across reset and using 2790bbc82e4SYishai Hadas * those same locks with the mm_lock we need to prevent ABBA deadlock 2800bbc82e4SYishai Hadas * with the state_mutex and mm_lock. 2810bbc82e4SYishai Hadas * In case the state_mutex was taken already we defer the cleanup work 2820bbc82e4SYishai Hadas * to the unlock flow of the other running context. 2830bbc82e4SYishai Hadas */ 2840bbc82e4SYishai Hadas spin_lock(&virtvdev->reset_lock); 2850bbc82e4SYishai Hadas virtvdev->deferred_reset = true; 2860bbc82e4SYishai Hadas if (!mutex_trylock(&virtvdev->state_mutex)) { 2870bbc82e4SYishai Hadas spin_unlock(&virtvdev->reset_lock); 2880bbc82e4SYishai Hadas return; 2890bbc82e4SYishai Hadas } 2900bbc82e4SYishai Hadas spin_unlock(&virtvdev->reset_lock); 2910bbc82e4SYishai Hadas virtiovf_state_mutex_unlock(virtvdev); 2920bbc82e4SYishai Hadas } 2930bbc82e4SYishai Hadas 2940bbc82e4SYishai Hadas static int virtiovf_release_file(struct inode *inode, struct file *filp) 2950bbc82e4SYishai Hadas { 2960bbc82e4SYishai Hadas struct virtiovf_migration_file *migf = filp->private_data; 2970bbc82e4SYishai Hadas 2980bbc82e4SYishai Hadas virtiovf_disable_fd(migf); 2990bbc82e4SYishai Hadas mutex_destroy(&migf->lock); 3000bbc82e4SYishai Hadas kfree(migf); 3010bbc82e4SYishai Hadas return 0; 3020bbc82e4SYishai Hadas } 3030bbc82e4SYishai Hadas 3040bbc82e4SYishai Hadas static struct virtiovf_data_buffer * 3050bbc82e4SYishai Hadas virtiovf_get_data_buff_from_pos(struct virtiovf_migration_file *migf, 3060bbc82e4SYishai Hadas loff_t pos, bool *end_of_data) 3070bbc82e4SYishai Hadas { 3080bbc82e4SYishai Hadas struct virtiovf_data_buffer *buf; 3090bbc82e4SYishai Hadas bool found = false; 3100bbc82e4SYishai Hadas 3110bbc82e4SYishai Hadas *end_of_data = false; 3120bbc82e4SYishai Hadas spin_lock_irq(&migf->list_lock); 3130bbc82e4SYishai Hadas if (list_empty(&migf->buf_list)) { 3140bbc82e4SYishai Hadas *end_of_data = true; 3150bbc82e4SYishai Hadas goto end; 3160bbc82e4SYishai Hadas } 3170bbc82e4SYishai Hadas 3180bbc82e4SYishai Hadas buf = list_first_entry(&migf->buf_list, struct virtiovf_data_buffer, 3190bbc82e4SYishai Hadas buf_elm); 3200bbc82e4SYishai Hadas if (pos >= buf->start_pos && 3210bbc82e4SYishai Hadas pos < buf->start_pos + buf->length) { 3220bbc82e4SYishai Hadas found = true; 3230bbc82e4SYishai Hadas goto end; 3240bbc82e4SYishai Hadas } 3250bbc82e4SYishai Hadas 3260bbc82e4SYishai Hadas /* 3270bbc82e4SYishai Hadas * As we use a stream based FD we may expect having the data always 3280bbc82e4SYishai Hadas * on first chunk 3290bbc82e4SYishai Hadas */ 3300bbc82e4SYishai Hadas migf->state = VIRTIOVF_MIGF_STATE_ERROR; 3310bbc82e4SYishai Hadas 3320bbc82e4SYishai Hadas end: 3330bbc82e4SYishai Hadas spin_unlock_irq(&migf->list_lock); 3340bbc82e4SYishai Hadas return found ? buf : NULL; 3350bbc82e4SYishai Hadas } 3360bbc82e4SYishai Hadas 3370bbc82e4SYishai Hadas static ssize_t virtiovf_buf_read(struct virtiovf_data_buffer *vhca_buf, 3380bbc82e4SYishai Hadas char __user **buf, size_t *len, loff_t *pos) 3390bbc82e4SYishai Hadas { 3400bbc82e4SYishai Hadas unsigned long offset; 3410bbc82e4SYishai Hadas ssize_t done = 0; 3420bbc82e4SYishai Hadas size_t copy_len; 3430bbc82e4SYishai Hadas 3440bbc82e4SYishai Hadas copy_len = min_t(size_t, 3450bbc82e4SYishai Hadas vhca_buf->start_pos + vhca_buf->length - *pos, *len); 3460bbc82e4SYishai Hadas while (copy_len) { 3470bbc82e4SYishai Hadas size_t page_offset; 3480bbc82e4SYishai Hadas struct page *page; 3490bbc82e4SYishai Hadas size_t page_len; 3500bbc82e4SYishai Hadas u8 *from_buff; 3510bbc82e4SYishai Hadas int ret; 3520bbc82e4SYishai Hadas 3530bbc82e4SYishai Hadas offset = *pos - vhca_buf->start_pos; 3540bbc82e4SYishai Hadas page_offset = offset % PAGE_SIZE; 3550bbc82e4SYishai Hadas offset -= page_offset; 3560bbc82e4SYishai Hadas page = virtiovf_get_migration_page(vhca_buf, offset); 3570bbc82e4SYishai Hadas if (!page) 3580bbc82e4SYishai Hadas return -EINVAL; 3590bbc82e4SYishai Hadas page_len = min_t(size_t, copy_len, PAGE_SIZE - page_offset); 3600bbc82e4SYishai Hadas from_buff = kmap_local_page(page); 3610bbc82e4SYishai Hadas ret = copy_to_user(*buf, from_buff + page_offset, page_len); 3620bbc82e4SYishai Hadas kunmap_local(from_buff); 3630bbc82e4SYishai Hadas if (ret) 3640bbc82e4SYishai Hadas return -EFAULT; 3650bbc82e4SYishai Hadas *pos += page_len; 3660bbc82e4SYishai Hadas *len -= page_len; 3670bbc82e4SYishai Hadas *buf += page_len; 3680bbc82e4SYishai Hadas done += page_len; 3690bbc82e4SYishai Hadas copy_len -= page_len; 3700bbc82e4SYishai Hadas } 3710bbc82e4SYishai Hadas 3720bbc82e4SYishai Hadas if (*pos >= vhca_buf->start_pos + vhca_buf->length) { 3730bbc82e4SYishai Hadas spin_lock_irq(&vhca_buf->migf->list_lock); 3740bbc82e4SYishai Hadas list_del_init(&vhca_buf->buf_elm); 3750bbc82e4SYishai Hadas list_add_tail(&vhca_buf->buf_elm, &vhca_buf->migf->avail_list); 3760bbc82e4SYishai Hadas spin_unlock_irq(&vhca_buf->migf->list_lock); 3770bbc82e4SYishai Hadas } 3780bbc82e4SYishai Hadas 3790bbc82e4SYishai Hadas return done; 3800bbc82e4SYishai Hadas } 3810bbc82e4SYishai Hadas 3820bbc82e4SYishai Hadas static ssize_t virtiovf_save_read(struct file *filp, char __user *buf, size_t len, 3830bbc82e4SYishai Hadas loff_t *pos) 3840bbc82e4SYishai Hadas { 3850bbc82e4SYishai Hadas struct virtiovf_migration_file *migf = filp->private_data; 3860bbc82e4SYishai Hadas struct virtiovf_data_buffer *vhca_buf; 3876cea64b1SYishai Hadas bool first_loop_call = true; 3880bbc82e4SYishai Hadas bool end_of_data; 3890bbc82e4SYishai Hadas ssize_t done = 0; 3900bbc82e4SYishai Hadas 3910bbc82e4SYishai Hadas if (pos) 3920bbc82e4SYishai Hadas return -ESPIPE; 3930bbc82e4SYishai Hadas pos = &filp->f_pos; 3940bbc82e4SYishai Hadas 3950bbc82e4SYishai Hadas mutex_lock(&migf->lock); 3960bbc82e4SYishai Hadas if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) { 3970bbc82e4SYishai Hadas done = -ENODEV; 3980bbc82e4SYishai Hadas goto out_unlock; 3990bbc82e4SYishai Hadas } 4000bbc82e4SYishai Hadas 4010bbc82e4SYishai Hadas while (len) { 4020bbc82e4SYishai Hadas ssize_t count; 4030bbc82e4SYishai Hadas 4040bbc82e4SYishai Hadas vhca_buf = virtiovf_get_data_buff_from_pos(migf, *pos, &end_of_data); 4056cea64b1SYishai Hadas if (first_loop_call) { 4066cea64b1SYishai Hadas first_loop_call = false; 4076cea64b1SYishai Hadas /* Temporary end of file as part of PRE_COPY */ 4086cea64b1SYishai Hadas if (end_of_data && migf->state == VIRTIOVF_MIGF_STATE_PRECOPY) { 4096cea64b1SYishai Hadas done = -ENOMSG; 4106cea64b1SYishai Hadas goto out_unlock; 4116cea64b1SYishai Hadas } 4126cea64b1SYishai Hadas if (end_of_data && migf->state != VIRTIOVF_MIGF_STATE_COMPLETE) { 4136cea64b1SYishai Hadas done = -EINVAL; 4146cea64b1SYishai Hadas goto out_unlock; 4156cea64b1SYishai Hadas } 4166cea64b1SYishai Hadas } 4176cea64b1SYishai Hadas 4180bbc82e4SYishai Hadas if (end_of_data) 4190bbc82e4SYishai Hadas goto out_unlock; 4200bbc82e4SYishai Hadas 4210bbc82e4SYishai Hadas if (!vhca_buf) { 4220bbc82e4SYishai Hadas done = -EINVAL; 4230bbc82e4SYishai Hadas goto out_unlock; 4240bbc82e4SYishai Hadas } 4250bbc82e4SYishai Hadas 4260bbc82e4SYishai Hadas count = virtiovf_buf_read(vhca_buf, &buf, &len, pos); 4270bbc82e4SYishai Hadas if (count < 0) { 4280bbc82e4SYishai Hadas done = count; 4290bbc82e4SYishai Hadas goto out_unlock; 4300bbc82e4SYishai Hadas } 4310bbc82e4SYishai Hadas done += count; 4320bbc82e4SYishai Hadas } 4330bbc82e4SYishai Hadas 4340bbc82e4SYishai Hadas out_unlock: 4350bbc82e4SYishai Hadas mutex_unlock(&migf->lock); 4360bbc82e4SYishai Hadas return done; 4370bbc82e4SYishai Hadas } 4380bbc82e4SYishai Hadas 4396cea64b1SYishai Hadas static long virtiovf_precopy_ioctl(struct file *filp, unsigned int cmd, 4406cea64b1SYishai Hadas unsigned long arg) 4416cea64b1SYishai Hadas { 4426cea64b1SYishai Hadas struct virtiovf_migration_file *migf = filp->private_data; 4436cea64b1SYishai Hadas struct virtiovf_pci_core_device *virtvdev = migf->virtvdev; 4446cea64b1SYishai Hadas struct vfio_precopy_info info = {}; 4456cea64b1SYishai Hadas loff_t *pos = &filp->f_pos; 4466cea64b1SYishai Hadas bool end_of_data = false; 4476cea64b1SYishai Hadas unsigned long minsz; 4486cea64b1SYishai Hadas u32 ctx_size = 0; 4496cea64b1SYishai Hadas int ret; 4506cea64b1SYishai Hadas 4516cea64b1SYishai Hadas if (cmd != VFIO_MIG_GET_PRECOPY_INFO) 4526cea64b1SYishai Hadas return -ENOTTY; 4536cea64b1SYishai Hadas 4546cea64b1SYishai Hadas minsz = offsetofend(struct vfio_precopy_info, dirty_bytes); 4556cea64b1SYishai Hadas if (copy_from_user(&info, (void __user *)arg, minsz)) 4566cea64b1SYishai Hadas return -EFAULT; 4576cea64b1SYishai Hadas 4586cea64b1SYishai Hadas if (info.argsz < minsz) 4596cea64b1SYishai Hadas return -EINVAL; 4606cea64b1SYishai Hadas 4616cea64b1SYishai Hadas mutex_lock(&virtvdev->state_mutex); 4626cea64b1SYishai Hadas if (virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY && 4636cea64b1SYishai Hadas virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) { 4646cea64b1SYishai Hadas ret = -EINVAL; 4656cea64b1SYishai Hadas goto err_state_unlock; 4666cea64b1SYishai Hadas } 4676cea64b1SYishai Hadas 4686cea64b1SYishai Hadas /* 4696cea64b1SYishai Hadas * The virtio specification does not include a PRE_COPY concept. 4706cea64b1SYishai Hadas * Since we can expect the data to remain the same for a certain period, 4716cea64b1SYishai Hadas * we use a rate limiter mechanism before making a call to the device. 4726cea64b1SYishai Hadas */ 4736cea64b1SYishai Hadas if (__ratelimit(&migf->pre_copy_rl_state)) { 4746cea64b1SYishai Hadas 4756cea64b1SYishai Hadas ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev, 4766cea64b1SYishai Hadas VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id, 4776cea64b1SYishai Hadas VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE, 4786cea64b1SYishai Hadas &ctx_size); 4796cea64b1SYishai Hadas if (ret) 4806cea64b1SYishai Hadas goto err_state_unlock; 4816cea64b1SYishai Hadas } 4826cea64b1SYishai Hadas 4836cea64b1SYishai Hadas mutex_lock(&migf->lock); 4846cea64b1SYishai Hadas if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) { 4856cea64b1SYishai Hadas ret = -ENODEV; 4866cea64b1SYishai Hadas goto err_migf_unlock; 4876cea64b1SYishai Hadas } 4886cea64b1SYishai Hadas 4896cea64b1SYishai Hadas if (migf->pre_copy_initial_bytes > *pos) { 4906cea64b1SYishai Hadas info.initial_bytes = migf->pre_copy_initial_bytes - *pos; 4916cea64b1SYishai Hadas } else { 4926cea64b1SYishai Hadas info.dirty_bytes = migf->max_pos - *pos; 4936cea64b1SYishai Hadas if (!info.dirty_bytes) 4946cea64b1SYishai Hadas end_of_data = true; 4956cea64b1SYishai Hadas info.dirty_bytes += ctx_size; 4966cea64b1SYishai Hadas } 4976cea64b1SYishai Hadas 4986cea64b1SYishai Hadas if (!end_of_data || !ctx_size) { 4996cea64b1SYishai Hadas mutex_unlock(&migf->lock); 5006cea64b1SYishai Hadas goto done; 5016cea64b1SYishai Hadas } 5026cea64b1SYishai Hadas 5036cea64b1SYishai Hadas mutex_unlock(&migf->lock); 5046cea64b1SYishai Hadas /* 5056cea64b1SYishai Hadas * We finished transferring the current state and the device has a 5066cea64b1SYishai Hadas * dirty state, read a new state. 5076cea64b1SYishai Hadas */ 5086cea64b1SYishai Hadas ret = virtiovf_read_device_context_chunk(migf, ctx_size); 5096cea64b1SYishai Hadas if (ret) 5106cea64b1SYishai Hadas /* 5116cea64b1SYishai Hadas * The machine is running, and context size could be grow, so no reason to mark 5126cea64b1SYishai Hadas * the device state as VIRTIOVF_MIGF_STATE_ERROR. 5136cea64b1SYishai Hadas */ 5146cea64b1SYishai Hadas goto err_state_unlock; 5156cea64b1SYishai Hadas 5166cea64b1SYishai Hadas done: 5176cea64b1SYishai Hadas virtiovf_state_mutex_unlock(virtvdev); 5186cea64b1SYishai Hadas if (copy_to_user((void __user *)arg, &info, minsz)) 5196cea64b1SYishai Hadas return -EFAULT; 5206cea64b1SYishai Hadas return 0; 5216cea64b1SYishai Hadas 5226cea64b1SYishai Hadas err_migf_unlock: 5236cea64b1SYishai Hadas mutex_unlock(&migf->lock); 5246cea64b1SYishai Hadas err_state_unlock: 5256cea64b1SYishai Hadas virtiovf_state_mutex_unlock(virtvdev); 5266cea64b1SYishai Hadas return ret; 5276cea64b1SYishai Hadas } 5286cea64b1SYishai Hadas 5290bbc82e4SYishai Hadas static const struct file_operations virtiovf_save_fops = { 5300bbc82e4SYishai Hadas .owner = THIS_MODULE, 5310bbc82e4SYishai Hadas .read = virtiovf_save_read, 5326cea64b1SYishai Hadas .unlocked_ioctl = virtiovf_precopy_ioctl, 5336cea64b1SYishai Hadas .compat_ioctl = compat_ptr_ioctl, 5340bbc82e4SYishai Hadas .release = virtiovf_release_file, 5350bbc82e4SYishai Hadas }; 5360bbc82e4SYishai Hadas 5370bbc82e4SYishai Hadas static int 5380bbc82e4SYishai Hadas virtiovf_add_buf_header(struct virtiovf_data_buffer *header_buf, 5390bbc82e4SYishai Hadas u32 data_size) 5400bbc82e4SYishai Hadas { 5410bbc82e4SYishai Hadas struct virtiovf_migration_file *migf = header_buf->migf; 5420bbc82e4SYishai Hadas struct virtiovf_migration_header header = {}; 5430bbc82e4SYishai Hadas struct page *page; 5440bbc82e4SYishai Hadas u8 *to_buff; 5450bbc82e4SYishai Hadas 5460bbc82e4SYishai Hadas header.record_size = cpu_to_le64(data_size); 5470bbc82e4SYishai Hadas header.flags = cpu_to_le32(VIRTIOVF_MIGF_HEADER_FLAGS_TAG_MANDATORY); 5480bbc82e4SYishai Hadas header.tag = cpu_to_le32(VIRTIOVF_MIGF_HEADER_TAG_DEVICE_DATA); 5490bbc82e4SYishai Hadas page = virtiovf_get_migration_page(header_buf, 0); 5500bbc82e4SYishai Hadas if (!page) 5510bbc82e4SYishai Hadas return -EINVAL; 5520bbc82e4SYishai Hadas to_buff = kmap_local_page(page); 5530bbc82e4SYishai Hadas memcpy(to_buff, &header, sizeof(header)); 5540bbc82e4SYishai Hadas kunmap_local(to_buff); 5550bbc82e4SYishai Hadas header_buf->length = sizeof(header); 5560bbc82e4SYishai Hadas header_buf->start_pos = header_buf->migf->max_pos; 5570bbc82e4SYishai Hadas migf->max_pos += header_buf->length; 5580bbc82e4SYishai Hadas spin_lock_irq(&migf->list_lock); 5590bbc82e4SYishai Hadas list_add_tail(&header_buf->buf_elm, &migf->buf_list); 5600bbc82e4SYishai Hadas spin_unlock_irq(&migf->list_lock); 5610bbc82e4SYishai Hadas return 0; 5620bbc82e4SYishai Hadas } 5630bbc82e4SYishai Hadas 5640bbc82e4SYishai Hadas static int 5650bbc82e4SYishai Hadas virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf, 5660bbc82e4SYishai Hadas u32 ctx_size) 5670bbc82e4SYishai Hadas { 5680bbc82e4SYishai Hadas struct virtiovf_data_buffer *header_buf; 5690bbc82e4SYishai Hadas struct virtiovf_data_buffer *buf; 5700bbc82e4SYishai Hadas bool unmark_end = false; 5710bbc82e4SYishai Hadas struct scatterlist *sg; 5720bbc82e4SYishai Hadas unsigned int i; 5730bbc82e4SYishai Hadas u32 res_size; 5740bbc82e4SYishai Hadas int nent; 5750bbc82e4SYishai Hadas int ret; 5760bbc82e4SYishai Hadas 5776cea64b1SYishai Hadas buf = virtiovf_get_data_buffer(migf, ctx_size); 5780bbc82e4SYishai Hadas if (IS_ERR(buf)) 5790bbc82e4SYishai Hadas return PTR_ERR(buf); 5800bbc82e4SYishai Hadas 5810bbc82e4SYishai Hadas /* Find the total count of SG entries which satisfies the size */ 5820bbc82e4SYishai Hadas nent = sg_nents_for_len(buf->table.sgt.sgl, ctx_size); 5830bbc82e4SYishai Hadas if (nent <= 0) { 5840bbc82e4SYishai Hadas ret = -EINVAL; 5850bbc82e4SYishai Hadas goto out; 5860bbc82e4SYishai Hadas } 5870bbc82e4SYishai Hadas 5880bbc82e4SYishai Hadas /* 5890bbc82e4SYishai Hadas * Iterate to that SG entry and mark it as last (if it's not already) 5900bbc82e4SYishai Hadas * to let underlay layers iterate only till that entry. 5910bbc82e4SYishai Hadas */ 5920bbc82e4SYishai Hadas for_each_sg(buf->table.sgt.sgl, sg, nent - 1, i) 5930bbc82e4SYishai Hadas ; 5940bbc82e4SYishai Hadas 5950bbc82e4SYishai Hadas if (!sg_is_last(sg)) { 5960bbc82e4SYishai Hadas unmark_end = true; 5970bbc82e4SYishai Hadas sg_mark_end(sg); 5980bbc82e4SYishai Hadas } 5990bbc82e4SYishai Hadas 6000bbc82e4SYishai Hadas ret = virtio_pci_admin_dev_parts_get(migf->virtvdev->core_device.pdev, 6010bbc82e4SYishai Hadas VIRTIO_RESOURCE_OBJ_DEV_PARTS, 6020bbc82e4SYishai Hadas migf->obj_id, 6030bbc82e4SYishai Hadas VIRTIO_ADMIN_CMD_DEV_PARTS_GET_TYPE_ALL, 6040bbc82e4SYishai Hadas buf->table.sgt.sgl, &res_size); 6050bbc82e4SYishai Hadas /* Restore the original SG mark end */ 6060bbc82e4SYishai Hadas if (unmark_end) 6070bbc82e4SYishai Hadas sg_unmark_end(sg); 6080bbc82e4SYishai Hadas if (ret) 6090bbc82e4SYishai Hadas goto out; 6100bbc82e4SYishai Hadas 6110bbc82e4SYishai Hadas buf->length = res_size; 6126cea64b1SYishai Hadas header_buf = virtiovf_get_data_buffer(migf, 6130bbc82e4SYishai Hadas sizeof(struct virtiovf_migration_header)); 6140bbc82e4SYishai Hadas if (IS_ERR(header_buf)) { 6150bbc82e4SYishai Hadas ret = PTR_ERR(header_buf); 6160bbc82e4SYishai Hadas goto out; 6170bbc82e4SYishai Hadas } 6180bbc82e4SYishai Hadas 6190bbc82e4SYishai Hadas ret = virtiovf_add_buf_header(header_buf, res_size); 6200bbc82e4SYishai Hadas if (ret) 6210bbc82e4SYishai Hadas goto out_header; 6220bbc82e4SYishai Hadas 6230bbc82e4SYishai Hadas buf->start_pos = buf->migf->max_pos; 6240bbc82e4SYishai Hadas migf->max_pos += buf->length; 6250bbc82e4SYishai Hadas spin_lock(&migf->list_lock); 6260bbc82e4SYishai Hadas list_add_tail(&buf->buf_elm, &migf->buf_list); 6270bbc82e4SYishai Hadas spin_unlock_irq(&migf->list_lock); 6280bbc82e4SYishai Hadas return 0; 6290bbc82e4SYishai Hadas 6300bbc82e4SYishai Hadas out_header: 6310bbc82e4SYishai Hadas virtiovf_put_data_buffer(header_buf); 6320bbc82e4SYishai Hadas out: 6330bbc82e4SYishai Hadas virtiovf_put_data_buffer(buf); 6340bbc82e4SYishai Hadas return ret; 6350bbc82e4SYishai Hadas } 6360bbc82e4SYishai Hadas 6376cea64b1SYishai Hadas static int 6386cea64b1SYishai Hadas virtiovf_pci_save_device_final_data(struct virtiovf_pci_core_device *virtvdev) 6396cea64b1SYishai Hadas { 6406cea64b1SYishai Hadas struct virtiovf_migration_file *migf = virtvdev->saving_migf; 6416cea64b1SYishai Hadas u32 ctx_size; 6426cea64b1SYishai Hadas int ret; 6436cea64b1SYishai Hadas 6446cea64b1SYishai Hadas if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) 6456cea64b1SYishai Hadas return -ENODEV; 6466cea64b1SYishai Hadas 6476cea64b1SYishai Hadas ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev, 6486cea64b1SYishai Hadas VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id, 6496cea64b1SYishai Hadas VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE, 6506cea64b1SYishai Hadas &ctx_size); 6516cea64b1SYishai Hadas if (ret) 6526cea64b1SYishai Hadas goto err; 6536cea64b1SYishai Hadas 6546cea64b1SYishai Hadas if (!ctx_size) { 6556cea64b1SYishai Hadas ret = -EINVAL; 6566cea64b1SYishai Hadas goto err; 6576cea64b1SYishai Hadas } 6586cea64b1SYishai Hadas 6596cea64b1SYishai Hadas ret = virtiovf_read_device_context_chunk(migf, ctx_size); 6606cea64b1SYishai Hadas if (ret) 6616cea64b1SYishai Hadas goto err; 6626cea64b1SYishai Hadas 6636cea64b1SYishai Hadas migf->state = VIRTIOVF_MIGF_STATE_COMPLETE; 6646cea64b1SYishai Hadas return 0; 6656cea64b1SYishai Hadas 6666cea64b1SYishai Hadas err: 6676cea64b1SYishai Hadas migf->state = VIRTIOVF_MIGF_STATE_ERROR; 6686cea64b1SYishai Hadas return ret; 6696cea64b1SYishai Hadas } 6706cea64b1SYishai Hadas 6710bbc82e4SYishai Hadas static struct virtiovf_migration_file * 6726cea64b1SYishai Hadas virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev, 6736cea64b1SYishai Hadas bool pre_copy) 6740bbc82e4SYishai Hadas { 6750bbc82e4SYishai Hadas struct virtiovf_migration_file *migf; 6760bbc82e4SYishai Hadas u32 ctx_size; 6770bbc82e4SYishai Hadas u32 obj_id; 6780bbc82e4SYishai Hadas int ret; 6790bbc82e4SYishai Hadas 6800bbc82e4SYishai Hadas migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT); 6810bbc82e4SYishai Hadas if (!migf) 6820bbc82e4SYishai Hadas return ERR_PTR(-ENOMEM); 6830bbc82e4SYishai Hadas 6840bbc82e4SYishai Hadas migf->filp = anon_inode_getfile("virtiovf_mig", &virtiovf_save_fops, migf, 6850bbc82e4SYishai Hadas O_RDONLY); 6860bbc82e4SYishai Hadas if (IS_ERR(migf->filp)) { 6870bbc82e4SYishai Hadas ret = PTR_ERR(migf->filp); 6880bbc82e4SYishai Hadas kfree(migf); 6890bbc82e4SYishai Hadas return ERR_PTR(ret); 6900bbc82e4SYishai Hadas } 6910bbc82e4SYishai Hadas 6920bbc82e4SYishai Hadas stream_open(migf->filp->f_inode, migf->filp); 6930bbc82e4SYishai Hadas mutex_init(&migf->lock); 6940bbc82e4SYishai Hadas INIT_LIST_HEAD(&migf->buf_list); 6950bbc82e4SYishai Hadas INIT_LIST_HEAD(&migf->avail_list); 6960bbc82e4SYishai Hadas spin_lock_init(&migf->list_lock); 6970bbc82e4SYishai Hadas migf->virtvdev = virtvdev; 6980bbc82e4SYishai Hadas 6990bbc82e4SYishai Hadas lockdep_assert_held(&virtvdev->state_mutex); 7000bbc82e4SYishai Hadas ret = virtiovf_pci_alloc_obj_id(virtvdev, VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET, 7010bbc82e4SYishai Hadas &obj_id); 7020bbc82e4SYishai Hadas if (ret) 7030bbc82e4SYishai Hadas goto out; 7040bbc82e4SYishai Hadas 7050bbc82e4SYishai Hadas migf->obj_id = obj_id; 7060bbc82e4SYishai Hadas /* Mark as having a valid obj id which can be even 0 */ 7070bbc82e4SYishai Hadas migf->has_obj_id = true; 7080bbc82e4SYishai Hadas ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev, 7090bbc82e4SYishai Hadas VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id, 7100bbc82e4SYishai Hadas VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE, 7110bbc82e4SYishai Hadas &ctx_size); 7120bbc82e4SYishai Hadas if (ret) 7130bbc82e4SYishai Hadas goto out_clean; 7140bbc82e4SYishai Hadas 7150bbc82e4SYishai Hadas if (!ctx_size) { 7160bbc82e4SYishai Hadas ret = -EINVAL; 7170bbc82e4SYishai Hadas goto out_clean; 7180bbc82e4SYishai Hadas } 7190bbc82e4SYishai Hadas 7200bbc82e4SYishai Hadas ret = virtiovf_read_device_context_chunk(migf, ctx_size); 7210bbc82e4SYishai Hadas if (ret) 7220bbc82e4SYishai Hadas goto out_clean; 7230bbc82e4SYishai Hadas 7246cea64b1SYishai Hadas if (pre_copy) { 7256cea64b1SYishai Hadas migf->pre_copy_initial_bytes = migf->max_pos; 7266cea64b1SYishai Hadas /* Arbitrarily set the pre-copy rate limit to 1-second intervals */ 7276cea64b1SYishai Hadas ratelimit_state_init(&migf->pre_copy_rl_state, 1 * HZ, 1); 7286cea64b1SYishai Hadas /* Prevent any rate messages upon its usage */ 7296cea64b1SYishai Hadas ratelimit_set_flags(&migf->pre_copy_rl_state, 7306cea64b1SYishai Hadas RATELIMIT_MSG_ON_RELEASE); 7316cea64b1SYishai Hadas migf->state = VIRTIOVF_MIGF_STATE_PRECOPY; 7326cea64b1SYishai Hadas } else { 7336cea64b1SYishai Hadas migf->state = VIRTIOVF_MIGF_STATE_COMPLETE; 7346cea64b1SYishai Hadas } 7356cea64b1SYishai Hadas 7360bbc82e4SYishai Hadas return migf; 7370bbc82e4SYishai Hadas 7380bbc82e4SYishai Hadas out_clean: 7390bbc82e4SYishai Hadas virtiovf_clean_migf_resources(migf); 7400bbc82e4SYishai Hadas out: 7410bbc82e4SYishai Hadas fput(migf->filp); 7420bbc82e4SYishai Hadas return ERR_PTR(ret); 7430bbc82e4SYishai Hadas } 7440bbc82e4SYishai Hadas 7450bbc82e4SYishai Hadas /* 7460bbc82e4SYishai Hadas * Set the required object header at the beginning of the buffer. 7470bbc82e4SYishai Hadas * The actual device parts data will be written post of the header offset. 7480bbc82e4SYishai Hadas */ 7490bbc82e4SYishai Hadas static int virtiovf_set_obj_cmd_header(struct virtiovf_data_buffer *vhca_buf) 7500bbc82e4SYishai Hadas { 7510bbc82e4SYishai Hadas struct virtio_admin_cmd_resource_obj_cmd_hdr obj_hdr = {}; 7520bbc82e4SYishai Hadas struct page *page; 7530bbc82e4SYishai Hadas u8 *to_buff; 7540bbc82e4SYishai Hadas 7550bbc82e4SYishai Hadas obj_hdr.type = cpu_to_le16(VIRTIO_RESOURCE_OBJ_DEV_PARTS); 7560bbc82e4SYishai Hadas obj_hdr.id = cpu_to_le32(vhca_buf->migf->obj_id); 7570bbc82e4SYishai Hadas page = virtiovf_get_migration_page(vhca_buf, 0); 7580bbc82e4SYishai Hadas if (!page) 7590bbc82e4SYishai Hadas return -EINVAL; 7600bbc82e4SYishai Hadas to_buff = kmap_local_page(page); 7610bbc82e4SYishai Hadas memcpy(to_buff, &obj_hdr, sizeof(obj_hdr)); 7620bbc82e4SYishai Hadas kunmap_local(to_buff); 7630bbc82e4SYishai Hadas 7640bbc82e4SYishai Hadas /* Mark the buffer as including the header object data */ 7650bbc82e4SYishai Hadas vhca_buf->include_header_object = 1; 7660bbc82e4SYishai Hadas return 0; 7670bbc82e4SYishai Hadas } 7680bbc82e4SYishai Hadas 7690bbc82e4SYishai Hadas static int 7700bbc82e4SYishai Hadas virtiovf_append_page_to_mig_buf(struct virtiovf_data_buffer *vhca_buf, 7710bbc82e4SYishai Hadas const char __user **buf, size_t *len, 7720bbc82e4SYishai Hadas loff_t *pos, ssize_t *done) 7730bbc82e4SYishai Hadas { 7740bbc82e4SYishai Hadas unsigned long offset; 7750bbc82e4SYishai Hadas size_t page_offset; 7760bbc82e4SYishai Hadas struct page *page; 7770bbc82e4SYishai Hadas size_t page_len; 7780bbc82e4SYishai Hadas u8 *to_buff; 7790bbc82e4SYishai Hadas int ret; 7800bbc82e4SYishai Hadas 7810bbc82e4SYishai Hadas offset = *pos - vhca_buf->start_pos; 7820bbc82e4SYishai Hadas 7830bbc82e4SYishai Hadas if (vhca_buf->include_header_object) 7840bbc82e4SYishai Hadas /* The buffer holds the object header, update the offset accordingly */ 7850bbc82e4SYishai Hadas offset += sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr); 7860bbc82e4SYishai Hadas 7870bbc82e4SYishai Hadas page_offset = offset % PAGE_SIZE; 7880bbc82e4SYishai Hadas 7890bbc82e4SYishai Hadas page = virtiovf_get_migration_page(vhca_buf, offset - page_offset); 7900bbc82e4SYishai Hadas if (!page) 7910bbc82e4SYishai Hadas return -EINVAL; 7920bbc82e4SYishai Hadas 7930bbc82e4SYishai Hadas page_len = min_t(size_t, *len, PAGE_SIZE - page_offset); 7940bbc82e4SYishai Hadas to_buff = kmap_local_page(page); 7950bbc82e4SYishai Hadas ret = copy_from_user(to_buff + page_offset, *buf, page_len); 7960bbc82e4SYishai Hadas kunmap_local(to_buff); 7970bbc82e4SYishai Hadas if (ret) 7980bbc82e4SYishai Hadas return -EFAULT; 7990bbc82e4SYishai Hadas 8000bbc82e4SYishai Hadas *pos += page_len; 8010bbc82e4SYishai Hadas *done += page_len; 8020bbc82e4SYishai Hadas *buf += page_len; 8030bbc82e4SYishai Hadas *len -= page_len; 8040bbc82e4SYishai Hadas vhca_buf->length += page_len; 8050bbc82e4SYishai Hadas return 0; 8060bbc82e4SYishai Hadas } 8070bbc82e4SYishai Hadas 8080bbc82e4SYishai Hadas static ssize_t 8090bbc82e4SYishai Hadas virtiovf_resume_read_chunk(struct virtiovf_migration_file *migf, 8100bbc82e4SYishai Hadas struct virtiovf_data_buffer *vhca_buf, 8110bbc82e4SYishai Hadas size_t chunk_size, const char __user **buf, 8120bbc82e4SYishai Hadas size_t *len, loff_t *pos, ssize_t *done, 8130bbc82e4SYishai Hadas bool *has_work) 8140bbc82e4SYishai Hadas { 8150bbc82e4SYishai Hadas size_t copy_len, to_copy; 8160bbc82e4SYishai Hadas int ret; 8170bbc82e4SYishai Hadas 8180bbc82e4SYishai Hadas to_copy = min_t(size_t, *len, chunk_size - vhca_buf->length); 8190bbc82e4SYishai Hadas copy_len = to_copy; 8200bbc82e4SYishai Hadas while (to_copy) { 8210bbc82e4SYishai Hadas ret = virtiovf_append_page_to_mig_buf(vhca_buf, buf, &to_copy, 8220bbc82e4SYishai Hadas pos, done); 8230bbc82e4SYishai Hadas if (ret) 8240bbc82e4SYishai Hadas return ret; 8250bbc82e4SYishai Hadas } 8260bbc82e4SYishai Hadas 8270bbc82e4SYishai Hadas *len -= copy_len; 8280bbc82e4SYishai Hadas if (vhca_buf->length == chunk_size) { 8290bbc82e4SYishai Hadas migf->load_state = VIRTIOVF_LOAD_STATE_LOAD_CHUNK; 8300bbc82e4SYishai Hadas migf->max_pos += chunk_size; 8310bbc82e4SYishai Hadas *has_work = true; 8320bbc82e4SYishai Hadas } 8330bbc82e4SYishai Hadas 8340bbc82e4SYishai Hadas return 0; 8350bbc82e4SYishai Hadas } 8360bbc82e4SYishai Hadas 8370bbc82e4SYishai Hadas static int 8380bbc82e4SYishai Hadas virtiovf_resume_read_header_data(struct virtiovf_migration_file *migf, 8390bbc82e4SYishai Hadas struct virtiovf_data_buffer *vhca_buf, 8400bbc82e4SYishai Hadas const char __user **buf, size_t *len, 8410bbc82e4SYishai Hadas loff_t *pos, ssize_t *done) 8420bbc82e4SYishai Hadas { 8430bbc82e4SYishai Hadas size_t copy_len, to_copy; 8440bbc82e4SYishai Hadas size_t required_data; 8450bbc82e4SYishai Hadas int ret; 8460bbc82e4SYishai Hadas 8470bbc82e4SYishai Hadas required_data = migf->record_size - vhca_buf->length; 8480bbc82e4SYishai Hadas to_copy = min_t(size_t, *len, required_data); 8490bbc82e4SYishai Hadas copy_len = to_copy; 8500bbc82e4SYishai Hadas while (to_copy) { 8510bbc82e4SYishai Hadas ret = virtiovf_append_page_to_mig_buf(vhca_buf, buf, &to_copy, 8520bbc82e4SYishai Hadas pos, done); 8530bbc82e4SYishai Hadas if (ret) 8540bbc82e4SYishai Hadas return ret; 8550bbc82e4SYishai Hadas } 8560bbc82e4SYishai Hadas 8570bbc82e4SYishai Hadas *len -= copy_len; 8580bbc82e4SYishai Hadas if (vhca_buf->length == migf->record_size) { 8590bbc82e4SYishai Hadas switch (migf->record_tag) { 8600bbc82e4SYishai Hadas default: 8610bbc82e4SYishai Hadas /* Optional tag */ 8620bbc82e4SYishai Hadas break; 8630bbc82e4SYishai Hadas } 8640bbc82e4SYishai Hadas 8650bbc82e4SYishai Hadas migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER; 8660bbc82e4SYishai Hadas migf->max_pos += migf->record_size; 8670bbc82e4SYishai Hadas vhca_buf->length = 0; 8680bbc82e4SYishai Hadas } 8690bbc82e4SYishai Hadas 8700bbc82e4SYishai Hadas return 0; 8710bbc82e4SYishai Hadas } 8720bbc82e4SYishai Hadas 8730bbc82e4SYishai Hadas static int 8740bbc82e4SYishai Hadas virtiovf_resume_read_header(struct virtiovf_migration_file *migf, 8750bbc82e4SYishai Hadas struct virtiovf_data_buffer *vhca_buf, 8760bbc82e4SYishai Hadas const char __user **buf, 8770bbc82e4SYishai Hadas size_t *len, loff_t *pos, 8780bbc82e4SYishai Hadas ssize_t *done, bool *has_work) 8790bbc82e4SYishai Hadas { 8800bbc82e4SYishai Hadas struct page *page; 8810bbc82e4SYishai Hadas size_t copy_len; 8820bbc82e4SYishai Hadas u8 *to_buff; 8830bbc82e4SYishai Hadas int ret; 8840bbc82e4SYishai Hadas 8850bbc82e4SYishai Hadas copy_len = min_t(size_t, *len, 8860bbc82e4SYishai Hadas sizeof(struct virtiovf_migration_header) - vhca_buf->length); 8870bbc82e4SYishai Hadas page = virtiovf_get_migration_page(vhca_buf, 0); 8880bbc82e4SYishai Hadas if (!page) 8890bbc82e4SYishai Hadas return -EINVAL; 8900bbc82e4SYishai Hadas to_buff = kmap_local_page(page); 8910bbc82e4SYishai Hadas ret = copy_from_user(to_buff + vhca_buf->length, *buf, copy_len); 8920bbc82e4SYishai Hadas if (ret) { 8930bbc82e4SYishai Hadas ret = -EFAULT; 8940bbc82e4SYishai Hadas goto end; 8950bbc82e4SYishai Hadas } 8960bbc82e4SYishai Hadas 8970bbc82e4SYishai Hadas *buf += copy_len; 8980bbc82e4SYishai Hadas *pos += copy_len; 8990bbc82e4SYishai Hadas *done += copy_len; 9000bbc82e4SYishai Hadas *len -= copy_len; 9010bbc82e4SYishai Hadas vhca_buf->length += copy_len; 9020bbc82e4SYishai Hadas if (vhca_buf->length == sizeof(struct virtiovf_migration_header)) { 9030bbc82e4SYishai Hadas u64 record_size; 9040bbc82e4SYishai Hadas u32 flags; 9050bbc82e4SYishai Hadas 9060bbc82e4SYishai Hadas record_size = le64_to_cpup((__le64 *)to_buff); 9070bbc82e4SYishai Hadas if (record_size > MAX_LOAD_SIZE) { 9080bbc82e4SYishai Hadas ret = -ENOMEM; 9090bbc82e4SYishai Hadas goto end; 9100bbc82e4SYishai Hadas } 9110bbc82e4SYishai Hadas 9120bbc82e4SYishai Hadas migf->record_size = record_size; 9130bbc82e4SYishai Hadas flags = le32_to_cpup((__le32 *)(to_buff + 9140bbc82e4SYishai Hadas offsetof(struct virtiovf_migration_header, flags))); 9150bbc82e4SYishai Hadas migf->record_tag = le32_to_cpup((__le32 *)(to_buff + 9160bbc82e4SYishai Hadas offsetof(struct virtiovf_migration_header, tag))); 9170bbc82e4SYishai Hadas switch (migf->record_tag) { 9180bbc82e4SYishai Hadas case VIRTIOVF_MIGF_HEADER_TAG_DEVICE_DATA: 9190bbc82e4SYishai Hadas migf->load_state = VIRTIOVF_LOAD_STATE_PREP_CHUNK; 9200bbc82e4SYishai Hadas break; 9210bbc82e4SYishai Hadas default: 9220bbc82e4SYishai Hadas if (!(flags & VIRTIOVF_MIGF_HEADER_FLAGS_TAG_OPTIONAL)) { 9230bbc82e4SYishai Hadas ret = -EOPNOTSUPP; 9240bbc82e4SYishai Hadas goto end; 9250bbc82e4SYishai Hadas } 9260bbc82e4SYishai Hadas /* We may read and skip this optional record data */ 9270bbc82e4SYishai Hadas migf->load_state = VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA; 9280bbc82e4SYishai Hadas } 9290bbc82e4SYishai Hadas 9300bbc82e4SYishai Hadas migf->max_pos += vhca_buf->length; 9310bbc82e4SYishai Hadas vhca_buf->length = 0; 9320bbc82e4SYishai Hadas *has_work = true; 9330bbc82e4SYishai Hadas } 9340bbc82e4SYishai Hadas end: 9350bbc82e4SYishai Hadas kunmap_local(to_buff); 9360bbc82e4SYishai Hadas return ret; 9370bbc82e4SYishai Hadas } 9380bbc82e4SYishai Hadas 9390bbc82e4SYishai Hadas static ssize_t virtiovf_resume_write(struct file *filp, const char __user *buf, 9400bbc82e4SYishai Hadas size_t len, loff_t *pos) 9410bbc82e4SYishai Hadas { 9420bbc82e4SYishai Hadas struct virtiovf_migration_file *migf = filp->private_data; 9430bbc82e4SYishai Hadas struct virtiovf_data_buffer *vhca_buf = migf->buf; 9440bbc82e4SYishai Hadas struct virtiovf_data_buffer *vhca_buf_header = migf->buf_header; 9450bbc82e4SYishai Hadas unsigned int orig_length; 9460bbc82e4SYishai Hadas bool has_work = false; 9470bbc82e4SYishai Hadas ssize_t done = 0; 9480bbc82e4SYishai Hadas int ret = 0; 9490bbc82e4SYishai Hadas 9500bbc82e4SYishai Hadas if (pos) 9510bbc82e4SYishai Hadas return -ESPIPE; 9520bbc82e4SYishai Hadas 9530bbc82e4SYishai Hadas pos = &filp->f_pos; 9540bbc82e4SYishai Hadas if (*pos < vhca_buf->start_pos) 9550bbc82e4SYishai Hadas return -EINVAL; 9560bbc82e4SYishai Hadas 9570bbc82e4SYishai Hadas mutex_lock(&migf->virtvdev->state_mutex); 9580bbc82e4SYishai Hadas mutex_lock(&migf->lock); 9590bbc82e4SYishai Hadas if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) { 9600bbc82e4SYishai Hadas done = -ENODEV; 9610bbc82e4SYishai Hadas goto out_unlock; 9620bbc82e4SYishai Hadas } 9630bbc82e4SYishai Hadas 9640bbc82e4SYishai Hadas while (len || has_work) { 9650bbc82e4SYishai Hadas has_work = false; 9660bbc82e4SYishai Hadas switch (migf->load_state) { 9670bbc82e4SYishai Hadas case VIRTIOVF_LOAD_STATE_READ_HEADER: 9680bbc82e4SYishai Hadas ret = virtiovf_resume_read_header(migf, vhca_buf_header, &buf, 9690bbc82e4SYishai Hadas &len, pos, &done, &has_work); 9700bbc82e4SYishai Hadas if (ret) 9710bbc82e4SYishai Hadas goto out_unlock; 9720bbc82e4SYishai Hadas break; 9730bbc82e4SYishai Hadas case VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA: 9740bbc82e4SYishai Hadas if (vhca_buf_header->allocated_length < migf->record_size) { 9750bbc82e4SYishai Hadas virtiovf_free_data_buffer(vhca_buf_header); 9760bbc82e4SYishai Hadas 9770bbc82e4SYishai Hadas migf->buf_header = virtiovf_alloc_data_buffer(migf, 9780bbc82e4SYishai Hadas migf->record_size); 9790bbc82e4SYishai Hadas if (IS_ERR(migf->buf_header)) { 9800bbc82e4SYishai Hadas ret = PTR_ERR(migf->buf_header); 9810bbc82e4SYishai Hadas migf->buf_header = NULL; 9820bbc82e4SYishai Hadas goto out_unlock; 9830bbc82e4SYishai Hadas } 9840bbc82e4SYishai Hadas 9850bbc82e4SYishai Hadas vhca_buf_header = migf->buf_header; 9860bbc82e4SYishai Hadas } 9870bbc82e4SYishai Hadas 9880bbc82e4SYishai Hadas vhca_buf_header->start_pos = migf->max_pos; 9890bbc82e4SYishai Hadas migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER_DATA; 9900bbc82e4SYishai Hadas break; 9910bbc82e4SYishai Hadas case VIRTIOVF_LOAD_STATE_READ_HEADER_DATA: 9920bbc82e4SYishai Hadas ret = virtiovf_resume_read_header_data(migf, vhca_buf_header, 9930bbc82e4SYishai Hadas &buf, &len, pos, &done); 9940bbc82e4SYishai Hadas if (ret) 9950bbc82e4SYishai Hadas goto out_unlock; 9960bbc82e4SYishai Hadas break; 9970bbc82e4SYishai Hadas case VIRTIOVF_LOAD_STATE_PREP_CHUNK: 9980bbc82e4SYishai Hadas { 9990bbc82e4SYishai Hadas u32 cmd_size = migf->record_size + 10000bbc82e4SYishai Hadas sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr); 10010bbc82e4SYishai Hadas 10020bbc82e4SYishai Hadas /* 10030bbc82e4SYishai Hadas * The DMA map/unmap is managed in virtio layer, we just need to extend 10040bbc82e4SYishai Hadas * the SG pages to hold the extra required chunk data. 10050bbc82e4SYishai Hadas */ 10060bbc82e4SYishai Hadas if (vhca_buf->allocated_length < cmd_size) { 10070bbc82e4SYishai Hadas ret = virtiovf_add_migration_pages(vhca_buf, 10080bbc82e4SYishai Hadas DIV_ROUND_UP_ULL(cmd_size - vhca_buf->allocated_length, 10090bbc82e4SYishai Hadas PAGE_SIZE)); 10100bbc82e4SYishai Hadas if (ret) 10110bbc82e4SYishai Hadas goto out_unlock; 10120bbc82e4SYishai Hadas } 10130bbc82e4SYishai Hadas 10140bbc82e4SYishai Hadas vhca_buf->start_pos = migf->max_pos; 10150bbc82e4SYishai Hadas migf->load_state = VIRTIOVF_LOAD_STATE_READ_CHUNK; 10160bbc82e4SYishai Hadas break; 10170bbc82e4SYishai Hadas } 10180bbc82e4SYishai Hadas case VIRTIOVF_LOAD_STATE_READ_CHUNK: 10190bbc82e4SYishai Hadas ret = virtiovf_resume_read_chunk(migf, vhca_buf, migf->record_size, 10200bbc82e4SYishai Hadas &buf, &len, pos, &done, &has_work); 10210bbc82e4SYishai Hadas if (ret) 10220bbc82e4SYishai Hadas goto out_unlock; 10230bbc82e4SYishai Hadas break; 10240bbc82e4SYishai Hadas case VIRTIOVF_LOAD_STATE_LOAD_CHUNK: 10250bbc82e4SYishai Hadas /* Mark the last SG entry and set its length */ 10260bbc82e4SYishai Hadas sg_mark_end(vhca_buf->last_offset_sg); 10270bbc82e4SYishai Hadas orig_length = vhca_buf->last_offset_sg->length; 10280bbc82e4SYishai Hadas /* Length should include the resource object command header */ 10290bbc82e4SYishai Hadas vhca_buf->last_offset_sg->length = vhca_buf->length + 10300bbc82e4SYishai Hadas sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr) - 10310bbc82e4SYishai Hadas vhca_buf->last_offset; 10320bbc82e4SYishai Hadas ret = virtio_pci_admin_dev_parts_set(migf->virtvdev->core_device.pdev, 10330bbc82e4SYishai Hadas vhca_buf->table.sgt.sgl); 10340bbc82e4SYishai Hadas /* Restore the original SG data */ 10350bbc82e4SYishai Hadas vhca_buf->last_offset_sg->length = orig_length; 10360bbc82e4SYishai Hadas sg_unmark_end(vhca_buf->last_offset_sg); 10370bbc82e4SYishai Hadas if (ret) 10380bbc82e4SYishai Hadas goto out_unlock; 10390bbc82e4SYishai Hadas migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER; 10400bbc82e4SYishai Hadas /* be ready for reading the next chunk */ 10410bbc82e4SYishai Hadas vhca_buf->length = 0; 10420bbc82e4SYishai Hadas break; 10430bbc82e4SYishai Hadas default: 10440bbc82e4SYishai Hadas break; 10450bbc82e4SYishai Hadas } 10460bbc82e4SYishai Hadas } 10470bbc82e4SYishai Hadas 10480bbc82e4SYishai Hadas out_unlock: 10490bbc82e4SYishai Hadas if (ret) 10500bbc82e4SYishai Hadas migf->state = VIRTIOVF_MIGF_STATE_ERROR; 10510bbc82e4SYishai Hadas mutex_unlock(&migf->lock); 10520bbc82e4SYishai Hadas virtiovf_state_mutex_unlock(migf->virtvdev); 10530bbc82e4SYishai Hadas return ret ? ret : done; 10540bbc82e4SYishai Hadas } 10550bbc82e4SYishai Hadas 10560bbc82e4SYishai Hadas static const struct file_operations virtiovf_resume_fops = { 10570bbc82e4SYishai Hadas .owner = THIS_MODULE, 10580bbc82e4SYishai Hadas .write = virtiovf_resume_write, 10590bbc82e4SYishai Hadas .release = virtiovf_release_file, 10600bbc82e4SYishai Hadas }; 10610bbc82e4SYishai Hadas 10620bbc82e4SYishai Hadas static struct virtiovf_migration_file * 10630bbc82e4SYishai Hadas virtiovf_pci_resume_device_data(struct virtiovf_pci_core_device *virtvdev) 10640bbc82e4SYishai Hadas { 10650bbc82e4SYishai Hadas struct virtiovf_migration_file *migf; 10660bbc82e4SYishai Hadas struct virtiovf_data_buffer *buf; 10670bbc82e4SYishai Hadas u32 obj_id; 10680bbc82e4SYishai Hadas int ret; 10690bbc82e4SYishai Hadas 10700bbc82e4SYishai Hadas migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT); 10710bbc82e4SYishai Hadas if (!migf) 10720bbc82e4SYishai Hadas return ERR_PTR(-ENOMEM); 10730bbc82e4SYishai Hadas 10740bbc82e4SYishai Hadas migf->filp = anon_inode_getfile("virtiovf_mig", &virtiovf_resume_fops, migf, 10750bbc82e4SYishai Hadas O_WRONLY); 10760bbc82e4SYishai Hadas if (IS_ERR(migf->filp)) { 10770bbc82e4SYishai Hadas ret = PTR_ERR(migf->filp); 10780bbc82e4SYishai Hadas kfree(migf); 10790bbc82e4SYishai Hadas return ERR_PTR(ret); 10800bbc82e4SYishai Hadas } 10810bbc82e4SYishai Hadas 10820bbc82e4SYishai Hadas stream_open(migf->filp->f_inode, migf->filp); 10830bbc82e4SYishai Hadas mutex_init(&migf->lock); 10840bbc82e4SYishai Hadas INIT_LIST_HEAD(&migf->buf_list); 10850bbc82e4SYishai Hadas INIT_LIST_HEAD(&migf->avail_list); 10860bbc82e4SYishai Hadas spin_lock_init(&migf->list_lock); 10870bbc82e4SYishai Hadas 10880bbc82e4SYishai Hadas buf = virtiovf_alloc_data_buffer(migf, VIRTIOVF_TARGET_INITIAL_BUF_SIZE); 10890bbc82e4SYishai Hadas if (IS_ERR(buf)) { 10900bbc82e4SYishai Hadas ret = PTR_ERR(buf); 10910bbc82e4SYishai Hadas goto out; 10920bbc82e4SYishai Hadas } 10930bbc82e4SYishai Hadas 10940bbc82e4SYishai Hadas migf->buf = buf; 10950bbc82e4SYishai Hadas 10960bbc82e4SYishai Hadas buf = virtiovf_alloc_data_buffer(migf, 10970bbc82e4SYishai Hadas sizeof(struct virtiovf_migration_header)); 10980bbc82e4SYishai Hadas if (IS_ERR(buf)) { 10990bbc82e4SYishai Hadas ret = PTR_ERR(buf); 11000bbc82e4SYishai Hadas goto out_clean; 11010bbc82e4SYishai Hadas } 11020bbc82e4SYishai Hadas 11030bbc82e4SYishai Hadas migf->buf_header = buf; 11040bbc82e4SYishai Hadas migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER; 11050bbc82e4SYishai Hadas 11060bbc82e4SYishai Hadas migf->virtvdev = virtvdev; 11070bbc82e4SYishai Hadas ret = virtiovf_pci_alloc_obj_id(virtvdev, VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_SET, 11080bbc82e4SYishai Hadas &obj_id); 11090bbc82e4SYishai Hadas if (ret) 11100bbc82e4SYishai Hadas goto out_clean; 11110bbc82e4SYishai Hadas 11120bbc82e4SYishai Hadas migf->obj_id = obj_id; 11130bbc82e4SYishai Hadas /* Mark as having a valid obj id which can be even 0 */ 11140bbc82e4SYishai Hadas migf->has_obj_id = true; 11150bbc82e4SYishai Hadas ret = virtiovf_set_obj_cmd_header(migf->buf); 11160bbc82e4SYishai Hadas if (ret) 11170bbc82e4SYishai Hadas goto out_clean; 11180bbc82e4SYishai Hadas 11190bbc82e4SYishai Hadas return migf; 11200bbc82e4SYishai Hadas 11210bbc82e4SYishai Hadas out_clean: 11220bbc82e4SYishai Hadas virtiovf_clean_migf_resources(migf); 11230bbc82e4SYishai Hadas out: 11240bbc82e4SYishai Hadas fput(migf->filp); 11250bbc82e4SYishai Hadas return ERR_PTR(ret); 11260bbc82e4SYishai Hadas } 11270bbc82e4SYishai Hadas 11280bbc82e4SYishai Hadas static struct file * 11290bbc82e4SYishai Hadas virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev, 11300bbc82e4SYishai Hadas u32 new) 11310bbc82e4SYishai Hadas { 11320bbc82e4SYishai Hadas u32 cur = virtvdev->mig_state; 11330bbc82e4SYishai Hadas int ret; 11340bbc82e4SYishai Hadas 11350bbc82e4SYishai Hadas if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) { 11360bbc82e4SYishai Hadas /* NOP */ 11370bbc82e4SYishai Hadas return NULL; 11380bbc82e4SYishai Hadas } 11390bbc82e4SYishai Hadas 11400bbc82e4SYishai Hadas if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P) { 11410bbc82e4SYishai Hadas /* NOP */ 11420bbc82e4SYishai Hadas return NULL; 11430bbc82e4SYishai Hadas } 11440bbc82e4SYishai Hadas 11456cea64b1SYishai Hadas if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) || 11466cea64b1SYishai Hadas (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) { 11470bbc82e4SYishai Hadas ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev, 11480bbc82e4SYishai Hadas BIT(VIRTIO_ADMIN_CMD_DEV_MODE_F_STOPPED)); 11490bbc82e4SYishai Hadas if (ret) 11500bbc82e4SYishai Hadas return ERR_PTR(ret); 11510bbc82e4SYishai Hadas return NULL; 11520bbc82e4SYishai Hadas } 11530bbc82e4SYishai Hadas 11546cea64b1SYishai Hadas if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) || 11556cea64b1SYishai Hadas (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY)) { 11560bbc82e4SYishai Hadas ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev, 0); 11570bbc82e4SYishai Hadas if (ret) 11580bbc82e4SYishai Hadas return ERR_PTR(ret); 11590bbc82e4SYishai Hadas return NULL; 11600bbc82e4SYishai Hadas } 11610bbc82e4SYishai Hadas 11620bbc82e4SYishai Hadas if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) { 11630bbc82e4SYishai Hadas struct virtiovf_migration_file *migf; 11640bbc82e4SYishai Hadas 11656cea64b1SYishai Hadas migf = virtiovf_pci_save_device_data(virtvdev, false); 11660bbc82e4SYishai Hadas if (IS_ERR(migf)) 11670bbc82e4SYishai Hadas return ERR_CAST(migf); 11680bbc82e4SYishai Hadas get_file(migf->filp); 11690bbc82e4SYishai Hadas virtvdev->saving_migf = migf; 11700bbc82e4SYishai Hadas return migf->filp; 11710bbc82e4SYishai Hadas } 11720bbc82e4SYishai Hadas 11736cea64b1SYishai Hadas if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) || 11746cea64b1SYishai Hadas (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) || 11756cea64b1SYishai Hadas (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P)) { 11760bbc82e4SYishai Hadas virtiovf_disable_fds(virtvdev); 11770bbc82e4SYishai Hadas return NULL; 11780bbc82e4SYishai Hadas } 11790bbc82e4SYishai Hadas 11800bbc82e4SYishai Hadas if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) { 11810bbc82e4SYishai Hadas struct virtiovf_migration_file *migf; 11820bbc82e4SYishai Hadas 11830bbc82e4SYishai Hadas migf = virtiovf_pci_resume_device_data(virtvdev); 11840bbc82e4SYishai Hadas if (IS_ERR(migf)) 11850bbc82e4SYishai Hadas return ERR_CAST(migf); 11860bbc82e4SYishai Hadas get_file(migf->filp); 11870bbc82e4SYishai Hadas virtvdev->resuming_migf = migf; 11880bbc82e4SYishai Hadas return migf->filp; 11890bbc82e4SYishai Hadas } 11900bbc82e4SYishai Hadas 11910bbc82e4SYishai Hadas if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) { 11920bbc82e4SYishai Hadas virtiovf_disable_fds(virtvdev); 11930bbc82e4SYishai Hadas return NULL; 11940bbc82e4SYishai Hadas } 11950bbc82e4SYishai Hadas 11966cea64b1SYishai Hadas if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) || 11976cea64b1SYishai Hadas (cur == VFIO_DEVICE_STATE_RUNNING_P2P && 11986cea64b1SYishai Hadas new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) { 11996cea64b1SYishai Hadas struct virtiovf_migration_file *migf; 12006cea64b1SYishai Hadas 12016cea64b1SYishai Hadas migf = virtiovf_pci_save_device_data(virtvdev, true); 12026cea64b1SYishai Hadas if (IS_ERR(migf)) 12036cea64b1SYishai Hadas return ERR_CAST(migf); 12046cea64b1SYishai Hadas get_file(migf->filp); 12056cea64b1SYishai Hadas virtvdev->saving_migf = migf; 12066cea64b1SYishai Hadas return migf->filp; 12076cea64b1SYishai Hadas } 12086cea64b1SYishai Hadas 12096cea64b1SYishai Hadas if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY) { 12106cea64b1SYishai Hadas ret = virtiovf_pci_save_device_final_data(virtvdev); 12116cea64b1SYishai Hadas return ret ? ERR_PTR(ret) : NULL; 12126cea64b1SYishai Hadas } 12136cea64b1SYishai Hadas 12140bbc82e4SYishai Hadas /* 12150bbc82e4SYishai Hadas * vfio_mig_get_next_state() does not use arcs other than the above 12160bbc82e4SYishai Hadas */ 12170bbc82e4SYishai Hadas WARN_ON(true); 12180bbc82e4SYishai Hadas return ERR_PTR(-EINVAL); 12190bbc82e4SYishai Hadas } 12200bbc82e4SYishai Hadas 12210bbc82e4SYishai Hadas static struct file * 12220bbc82e4SYishai Hadas virtiovf_pci_set_device_state(struct vfio_device *vdev, 12230bbc82e4SYishai Hadas enum vfio_device_mig_state new_state) 12240bbc82e4SYishai Hadas { 12250bbc82e4SYishai Hadas struct virtiovf_pci_core_device *virtvdev = container_of( 12260bbc82e4SYishai Hadas vdev, struct virtiovf_pci_core_device, core_device.vdev); 12270bbc82e4SYishai Hadas enum vfio_device_mig_state next_state; 12280bbc82e4SYishai Hadas struct file *res = NULL; 12290bbc82e4SYishai Hadas int ret; 12300bbc82e4SYishai Hadas 12310bbc82e4SYishai Hadas mutex_lock(&virtvdev->state_mutex); 12320bbc82e4SYishai Hadas while (new_state != virtvdev->mig_state) { 12330bbc82e4SYishai Hadas ret = vfio_mig_get_next_state(vdev, virtvdev->mig_state, 12340bbc82e4SYishai Hadas new_state, &next_state); 12350bbc82e4SYishai Hadas if (ret) { 12360bbc82e4SYishai Hadas res = ERR_PTR(ret); 12370bbc82e4SYishai Hadas break; 12380bbc82e4SYishai Hadas } 12390bbc82e4SYishai Hadas res = virtiovf_pci_step_device_state_locked(virtvdev, next_state); 12400bbc82e4SYishai Hadas if (IS_ERR(res)) 12410bbc82e4SYishai Hadas break; 12420bbc82e4SYishai Hadas virtvdev->mig_state = next_state; 12430bbc82e4SYishai Hadas if (WARN_ON(res && new_state != virtvdev->mig_state)) { 12440bbc82e4SYishai Hadas fput(res); 12450bbc82e4SYishai Hadas res = ERR_PTR(-EINVAL); 12460bbc82e4SYishai Hadas break; 12470bbc82e4SYishai Hadas } 12480bbc82e4SYishai Hadas } 12490bbc82e4SYishai Hadas virtiovf_state_mutex_unlock(virtvdev); 12500bbc82e4SYishai Hadas return res; 12510bbc82e4SYishai Hadas } 12520bbc82e4SYishai Hadas 12530bbc82e4SYishai Hadas static int virtiovf_pci_get_device_state(struct vfio_device *vdev, 12540bbc82e4SYishai Hadas enum vfio_device_mig_state *curr_state) 12550bbc82e4SYishai Hadas { 12560bbc82e4SYishai Hadas struct virtiovf_pci_core_device *virtvdev = container_of( 12570bbc82e4SYishai Hadas vdev, struct virtiovf_pci_core_device, core_device.vdev); 12580bbc82e4SYishai Hadas 12590bbc82e4SYishai Hadas mutex_lock(&virtvdev->state_mutex); 12600bbc82e4SYishai Hadas *curr_state = virtvdev->mig_state; 12610bbc82e4SYishai Hadas virtiovf_state_mutex_unlock(virtvdev); 12620bbc82e4SYishai Hadas return 0; 12630bbc82e4SYishai Hadas } 12640bbc82e4SYishai Hadas 12650bbc82e4SYishai Hadas static int virtiovf_pci_get_data_size(struct vfio_device *vdev, 12660bbc82e4SYishai Hadas unsigned long *stop_copy_length) 12670bbc82e4SYishai Hadas { 12680bbc82e4SYishai Hadas struct virtiovf_pci_core_device *virtvdev = container_of( 12690bbc82e4SYishai Hadas vdev, struct virtiovf_pci_core_device, core_device.vdev); 12700bbc82e4SYishai Hadas bool obj_id_exists; 12710bbc82e4SYishai Hadas u32 res_size; 12720bbc82e4SYishai Hadas u32 obj_id; 12730bbc82e4SYishai Hadas int ret; 12740bbc82e4SYishai Hadas 12750bbc82e4SYishai Hadas mutex_lock(&virtvdev->state_mutex); 12760bbc82e4SYishai Hadas obj_id_exists = virtvdev->saving_migf && virtvdev->saving_migf->has_obj_id; 12770bbc82e4SYishai Hadas if (!obj_id_exists) { 12780bbc82e4SYishai Hadas ret = virtiovf_pci_alloc_obj_id(virtvdev, 12790bbc82e4SYishai Hadas VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET, 12800bbc82e4SYishai Hadas &obj_id); 12810bbc82e4SYishai Hadas if (ret) 12820bbc82e4SYishai Hadas goto end; 12830bbc82e4SYishai Hadas } else { 12840bbc82e4SYishai Hadas obj_id = virtvdev->saving_migf->obj_id; 12850bbc82e4SYishai Hadas } 12860bbc82e4SYishai Hadas 12870bbc82e4SYishai Hadas ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev, 12880bbc82e4SYishai Hadas VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id, 12890bbc82e4SYishai Hadas VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE, 12900bbc82e4SYishai Hadas &res_size); 12910bbc82e4SYishai Hadas if (!ret) 12920bbc82e4SYishai Hadas *stop_copy_length = res_size; 12930bbc82e4SYishai Hadas 12940bbc82e4SYishai Hadas /* 12950bbc82e4SYishai Hadas * We can't leave this obj_id alive if didn't exist before, otherwise, it might 12960bbc82e4SYishai Hadas * stay alive, even without an active migration flow (e.g. migration was cancelled) 12970bbc82e4SYishai Hadas */ 12980bbc82e4SYishai Hadas if (!obj_id_exists) 12990bbc82e4SYishai Hadas virtiovf_pci_free_obj_id(virtvdev, obj_id); 13000bbc82e4SYishai Hadas end: 13010bbc82e4SYishai Hadas virtiovf_state_mutex_unlock(virtvdev); 13020bbc82e4SYishai Hadas return ret; 13030bbc82e4SYishai Hadas } 13040bbc82e4SYishai Hadas 13050bbc82e4SYishai Hadas static const struct vfio_migration_ops virtvdev_pci_mig_ops = { 13060bbc82e4SYishai Hadas .migration_set_state = virtiovf_pci_set_device_state, 13070bbc82e4SYishai Hadas .migration_get_state = virtiovf_pci_get_device_state, 13080bbc82e4SYishai Hadas .migration_get_data_size = virtiovf_pci_get_data_size, 13090bbc82e4SYishai Hadas }; 13100bbc82e4SYishai Hadas 13110bbc82e4SYishai Hadas void virtiovf_set_migratable(struct virtiovf_pci_core_device *virtvdev) 13120bbc82e4SYishai Hadas { 13130bbc82e4SYishai Hadas virtvdev->migrate_cap = 1; 13140bbc82e4SYishai Hadas mutex_init(&virtvdev->state_mutex); 13150bbc82e4SYishai Hadas spin_lock_init(&virtvdev->reset_lock); 13160bbc82e4SYishai Hadas virtvdev->core_device.vdev.migration_flags = 13170bbc82e4SYishai Hadas VFIO_MIGRATION_STOP_COPY | 13186cea64b1SYishai Hadas VFIO_MIGRATION_P2P | 13196cea64b1SYishai Hadas VFIO_MIGRATION_PRE_COPY; 13200bbc82e4SYishai Hadas virtvdev->core_device.vdev.mig_ops = &virtvdev_pci_mig_ops; 13210bbc82e4SYishai Hadas } 13220bbc82e4SYishai Hadas 13230bbc82e4SYishai Hadas void virtiovf_open_migration(struct virtiovf_pci_core_device *virtvdev) 13240bbc82e4SYishai Hadas { 13250bbc82e4SYishai Hadas if (!virtvdev->migrate_cap) 13260bbc82e4SYishai Hadas return; 13270bbc82e4SYishai Hadas 13280bbc82e4SYishai Hadas virtvdev->mig_state = VFIO_DEVICE_STATE_RUNNING; 13290bbc82e4SYishai Hadas } 13300bbc82e4SYishai Hadas 13310bbc82e4SYishai Hadas void virtiovf_close_migration(struct virtiovf_pci_core_device *virtvdev) 13320bbc82e4SYishai Hadas { 13330bbc82e4SYishai Hadas if (!virtvdev->migrate_cap) 13340bbc82e4SYishai Hadas return; 13350bbc82e4SYishai Hadas 13360bbc82e4SYishai Hadas virtiovf_disable_fds(virtvdev); 13370bbc82e4SYishai Hadas } 1338