1f232836aSBrett Creeley // SPDX-License-Identifier: GPL-2.0 2f232836aSBrett Creeley /* Copyright(c) 2023 Advanced Micro Devices, Inc. */ 3f232836aSBrett Creeley 4f232836aSBrett Creeley #include <linux/interval_tree.h> 5f232836aSBrett Creeley #include <linux/vfio.h> 6f232836aSBrett Creeley 7f232836aSBrett Creeley #include <linux/pds/pds_common.h> 8f232836aSBrett Creeley #include <linux/pds/pds_core_if.h> 9f232836aSBrett Creeley #include <linux/pds/pds_adminq.h> 10f232836aSBrett Creeley 11f232836aSBrett Creeley #include "vfio_dev.h" 12f232836aSBrett Creeley #include "cmds.h" 13f232836aSBrett Creeley #include "dirty.h" 14f232836aSBrett Creeley 15f232836aSBrett Creeley #define READ_SEQ true 16f232836aSBrett Creeley #define WRITE_ACK false 17f232836aSBrett Creeley 18f232836aSBrett Creeley bool pds_vfio_dirty_is_enabled(struct pds_vfio_pci_device *pds_vfio) 19f232836aSBrett Creeley { 20f232836aSBrett Creeley return pds_vfio->dirty.is_enabled; 21f232836aSBrett Creeley } 22f232836aSBrett Creeley 23f232836aSBrett Creeley void pds_vfio_dirty_set_enabled(struct pds_vfio_pci_device *pds_vfio) 24f232836aSBrett Creeley { 25f232836aSBrett Creeley pds_vfio->dirty.is_enabled = true; 26f232836aSBrett Creeley } 27f232836aSBrett Creeley 28f232836aSBrett Creeley void pds_vfio_dirty_set_disabled(struct pds_vfio_pci_device *pds_vfio) 29f232836aSBrett Creeley { 30f232836aSBrett Creeley pds_vfio->dirty.is_enabled = false; 31f232836aSBrett Creeley } 32f232836aSBrett Creeley 33f232836aSBrett Creeley static void 34f232836aSBrett Creeley pds_vfio_print_guest_region_info(struct pds_vfio_pci_device *pds_vfio, 35f232836aSBrett Creeley u8 max_regions) 36f232836aSBrett Creeley { 37f232836aSBrett Creeley int len = max_regions * sizeof(struct pds_lm_dirty_region_info); 38f232836aSBrett Creeley struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; 39f232836aSBrett Creeley struct device *pdsc_dev = &pci_physfn(pdev)->dev; 40f232836aSBrett Creeley struct pds_lm_dirty_region_info *region_info; 41f232836aSBrett Creeley dma_addr_t regions_dma; 42f232836aSBrett Creeley u8 num_regions; 43f232836aSBrett Creeley int err; 44f232836aSBrett Creeley 45f232836aSBrett Creeley region_info = kcalloc(max_regions, 46f232836aSBrett Creeley sizeof(struct pds_lm_dirty_region_info), 47f232836aSBrett Creeley GFP_KERNEL); 48f232836aSBrett Creeley if (!region_info) 49f232836aSBrett Creeley return; 50f232836aSBrett Creeley 51f232836aSBrett Creeley regions_dma = 52f232836aSBrett Creeley dma_map_single(pdsc_dev, region_info, len, DMA_FROM_DEVICE); 53f232836aSBrett Creeley if (dma_mapping_error(pdsc_dev, regions_dma)) 54f232836aSBrett Creeley goto out_free_region_info; 55f232836aSBrett Creeley 56f232836aSBrett Creeley err = pds_vfio_dirty_status_cmd(pds_vfio, regions_dma, &max_regions, 57f232836aSBrett Creeley &num_regions); 58f232836aSBrett Creeley dma_unmap_single(pdsc_dev, regions_dma, len, DMA_FROM_DEVICE); 59f232836aSBrett Creeley if (err) 60f232836aSBrett Creeley goto out_free_region_info; 61f232836aSBrett Creeley 62f232836aSBrett Creeley for (unsigned int i = 0; i < num_regions; i++) 63f232836aSBrett Creeley dev_dbg(&pdev->dev, 64f232836aSBrett Creeley "region_info[%d]: dma_base 0x%llx page_count %u page_size_log2 %u\n", 65f232836aSBrett Creeley i, le64_to_cpu(region_info[i].dma_base), 66f232836aSBrett Creeley le32_to_cpu(region_info[i].page_count), 67f232836aSBrett Creeley region_info[i].page_size_log2); 68f232836aSBrett Creeley 69f232836aSBrett Creeley out_free_region_info: 70f232836aSBrett Creeley kfree(region_info); 71f232836aSBrett Creeley } 72f232836aSBrett Creeley 73f232836aSBrett Creeley static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_dirty *dirty, 74f232836aSBrett Creeley unsigned long bytes) 75f232836aSBrett Creeley { 76f232836aSBrett Creeley unsigned long *host_seq_bmp, *host_ack_bmp; 77f232836aSBrett Creeley 78f232836aSBrett Creeley host_seq_bmp = vzalloc(bytes); 79f232836aSBrett Creeley if (!host_seq_bmp) 80f232836aSBrett Creeley return -ENOMEM; 81f232836aSBrett Creeley 82f232836aSBrett Creeley host_ack_bmp = vzalloc(bytes); 83f232836aSBrett Creeley if (!host_ack_bmp) { 84f232836aSBrett Creeley bitmap_free(host_seq_bmp); 85f232836aSBrett Creeley return -ENOMEM; 86f232836aSBrett Creeley } 87f232836aSBrett Creeley 88f232836aSBrett Creeley dirty->host_seq.bmp = host_seq_bmp; 89f232836aSBrett Creeley dirty->host_ack.bmp = host_ack_bmp; 90f232836aSBrett Creeley 91f232836aSBrett Creeley return 0; 92f232836aSBrett Creeley } 93f232836aSBrett Creeley 94f232836aSBrett Creeley static void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty) 95f232836aSBrett Creeley { 96f232836aSBrett Creeley vfree(dirty->host_seq.bmp); 97f232836aSBrett Creeley vfree(dirty->host_ack.bmp); 98f232836aSBrett Creeley dirty->host_seq.bmp = NULL; 99f232836aSBrett Creeley dirty->host_ack.bmp = NULL; 100f232836aSBrett Creeley } 101f232836aSBrett Creeley 102f232836aSBrett Creeley static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio, 103*3b8f7a24SBrett Creeley struct pds_vfio_dirty *dirty) 104f232836aSBrett Creeley { 105f232836aSBrett Creeley struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; 106f232836aSBrett Creeley struct device *pdsc_dev = &pci_physfn(pdev)->dev; 107f232836aSBrett Creeley 108*3b8f7a24SBrett Creeley dma_unmap_single(pdsc_dev, dirty->sgl_addr, 109*3b8f7a24SBrett Creeley dirty->num_sge * sizeof(struct pds_lm_sg_elem), 110f232836aSBrett Creeley DMA_BIDIRECTIONAL); 111*3b8f7a24SBrett Creeley kfree(dirty->sgl); 112f232836aSBrett Creeley 113*3b8f7a24SBrett Creeley dirty->num_sge = 0; 114*3b8f7a24SBrett Creeley dirty->sgl = NULL; 115*3b8f7a24SBrett Creeley dirty->sgl_addr = 0; 116f232836aSBrett Creeley } 117f232836aSBrett Creeley 118f232836aSBrett Creeley static void pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio) 119f232836aSBrett Creeley { 120*3b8f7a24SBrett Creeley struct pds_vfio_dirty *dirty = &pds_vfio->dirty; 121*3b8f7a24SBrett Creeley 122*3b8f7a24SBrett Creeley if (dirty->sgl) 123*3b8f7a24SBrett Creeley __pds_vfio_dirty_free_sgl(pds_vfio, dirty); 124f232836aSBrett Creeley } 125f232836aSBrett Creeley 126*3b8f7a24SBrett Creeley static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, 127f232836aSBrett Creeley u32 page_count) 128f232836aSBrett Creeley { 129f232836aSBrett Creeley struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; 130f232836aSBrett Creeley struct device *pdsc_dev = &pci_physfn(pdev)->dev; 131*3b8f7a24SBrett Creeley struct pds_vfio_dirty *dirty = &pds_vfio->dirty; 132f232836aSBrett Creeley struct pds_lm_sg_elem *sgl; 133f232836aSBrett Creeley dma_addr_t sgl_addr; 134f232836aSBrett Creeley size_t sgl_size; 135f232836aSBrett Creeley u32 max_sge; 136f232836aSBrett Creeley 137f232836aSBrett Creeley max_sge = DIV_ROUND_UP(page_count, PAGE_SIZE * 8); 138f232836aSBrett Creeley sgl_size = max_sge * sizeof(struct pds_lm_sg_elem); 139f232836aSBrett Creeley 140f232836aSBrett Creeley sgl = kzalloc(sgl_size, GFP_KERNEL); 141f232836aSBrett Creeley if (!sgl) 142f232836aSBrett Creeley return -ENOMEM; 143f232836aSBrett Creeley 144f232836aSBrett Creeley sgl_addr = dma_map_single(pdsc_dev, sgl, sgl_size, DMA_BIDIRECTIONAL); 145f232836aSBrett Creeley if (dma_mapping_error(pdsc_dev, sgl_addr)) { 146f232836aSBrett Creeley kfree(sgl); 147f232836aSBrett Creeley return -EIO; 148f232836aSBrett Creeley } 149f232836aSBrett Creeley 150*3b8f7a24SBrett Creeley dirty->sgl = sgl; 151*3b8f7a24SBrett Creeley dirty->num_sge = max_sge; 152*3b8f7a24SBrett Creeley dirty->sgl_addr = sgl_addr; 153f232836aSBrett Creeley 154f232836aSBrett Creeley return 0; 155f232836aSBrett Creeley } 156f232836aSBrett Creeley 157f232836aSBrett Creeley static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, 158f232836aSBrett Creeley struct rb_root_cached *ranges, u32 nnodes, 159f232836aSBrett Creeley u64 *page_size) 160f232836aSBrett Creeley { 161f232836aSBrett Creeley struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; 162f232836aSBrett Creeley struct device *pdsc_dev = &pci_physfn(pdev)->dev; 163f232836aSBrett Creeley struct pds_vfio_dirty *dirty = &pds_vfio->dirty; 164f232836aSBrett Creeley u64 region_start, region_size, region_page_size; 165f232836aSBrett Creeley struct pds_lm_dirty_region_info *region_info; 166f232836aSBrett Creeley struct interval_tree_node *node = NULL; 167f232836aSBrett Creeley u8 max_regions = 0, num_regions; 168f232836aSBrett Creeley dma_addr_t regions_dma = 0; 169f232836aSBrett Creeley u32 num_ranges = nnodes; 170f232836aSBrett Creeley u32 page_count; 171f232836aSBrett Creeley u16 len; 172f232836aSBrett Creeley int err; 173f232836aSBrett Creeley 174f232836aSBrett Creeley dev_dbg(&pdev->dev, "vf%u: Start dirty page tracking\n", 175f232836aSBrett Creeley pds_vfio->vf_id); 176f232836aSBrett Creeley 177f232836aSBrett Creeley if (pds_vfio_dirty_is_enabled(pds_vfio)) 178f232836aSBrett Creeley return -EINVAL; 179f232836aSBrett Creeley 180f232836aSBrett Creeley /* find if dirty tracking is disabled, i.e. num_regions == 0 */ 181f232836aSBrett Creeley err = pds_vfio_dirty_status_cmd(pds_vfio, 0, &max_regions, 182f232836aSBrett Creeley &num_regions); 183f232836aSBrett Creeley if (err < 0) { 184f232836aSBrett Creeley dev_err(&pdev->dev, "Failed to get dirty status, err %pe\n", 185f232836aSBrett Creeley ERR_PTR(err)); 186f232836aSBrett Creeley return err; 187f232836aSBrett Creeley } else if (num_regions) { 188f232836aSBrett Creeley dev_err(&pdev->dev, 189f232836aSBrett Creeley "Dirty tracking already enabled for %d regions\n", 190f232836aSBrett Creeley num_regions); 191f232836aSBrett Creeley return -EEXIST; 192f232836aSBrett Creeley } else if (!max_regions) { 193f232836aSBrett Creeley dev_err(&pdev->dev, 194f232836aSBrett Creeley "Device doesn't support dirty tracking, max_regions %d\n", 195f232836aSBrett Creeley max_regions); 196f232836aSBrett Creeley return -EOPNOTSUPP; 197f232836aSBrett Creeley } 198f232836aSBrett Creeley 199f232836aSBrett Creeley /* 200f232836aSBrett Creeley * Only support 1 region for now. If there are any large gaps in the 201f232836aSBrett Creeley * VM's address regions, then this would be a waste of memory as we are 202f232836aSBrett Creeley * generating 2 bitmaps (ack/seq) from the min address to the max 203f232836aSBrett Creeley * address of the VM's address regions. In the future, if we support 204f232836aSBrett Creeley * more than one region in the device/driver we can split the bitmaps 205f232836aSBrett Creeley * on the largest address region gaps. We can do this split up to the 206f232836aSBrett Creeley * max_regions times returned from the dirty_status command. 207f232836aSBrett Creeley */ 208f232836aSBrett Creeley max_regions = 1; 209f232836aSBrett Creeley if (num_ranges > max_regions) { 210f232836aSBrett Creeley vfio_combine_iova_ranges(ranges, nnodes, max_regions); 211f232836aSBrett Creeley num_ranges = max_regions; 212f232836aSBrett Creeley } 213f232836aSBrett Creeley 214f232836aSBrett Creeley node = interval_tree_iter_first(ranges, 0, ULONG_MAX); 215f232836aSBrett Creeley if (!node) 216f232836aSBrett Creeley return -EINVAL; 217f232836aSBrett Creeley 218f232836aSBrett Creeley region_size = node->last - node->start + 1; 219f232836aSBrett Creeley region_start = node->start; 220f232836aSBrett Creeley region_page_size = *page_size; 221f232836aSBrett Creeley 222f232836aSBrett Creeley len = sizeof(*region_info); 223f232836aSBrett Creeley region_info = kzalloc(len, GFP_KERNEL); 224f232836aSBrett Creeley if (!region_info) 225f232836aSBrett Creeley return -ENOMEM; 226f232836aSBrett Creeley 227f232836aSBrett Creeley page_count = DIV_ROUND_UP(region_size, region_page_size); 228f232836aSBrett Creeley 229f232836aSBrett Creeley region_info->dma_base = cpu_to_le64(region_start); 230f232836aSBrett Creeley region_info->page_count = cpu_to_le32(page_count); 231f232836aSBrett Creeley region_info->page_size_log2 = ilog2(region_page_size); 232f232836aSBrett Creeley 233f232836aSBrett Creeley regions_dma = dma_map_single(pdsc_dev, (void *)region_info, len, 234f232836aSBrett Creeley DMA_BIDIRECTIONAL); 235f232836aSBrett Creeley if (dma_mapping_error(pdsc_dev, regions_dma)) { 236f232836aSBrett Creeley err = -ENOMEM; 237f232836aSBrett Creeley goto out_free_region_info; 238f232836aSBrett Creeley } 239f232836aSBrett Creeley 240f232836aSBrett Creeley err = pds_vfio_dirty_enable_cmd(pds_vfio, regions_dma, max_regions); 241f232836aSBrett Creeley dma_unmap_single(pdsc_dev, regions_dma, len, DMA_BIDIRECTIONAL); 242f232836aSBrett Creeley if (err) 243f232836aSBrett Creeley goto out_free_region_info; 244f232836aSBrett Creeley 245f232836aSBrett Creeley /* 246f232836aSBrett Creeley * page_count might be adjusted by the device, 247f232836aSBrett Creeley * update it before freeing region_info DMA 248f232836aSBrett Creeley */ 249f232836aSBrett Creeley page_count = le32_to_cpu(region_info->page_count); 250f232836aSBrett Creeley 251f232836aSBrett Creeley dev_dbg(&pdev->dev, 252f232836aSBrett Creeley "region_info: regions_dma 0x%llx dma_base 0x%llx page_count %u page_size_log2 %u\n", 253f232836aSBrett Creeley regions_dma, region_start, page_count, 254f232836aSBrett Creeley (u8)ilog2(region_page_size)); 255f232836aSBrett Creeley 256f232836aSBrett Creeley err = pds_vfio_dirty_alloc_bitmaps(dirty, page_count / BITS_PER_BYTE); 257f232836aSBrett Creeley if (err) { 258f232836aSBrett Creeley dev_err(&pdev->dev, "Failed to alloc dirty bitmaps: %pe\n", 259f232836aSBrett Creeley ERR_PTR(err)); 260f232836aSBrett Creeley goto out_free_region_info; 261f232836aSBrett Creeley } 262f232836aSBrett Creeley 263f232836aSBrett Creeley err = pds_vfio_dirty_alloc_sgl(pds_vfio, page_count); 264f232836aSBrett Creeley if (err) { 265f232836aSBrett Creeley dev_err(&pdev->dev, "Failed to alloc dirty sg lists: %pe\n", 266f232836aSBrett Creeley ERR_PTR(err)); 267f232836aSBrett Creeley goto out_free_bitmaps; 268f232836aSBrett Creeley } 269f232836aSBrett Creeley 270f232836aSBrett Creeley dirty->region_start = region_start; 271f232836aSBrett Creeley dirty->region_size = region_size; 272f232836aSBrett Creeley dirty->region_page_size = region_page_size; 273f232836aSBrett Creeley pds_vfio_dirty_set_enabled(pds_vfio); 274f232836aSBrett Creeley 275f232836aSBrett Creeley pds_vfio_print_guest_region_info(pds_vfio, max_regions); 276f232836aSBrett Creeley 277f232836aSBrett Creeley kfree(region_info); 278f232836aSBrett Creeley 279f232836aSBrett Creeley return 0; 280f232836aSBrett Creeley 281f232836aSBrett Creeley out_free_bitmaps: 282f232836aSBrett Creeley pds_vfio_dirty_free_bitmaps(dirty); 283f232836aSBrett Creeley out_free_region_info: 284f232836aSBrett Creeley kfree(region_info); 285f232836aSBrett Creeley return err; 286f232836aSBrett Creeley } 287f232836aSBrett Creeley 288f232836aSBrett Creeley void pds_vfio_dirty_disable(struct pds_vfio_pci_device *pds_vfio, bool send_cmd) 289f232836aSBrett Creeley { 290f232836aSBrett Creeley if (pds_vfio_dirty_is_enabled(pds_vfio)) { 291f232836aSBrett Creeley pds_vfio_dirty_set_disabled(pds_vfio); 292f232836aSBrett Creeley if (send_cmd) 293f232836aSBrett Creeley pds_vfio_dirty_disable_cmd(pds_vfio); 294f232836aSBrett Creeley pds_vfio_dirty_free_sgl(pds_vfio); 295f232836aSBrett Creeley pds_vfio_dirty_free_bitmaps(&pds_vfio->dirty); 296f232836aSBrett Creeley } 297f232836aSBrett Creeley 298f232836aSBrett Creeley if (send_cmd) 299f232836aSBrett Creeley pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_NONE); 300f232836aSBrett Creeley } 301f232836aSBrett Creeley 302f232836aSBrett Creeley static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, 303f232836aSBrett Creeley struct pds_vfio_bmp_info *bmp_info, 304f232836aSBrett Creeley u32 offset, u32 bmp_bytes, bool read_seq) 305f232836aSBrett Creeley { 306f232836aSBrett Creeley const char *bmp_type_str = read_seq ? "read_seq" : "write_ack"; 307f232836aSBrett Creeley u8 dma_dir = read_seq ? DMA_FROM_DEVICE : DMA_TO_DEVICE; 308f232836aSBrett Creeley struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; 309f232836aSBrett Creeley struct device *pdsc_dev = &pci_physfn(pdev)->dev; 310*3b8f7a24SBrett Creeley struct pds_vfio_dirty *dirty = &pds_vfio->dirty; 311*3b8f7a24SBrett Creeley struct pds_lm_sg_elem *sgl; 312f232836aSBrett Creeley unsigned long long npages; 313f232836aSBrett Creeley struct sg_table sg_table; 314f232836aSBrett Creeley struct scatterlist *sg; 315f232836aSBrett Creeley struct page **pages; 316f232836aSBrett Creeley u32 page_offset; 317f232836aSBrett Creeley const void *bmp; 318f232836aSBrett Creeley size_t size; 319f232836aSBrett Creeley u16 num_sge; 320f232836aSBrett Creeley int err; 321f232836aSBrett Creeley int i; 322f232836aSBrett Creeley 323f232836aSBrett Creeley bmp = (void *)((u64)bmp_info->bmp + offset); 324f232836aSBrett Creeley page_offset = offset_in_page(bmp); 325f232836aSBrett Creeley bmp -= page_offset; 326f232836aSBrett Creeley 327f232836aSBrett Creeley /* 328f232836aSBrett Creeley * Start and end of bitmap section to seq/ack might not be page 329f232836aSBrett Creeley * aligned, so use the page_offset to account for that so there 330f232836aSBrett Creeley * will be enough pages to represent the bmp_bytes 331f232836aSBrett Creeley */ 332f232836aSBrett Creeley npages = DIV_ROUND_UP_ULL(bmp_bytes + page_offset, PAGE_SIZE); 333f232836aSBrett Creeley pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL); 334f232836aSBrett Creeley if (!pages) 335f232836aSBrett Creeley return -ENOMEM; 336f232836aSBrett Creeley 337f232836aSBrett Creeley for (unsigned long long i = 0; i < npages; i++) { 338f232836aSBrett Creeley struct page *page = vmalloc_to_page(bmp); 339f232836aSBrett Creeley 340f232836aSBrett Creeley if (!page) { 341f232836aSBrett Creeley err = -EFAULT; 342f232836aSBrett Creeley goto out_free_pages; 343f232836aSBrett Creeley } 344f232836aSBrett Creeley 345f232836aSBrett Creeley pages[i] = page; 346f232836aSBrett Creeley bmp += PAGE_SIZE; 347f232836aSBrett Creeley } 348f232836aSBrett Creeley 349f232836aSBrett Creeley err = sg_alloc_table_from_pages(&sg_table, pages, npages, page_offset, 350f232836aSBrett Creeley bmp_bytes, GFP_KERNEL); 351f232836aSBrett Creeley if (err) 352f232836aSBrett Creeley goto out_free_pages; 353f232836aSBrett Creeley 354f232836aSBrett Creeley err = dma_map_sgtable(pdsc_dev, &sg_table, dma_dir, 0); 355f232836aSBrett Creeley if (err) 356f232836aSBrett Creeley goto out_free_sg_table; 357f232836aSBrett Creeley 358*3b8f7a24SBrett Creeley sgl = pds_vfio->dirty.sgl; 359f232836aSBrett Creeley for_each_sgtable_dma_sg(&sg_table, sg, i) { 360*3b8f7a24SBrett Creeley struct pds_lm_sg_elem *sg_elem = &sgl[i]; 361f232836aSBrett Creeley 362f232836aSBrett Creeley sg_elem->addr = cpu_to_le64(sg_dma_address(sg)); 363f232836aSBrett Creeley sg_elem->len = cpu_to_le32(sg_dma_len(sg)); 364f232836aSBrett Creeley } 365f232836aSBrett Creeley 366f232836aSBrett Creeley num_sge = sg_table.nents; 367f232836aSBrett Creeley size = num_sge * sizeof(struct pds_lm_sg_elem); 368*3b8f7a24SBrett Creeley dma_sync_single_for_device(pdsc_dev, dirty->sgl_addr, size, dma_dir); 369*3b8f7a24SBrett Creeley err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, dirty->sgl_addr, num_sge, 370f232836aSBrett Creeley offset, bmp_bytes, read_seq); 371f232836aSBrett Creeley if (err) 372f232836aSBrett Creeley dev_err(&pdev->dev, 373f232836aSBrett Creeley "Dirty bitmap %s failed offset %u bmp_bytes %u num_sge %u DMA 0x%llx: %pe\n", 374f232836aSBrett Creeley bmp_type_str, offset, bmp_bytes, 375*3b8f7a24SBrett Creeley num_sge, dirty->sgl_addr, ERR_PTR(err)); 376*3b8f7a24SBrett Creeley dma_sync_single_for_cpu(pdsc_dev, dirty->sgl_addr, size, dma_dir); 377f232836aSBrett Creeley 378f232836aSBrett Creeley dma_unmap_sgtable(pdsc_dev, &sg_table, dma_dir, 0); 379f232836aSBrett Creeley out_free_sg_table: 380f232836aSBrett Creeley sg_free_table(&sg_table); 381f232836aSBrett Creeley out_free_pages: 382f232836aSBrett Creeley kfree(pages); 383f232836aSBrett Creeley 384f232836aSBrett Creeley return err; 385f232836aSBrett Creeley } 386f232836aSBrett Creeley 387f232836aSBrett Creeley static int pds_vfio_dirty_write_ack(struct pds_vfio_pci_device *pds_vfio, 388f232836aSBrett Creeley u32 offset, u32 len) 389f232836aSBrett Creeley { 390f232836aSBrett Creeley return pds_vfio_dirty_seq_ack(pds_vfio, &pds_vfio->dirty.host_ack, 391f232836aSBrett Creeley offset, len, WRITE_ACK); 392f232836aSBrett Creeley } 393f232836aSBrett Creeley 394f232836aSBrett Creeley static int pds_vfio_dirty_read_seq(struct pds_vfio_pci_device *pds_vfio, 395f232836aSBrett Creeley u32 offset, u32 len) 396f232836aSBrett Creeley { 397f232836aSBrett Creeley return pds_vfio_dirty_seq_ack(pds_vfio, &pds_vfio->dirty.host_seq, 398f232836aSBrett Creeley offset, len, READ_SEQ); 399f232836aSBrett Creeley } 400f232836aSBrett Creeley 401f232836aSBrett Creeley static int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio, 402f232836aSBrett Creeley struct iova_bitmap *dirty_bitmap, 403f232836aSBrett Creeley u32 bmp_offset, u32 len_bytes) 404f232836aSBrett Creeley { 405f232836aSBrett Creeley u64 page_size = pds_vfio->dirty.region_page_size; 406f232836aSBrett Creeley u64 region_start = pds_vfio->dirty.region_start; 407f232836aSBrett Creeley u32 bmp_offset_bit; 408f232836aSBrett Creeley __le64 *seq, *ack; 409f232836aSBrett Creeley int dword_count; 410f232836aSBrett Creeley 411f232836aSBrett Creeley dword_count = len_bytes / sizeof(u64); 412f232836aSBrett Creeley seq = (__le64 *)((u64)pds_vfio->dirty.host_seq.bmp + bmp_offset); 413f232836aSBrett Creeley ack = (__le64 *)((u64)pds_vfio->dirty.host_ack.bmp + bmp_offset); 414f232836aSBrett Creeley bmp_offset_bit = bmp_offset * 8; 415f232836aSBrett Creeley 416f232836aSBrett Creeley for (int i = 0; i < dword_count; i++) { 417f232836aSBrett Creeley u64 xor = le64_to_cpu(seq[i]) ^ le64_to_cpu(ack[i]); 418f232836aSBrett Creeley 419f232836aSBrett Creeley /* prepare for next write_ack call */ 420f232836aSBrett Creeley ack[i] = seq[i]; 421f232836aSBrett Creeley 422f232836aSBrett Creeley for (u8 bit_i = 0; bit_i < BITS_PER_TYPE(u64); ++bit_i) { 423f232836aSBrett Creeley if (xor & BIT(bit_i)) { 424f232836aSBrett Creeley u64 abs_bit_i = bmp_offset_bit + 425f232836aSBrett Creeley i * BITS_PER_TYPE(u64) + bit_i; 426f232836aSBrett Creeley u64 addr = abs_bit_i * page_size + region_start; 427f232836aSBrett Creeley 428f232836aSBrett Creeley iova_bitmap_set(dirty_bitmap, addr, page_size); 429f232836aSBrett Creeley } 430f232836aSBrett Creeley } 431f232836aSBrett Creeley } 432f232836aSBrett Creeley 433f232836aSBrett Creeley return 0; 434f232836aSBrett Creeley } 435f232836aSBrett Creeley 436f232836aSBrett Creeley static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, 437f232836aSBrett Creeley struct iova_bitmap *dirty_bitmap, 438f232836aSBrett Creeley unsigned long iova, unsigned long length) 439f232836aSBrett Creeley { 440f232836aSBrett Creeley struct device *dev = &pds_vfio->vfio_coredev.pdev->dev; 441f232836aSBrett Creeley struct pds_vfio_dirty *dirty = &pds_vfio->dirty; 442f232836aSBrett Creeley u64 bmp_offset, bmp_bytes; 443f232836aSBrett Creeley u64 bitmap_size, pages; 444f232836aSBrett Creeley int err; 445f232836aSBrett Creeley 446f232836aSBrett Creeley dev_dbg(dev, "vf%u: Get dirty page bitmap\n", pds_vfio->vf_id); 447f232836aSBrett Creeley 448f232836aSBrett Creeley if (!pds_vfio_dirty_is_enabled(pds_vfio)) { 449f232836aSBrett Creeley dev_err(dev, "vf%u: Sync failed, dirty tracking is disabled\n", 450f232836aSBrett Creeley pds_vfio->vf_id); 451f232836aSBrett Creeley return -EINVAL; 452f232836aSBrett Creeley } 453f232836aSBrett Creeley 454f232836aSBrett Creeley pages = DIV_ROUND_UP(length, pds_vfio->dirty.region_page_size); 455f232836aSBrett Creeley bitmap_size = 456f232836aSBrett Creeley round_up(pages, sizeof(u64) * BITS_PER_BYTE) / BITS_PER_BYTE; 457f232836aSBrett Creeley 458f232836aSBrett Creeley dev_dbg(dev, 459f232836aSBrett Creeley "vf%u: iova 0x%lx length %lu page_size %llu pages %llu bitmap_size %llu\n", 460f232836aSBrett Creeley pds_vfio->vf_id, iova, length, pds_vfio->dirty.region_page_size, 461f232836aSBrett Creeley pages, bitmap_size); 462f232836aSBrett Creeley 4634004497cSBrett Creeley if (!length || ((iova - dirty->region_start + length) > dirty->region_size)) { 464f232836aSBrett Creeley dev_err(dev, "Invalid iova 0x%lx and/or length 0x%lx to sync\n", 465f232836aSBrett Creeley iova, length); 466f232836aSBrett Creeley return -EINVAL; 467f232836aSBrett Creeley } 468f232836aSBrett Creeley 469f232836aSBrett Creeley /* bitmap is modified in 64 bit chunks */ 470f232836aSBrett Creeley bmp_bytes = ALIGN(DIV_ROUND_UP(length / dirty->region_page_size, 471f232836aSBrett Creeley sizeof(u64)), 472f232836aSBrett Creeley sizeof(u64)); 473f232836aSBrett Creeley if (bmp_bytes != bitmap_size) { 474f232836aSBrett Creeley dev_err(dev, 475f232836aSBrett Creeley "Calculated bitmap bytes %llu not equal to bitmap size %llu\n", 476f232836aSBrett Creeley bmp_bytes, bitmap_size); 477f232836aSBrett Creeley return -EINVAL; 478f232836aSBrett Creeley } 479f232836aSBrett Creeley 4804004497cSBrett Creeley bmp_offset = DIV_ROUND_UP((iova - dirty->region_start) / 4814004497cSBrett Creeley dirty->region_page_size, sizeof(u64)); 482f232836aSBrett Creeley 483f232836aSBrett Creeley dev_dbg(dev, 484f232836aSBrett Creeley "Syncing dirty bitmap, iova 0x%lx length 0x%lx, bmp_offset %llu bmp_bytes %llu\n", 485f232836aSBrett Creeley iova, length, bmp_offset, bmp_bytes); 486f232836aSBrett Creeley 487f232836aSBrett Creeley err = pds_vfio_dirty_read_seq(pds_vfio, bmp_offset, bmp_bytes); 488f232836aSBrett Creeley if (err) 489f232836aSBrett Creeley return err; 490f232836aSBrett Creeley 491f232836aSBrett Creeley err = pds_vfio_dirty_process_bitmaps(pds_vfio, dirty_bitmap, bmp_offset, 492f232836aSBrett Creeley bmp_bytes); 493f232836aSBrett Creeley if (err) 494f232836aSBrett Creeley return err; 495f232836aSBrett Creeley 496f232836aSBrett Creeley err = pds_vfio_dirty_write_ack(pds_vfio, bmp_offset, bmp_bytes); 497f232836aSBrett Creeley if (err) 498f232836aSBrett Creeley return err; 499f232836aSBrett Creeley 500f232836aSBrett Creeley return 0; 501f232836aSBrett Creeley } 502f232836aSBrett Creeley 503f232836aSBrett Creeley int pds_vfio_dma_logging_report(struct vfio_device *vdev, unsigned long iova, 504f232836aSBrett Creeley unsigned long length, struct iova_bitmap *dirty) 505f232836aSBrett Creeley { 506f232836aSBrett Creeley struct pds_vfio_pci_device *pds_vfio = 507f232836aSBrett Creeley container_of(vdev, struct pds_vfio_pci_device, 508f232836aSBrett Creeley vfio_coredev.vdev); 509f232836aSBrett Creeley int err; 510f232836aSBrett Creeley 511f232836aSBrett Creeley mutex_lock(&pds_vfio->state_mutex); 512f232836aSBrett Creeley err = pds_vfio_dirty_sync(pds_vfio, dirty, iova, length); 513f232836aSBrett Creeley pds_vfio_state_mutex_unlock(pds_vfio); 514f232836aSBrett Creeley 515f232836aSBrett Creeley return err; 516f232836aSBrett Creeley } 517f232836aSBrett Creeley 518f232836aSBrett Creeley int pds_vfio_dma_logging_start(struct vfio_device *vdev, 519f232836aSBrett Creeley struct rb_root_cached *ranges, u32 nnodes, 520f232836aSBrett Creeley u64 *page_size) 521f232836aSBrett Creeley { 522f232836aSBrett Creeley struct pds_vfio_pci_device *pds_vfio = 523f232836aSBrett Creeley container_of(vdev, struct pds_vfio_pci_device, 524f232836aSBrett Creeley vfio_coredev.vdev); 525f232836aSBrett Creeley int err; 526f232836aSBrett Creeley 527f232836aSBrett Creeley mutex_lock(&pds_vfio->state_mutex); 528f232836aSBrett Creeley pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_IN_PROGRESS); 529f232836aSBrett Creeley err = pds_vfio_dirty_enable(pds_vfio, ranges, nnodes, page_size); 530f232836aSBrett Creeley pds_vfio_state_mutex_unlock(pds_vfio); 531f232836aSBrett Creeley 532f232836aSBrett Creeley return err; 533f232836aSBrett Creeley } 534f232836aSBrett Creeley 535f232836aSBrett Creeley int pds_vfio_dma_logging_stop(struct vfio_device *vdev) 536f232836aSBrett Creeley { 537f232836aSBrett Creeley struct pds_vfio_pci_device *pds_vfio = 538f232836aSBrett Creeley container_of(vdev, struct pds_vfio_pci_device, 539f232836aSBrett Creeley vfio_coredev.vdev); 540f232836aSBrett Creeley 541f232836aSBrett Creeley mutex_lock(&pds_vfio->state_mutex); 542f232836aSBrett Creeley pds_vfio_dirty_disable(pds_vfio, true); 543f232836aSBrett Creeley pds_vfio_state_mutex_unlock(pds_vfio); 544f232836aSBrett Creeley 545f232836aSBrett Creeley return 0; 546f232836aSBrett Creeley } 547