1f232836aSBrett Creeley // SPDX-License-Identifier: GPL-2.0 2f232836aSBrett Creeley /* Copyright(c) 2023 Advanced Micro Devices, Inc. */ 3f232836aSBrett Creeley 4f232836aSBrett Creeley #include <linux/interval_tree.h> 5f232836aSBrett Creeley #include <linux/vfio.h> 6f232836aSBrett Creeley 7f232836aSBrett Creeley #include <linux/pds/pds_common.h> 8f232836aSBrett Creeley #include <linux/pds/pds_core_if.h> 9f232836aSBrett Creeley #include <linux/pds/pds_adminq.h> 10f232836aSBrett Creeley 11f232836aSBrett Creeley #include "vfio_dev.h" 12f232836aSBrett Creeley #include "cmds.h" 13f232836aSBrett Creeley #include "dirty.h" 14f232836aSBrett Creeley 15f232836aSBrett Creeley #define READ_SEQ true 16f232836aSBrett Creeley #define WRITE_ACK false 17f232836aSBrett Creeley 18f232836aSBrett Creeley bool pds_vfio_dirty_is_enabled(struct pds_vfio_pci_device *pds_vfio) 19f232836aSBrett Creeley { 20f232836aSBrett Creeley return pds_vfio->dirty.is_enabled; 21f232836aSBrett Creeley } 22f232836aSBrett Creeley 23f232836aSBrett Creeley void pds_vfio_dirty_set_enabled(struct pds_vfio_pci_device *pds_vfio) 24f232836aSBrett Creeley { 25f232836aSBrett Creeley pds_vfio->dirty.is_enabled = true; 26f232836aSBrett Creeley } 27f232836aSBrett Creeley 28f232836aSBrett Creeley void pds_vfio_dirty_set_disabled(struct pds_vfio_pci_device *pds_vfio) 29f232836aSBrett Creeley { 30f232836aSBrett Creeley pds_vfio->dirty.is_enabled = false; 31f232836aSBrett Creeley } 32f232836aSBrett Creeley 33f232836aSBrett Creeley static void 34f232836aSBrett Creeley pds_vfio_print_guest_region_info(struct pds_vfio_pci_device *pds_vfio, 35f232836aSBrett Creeley u8 max_regions) 36f232836aSBrett Creeley { 37f232836aSBrett Creeley int len = max_regions * sizeof(struct pds_lm_dirty_region_info); 38f232836aSBrett Creeley struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; 39f232836aSBrett Creeley struct device *pdsc_dev = &pci_physfn(pdev)->dev; 40f232836aSBrett Creeley struct pds_lm_dirty_region_info *region_info; 41f232836aSBrett Creeley dma_addr_t regions_dma; 42f232836aSBrett Creeley u8 num_regions; 43f232836aSBrett Creeley int err; 44f232836aSBrett Creeley 45f232836aSBrett Creeley region_info = kcalloc(max_regions, 46f232836aSBrett Creeley sizeof(struct pds_lm_dirty_region_info), 47f232836aSBrett Creeley GFP_KERNEL); 48f232836aSBrett Creeley if (!region_info) 49f232836aSBrett Creeley return; 50f232836aSBrett Creeley 51f232836aSBrett Creeley regions_dma = 52f232836aSBrett Creeley dma_map_single(pdsc_dev, region_info, len, DMA_FROM_DEVICE); 53f232836aSBrett Creeley if (dma_mapping_error(pdsc_dev, regions_dma)) 54f232836aSBrett Creeley goto out_free_region_info; 55f232836aSBrett Creeley 56f232836aSBrett Creeley err = pds_vfio_dirty_status_cmd(pds_vfio, regions_dma, &max_regions, 57f232836aSBrett Creeley &num_regions); 58f232836aSBrett Creeley dma_unmap_single(pdsc_dev, regions_dma, len, DMA_FROM_DEVICE); 59f232836aSBrett Creeley if (err) 60f232836aSBrett Creeley goto out_free_region_info; 61f232836aSBrett Creeley 62f232836aSBrett Creeley for (unsigned int i = 0; i < num_regions; i++) 63f232836aSBrett Creeley dev_dbg(&pdev->dev, 64f232836aSBrett Creeley "region_info[%d]: dma_base 0x%llx page_count %u page_size_log2 %u\n", 65f232836aSBrett Creeley i, le64_to_cpu(region_info[i].dma_base), 66f232836aSBrett Creeley le32_to_cpu(region_info[i].page_count), 67f232836aSBrett Creeley region_info[i].page_size_log2); 68f232836aSBrett Creeley 69f232836aSBrett Creeley out_free_region_info: 70f232836aSBrett Creeley kfree(region_info); 71f232836aSBrett Creeley } 72f232836aSBrett Creeley 732e7c6febSBrett Creeley static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_region *region, 74f232836aSBrett Creeley unsigned long bytes) 75f232836aSBrett Creeley { 76f232836aSBrett Creeley unsigned long *host_seq_bmp, *host_ack_bmp; 77f232836aSBrett Creeley 78f232836aSBrett Creeley host_seq_bmp = vzalloc(bytes); 79f232836aSBrett Creeley if (!host_seq_bmp) 80f232836aSBrett Creeley return -ENOMEM; 81f232836aSBrett Creeley 82f232836aSBrett Creeley host_ack_bmp = vzalloc(bytes); 83f232836aSBrett Creeley if (!host_ack_bmp) { 84f232836aSBrett Creeley bitmap_free(host_seq_bmp); 85f232836aSBrett Creeley return -ENOMEM; 86f232836aSBrett Creeley } 87f232836aSBrett Creeley 882e7c6febSBrett Creeley region->host_seq = host_seq_bmp; 892e7c6febSBrett Creeley region->host_ack = host_ack_bmp; 902e7c6febSBrett Creeley region->bmp_bytes = bytes; 91f232836aSBrett Creeley 92f232836aSBrett Creeley return 0; 93f232836aSBrett Creeley } 94f232836aSBrett Creeley 95f232836aSBrett Creeley static void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty) 96f232836aSBrett Creeley { 972e7c6febSBrett Creeley if (!dirty->regions) 982e7c6febSBrett Creeley return; 992e7c6febSBrett Creeley 1002e7c6febSBrett Creeley for (int i = 0; i < dirty->num_regions; i++) { 1012e7c6febSBrett Creeley struct pds_vfio_region *region = &dirty->regions[i]; 1022e7c6febSBrett Creeley 1032e7c6febSBrett Creeley vfree(region->host_seq); 1042e7c6febSBrett Creeley vfree(region->host_ack); 1052e7c6febSBrett Creeley region->host_seq = NULL; 1062e7c6febSBrett Creeley region->host_ack = NULL; 1072e7c6febSBrett Creeley region->bmp_bytes = 0; 1082e7c6febSBrett Creeley } 109f232836aSBrett Creeley } 110f232836aSBrett Creeley 111f232836aSBrett Creeley static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio, 11287bdf980SBrett Creeley struct pds_vfio_region *region) 113f232836aSBrett Creeley { 114f232836aSBrett Creeley struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; 115f232836aSBrett Creeley struct device *pdsc_dev = &pci_physfn(pdev)->dev; 116f232836aSBrett Creeley 11787bdf980SBrett Creeley dma_unmap_single(pdsc_dev, region->sgl_addr, 11887bdf980SBrett Creeley region->num_sge * sizeof(struct pds_lm_sg_elem), 119f232836aSBrett Creeley DMA_BIDIRECTIONAL); 12087bdf980SBrett Creeley kfree(region->sgl); 121f232836aSBrett Creeley 12287bdf980SBrett Creeley region->num_sge = 0; 12387bdf980SBrett Creeley region->sgl = NULL; 12487bdf980SBrett Creeley region->sgl_addr = 0; 125f232836aSBrett Creeley } 126f232836aSBrett Creeley 127f232836aSBrett Creeley static void pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio) 128f232836aSBrett Creeley { 1292e7c6febSBrett Creeley struct pds_vfio_dirty *dirty = &pds_vfio->dirty; 1302e7c6febSBrett Creeley 1312e7c6febSBrett Creeley if (!dirty->regions) 1322e7c6febSBrett Creeley return; 1332e7c6febSBrett Creeley 1342e7c6febSBrett Creeley for (int i = 0; i < dirty->num_regions; i++) { 1352e7c6febSBrett Creeley struct pds_vfio_region *region = &dirty->regions[i]; 1363b8f7a24SBrett Creeley 13787bdf980SBrett Creeley if (region->sgl) 13887bdf980SBrett Creeley __pds_vfio_dirty_free_sgl(pds_vfio, region); 139f232836aSBrett Creeley } 1402e7c6febSBrett Creeley } 141f232836aSBrett Creeley 1423b8f7a24SBrett Creeley static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio, 14387bdf980SBrett Creeley struct pds_vfio_region *region, 144f232836aSBrett Creeley u32 page_count) 145f232836aSBrett Creeley { 146f232836aSBrett Creeley struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; 147f232836aSBrett Creeley struct device *pdsc_dev = &pci_physfn(pdev)->dev; 148f232836aSBrett Creeley struct pds_lm_sg_elem *sgl; 149f232836aSBrett Creeley dma_addr_t sgl_addr; 150f232836aSBrett Creeley size_t sgl_size; 151f232836aSBrett Creeley u32 max_sge; 152f232836aSBrett Creeley 153f232836aSBrett Creeley max_sge = DIV_ROUND_UP(page_count, PAGE_SIZE * 8); 154f232836aSBrett Creeley sgl_size = max_sge * sizeof(struct pds_lm_sg_elem); 155f232836aSBrett Creeley 156f232836aSBrett Creeley sgl = kzalloc(sgl_size, GFP_KERNEL); 157f232836aSBrett Creeley if (!sgl) 158f232836aSBrett Creeley return -ENOMEM; 159f232836aSBrett Creeley 160f232836aSBrett Creeley sgl_addr = dma_map_single(pdsc_dev, sgl, sgl_size, DMA_BIDIRECTIONAL); 161f232836aSBrett Creeley if (dma_mapping_error(pdsc_dev, sgl_addr)) { 162f232836aSBrett Creeley kfree(sgl); 163f232836aSBrett Creeley return -EIO; 164f232836aSBrett Creeley } 165f232836aSBrett Creeley 16687bdf980SBrett Creeley region->sgl = sgl; 16787bdf980SBrett Creeley region->num_sge = max_sge; 16887bdf980SBrett Creeley region->sgl_addr = sgl_addr; 169f232836aSBrett Creeley 170f232836aSBrett Creeley return 0; 171f232836aSBrett Creeley } 172f232836aSBrett Creeley 1732e7c6febSBrett Creeley static void pds_vfio_dirty_free_regions(struct pds_vfio_dirty *dirty) 1742e7c6febSBrett Creeley { 1752e7c6febSBrett Creeley vfree(dirty->regions); 1762e7c6febSBrett Creeley dirty->regions = NULL; 1772e7c6febSBrett Creeley dirty->num_regions = 0; 1782e7c6febSBrett Creeley } 1792e7c6febSBrett Creeley 1802e7c6febSBrett Creeley static int pds_vfio_dirty_alloc_regions(struct pds_vfio_pci_device *pds_vfio, 1812e7c6febSBrett Creeley struct pds_lm_dirty_region_info *region_info, 1822e7c6febSBrett Creeley u64 region_page_size, u8 num_regions) 1832e7c6febSBrett Creeley { 1842e7c6febSBrett Creeley struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; 1852e7c6febSBrett Creeley struct pds_vfio_dirty *dirty = &pds_vfio->dirty; 1862e7c6febSBrett Creeley u32 dev_bmp_offset_byte = 0; 1872e7c6febSBrett Creeley int err; 1882e7c6febSBrett Creeley 1892e7c6febSBrett Creeley dirty->regions = vcalloc(num_regions, sizeof(struct pds_vfio_region)); 1902e7c6febSBrett Creeley if (!dirty->regions) 1912e7c6febSBrett Creeley return -ENOMEM; 1922e7c6febSBrett Creeley dirty->num_regions = num_regions; 1932e7c6febSBrett Creeley 1942e7c6febSBrett Creeley for (int i = 0; i < num_regions; i++) { 1952e7c6febSBrett Creeley struct pds_lm_dirty_region_info *ri = ®ion_info[i]; 1962e7c6febSBrett Creeley struct pds_vfio_region *region = &dirty->regions[i]; 1972e7c6febSBrett Creeley u64 region_size, region_start; 1982e7c6febSBrett Creeley u32 page_count; 1992e7c6febSBrett Creeley 2002e7c6febSBrett Creeley /* page_count might be adjusted by the device */ 2012e7c6febSBrett Creeley page_count = le32_to_cpu(ri->page_count); 2022e7c6febSBrett Creeley region_start = le64_to_cpu(ri->dma_base); 2032e7c6febSBrett Creeley region_size = page_count * region_page_size; 2042e7c6febSBrett Creeley 2052e7c6febSBrett Creeley err = pds_vfio_dirty_alloc_bitmaps(region, 2062e7c6febSBrett Creeley page_count / BITS_PER_BYTE); 2072e7c6febSBrett Creeley if (err) { 2082e7c6febSBrett Creeley dev_err(&pdev->dev, "Failed to alloc dirty bitmaps: %pe\n", 2092e7c6febSBrett Creeley ERR_PTR(err)); 2102e7c6febSBrett Creeley goto out_free_regions; 2112e7c6febSBrett Creeley } 2122e7c6febSBrett Creeley 2132e7c6febSBrett Creeley err = pds_vfio_dirty_alloc_sgl(pds_vfio, region, page_count); 2142e7c6febSBrett Creeley if (err) { 2152e7c6febSBrett Creeley dev_err(&pdev->dev, "Failed to alloc dirty sg lists: %pe\n", 2162e7c6febSBrett Creeley ERR_PTR(err)); 2172e7c6febSBrett Creeley goto out_free_regions; 2182e7c6febSBrett Creeley } 2192e7c6febSBrett Creeley 2202e7c6febSBrett Creeley region->size = region_size; 2212e7c6febSBrett Creeley region->start = region_start; 2222e7c6febSBrett Creeley region->page_size = region_page_size; 2232e7c6febSBrett Creeley region->dev_bmp_offset_start_byte = dev_bmp_offset_byte; 2242e7c6febSBrett Creeley 2252e7c6febSBrett Creeley dev_bmp_offset_byte += page_count / BITS_PER_BYTE; 2262e7c6febSBrett Creeley if (dev_bmp_offset_byte % BITS_PER_BYTE) { 2272e7c6febSBrett Creeley dev_err(&pdev->dev, "Device bitmap offset is mis-aligned\n"); 2282e7c6febSBrett Creeley err = -EINVAL; 2292e7c6febSBrett Creeley goto out_free_regions; 2302e7c6febSBrett Creeley } 2312e7c6febSBrett Creeley } 2322e7c6febSBrett Creeley 2332e7c6febSBrett Creeley return 0; 2342e7c6febSBrett Creeley 2352e7c6febSBrett Creeley out_free_regions: 2362e7c6febSBrett Creeley pds_vfio_dirty_free_bitmaps(dirty); 2372e7c6febSBrett Creeley pds_vfio_dirty_free_sgl(pds_vfio); 2382e7c6febSBrett Creeley pds_vfio_dirty_free_regions(dirty); 2392e7c6febSBrett Creeley 2402e7c6febSBrett Creeley return err; 2412e7c6febSBrett Creeley } 2422e7c6febSBrett Creeley 243f232836aSBrett Creeley static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio, 244f232836aSBrett Creeley struct rb_root_cached *ranges, u32 nnodes, 245f232836aSBrett Creeley u64 *page_size) 246f232836aSBrett Creeley { 247f232836aSBrett Creeley struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; 248f232836aSBrett Creeley struct device *pdsc_dev = &pci_physfn(pdev)->dev; 249f232836aSBrett Creeley struct pds_lm_dirty_region_info *region_info; 250f232836aSBrett Creeley struct interval_tree_node *node = NULL; 2512e7c6febSBrett Creeley u64 region_page_size = *page_size; 252f232836aSBrett Creeley u8 max_regions = 0, num_regions; 253f232836aSBrett Creeley dma_addr_t regions_dma = 0; 254f232836aSBrett Creeley u32 num_ranges = nnodes; 255f232836aSBrett Creeley int err; 2562e7c6febSBrett Creeley u16 len; 257f232836aSBrett Creeley 258f232836aSBrett Creeley dev_dbg(&pdev->dev, "vf%u: Start dirty page tracking\n", 259f232836aSBrett Creeley pds_vfio->vf_id); 260f232836aSBrett Creeley 261f232836aSBrett Creeley if (pds_vfio_dirty_is_enabled(pds_vfio)) 262f232836aSBrett Creeley return -EINVAL; 263f232836aSBrett Creeley 264f232836aSBrett Creeley /* find if dirty tracking is disabled, i.e. num_regions == 0 */ 265f232836aSBrett Creeley err = pds_vfio_dirty_status_cmd(pds_vfio, 0, &max_regions, 266f232836aSBrett Creeley &num_regions); 267f232836aSBrett Creeley if (err < 0) { 268f232836aSBrett Creeley dev_err(&pdev->dev, "Failed to get dirty status, err %pe\n", 269f232836aSBrett Creeley ERR_PTR(err)); 270f232836aSBrett Creeley return err; 271f232836aSBrett Creeley } else if (num_regions) { 272f232836aSBrett Creeley dev_err(&pdev->dev, 273f232836aSBrett Creeley "Dirty tracking already enabled for %d regions\n", 274f232836aSBrett Creeley num_regions); 275f232836aSBrett Creeley return -EEXIST; 276f232836aSBrett Creeley } else if (!max_regions) { 277f232836aSBrett Creeley dev_err(&pdev->dev, 278f232836aSBrett Creeley "Device doesn't support dirty tracking, max_regions %d\n", 279f232836aSBrett Creeley max_regions); 280f232836aSBrett Creeley return -EOPNOTSUPP; 281f232836aSBrett Creeley } 282f232836aSBrett Creeley 283f232836aSBrett Creeley if (num_ranges > max_regions) { 284f232836aSBrett Creeley vfio_combine_iova_ranges(ranges, nnodes, max_regions); 285f232836aSBrett Creeley num_ranges = max_regions; 286f232836aSBrett Creeley } 287f232836aSBrett Creeley 2882e7c6febSBrett Creeley region_info = kcalloc(num_ranges, sizeof(*region_info), GFP_KERNEL); 2892e7c6febSBrett Creeley if (!region_info) 2902e7c6febSBrett Creeley return -ENOMEM; 2912e7c6febSBrett Creeley len = num_ranges * sizeof(*region_info); 2922e7c6febSBrett Creeley 293f232836aSBrett Creeley node = interval_tree_iter_first(ranges, 0, ULONG_MAX); 294f232836aSBrett Creeley if (!node) 295f232836aSBrett Creeley return -EINVAL; 2962e7c6febSBrett Creeley for (int i = 0; i < num_ranges; i++) { 2972e7c6febSBrett Creeley struct pds_lm_dirty_region_info *ri = ®ion_info[i]; 2982e7c6febSBrett Creeley u64 region_size = node->last - node->start + 1; 2992e7c6febSBrett Creeley u64 region_start = node->start; 3002e7c6febSBrett Creeley u32 page_count; 301f232836aSBrett Creeley 302f232836aSBrett Creeley page_count = DIV_ROUND_UP(region_size, region_page_size); 303f232836aSBrett Creeley 3042e7c6febSBrett Creeley ri->dma_base = cpu_to_le64(region_start); 3052e7c6febSBrett Creeley ri->page_count = cpu_to_le32(page_count); 3062e7c6febSBrett Creeley ri->page_size_log2 = ilog2(region_page_size); 3072e7c6febSBrett Creeley 3082e7c6febSBrett Creeley dev_dbg(&pdev->dev, 3092e7c6febSBrett Creeley "region_info[%d]: region_start 0x%llx region_end 0x%lx region_size 0x%llx page_count %u page_size %llu\n", 3102e7c6febSBrett Creeley i, region_start, node->last, region_size, page_count, 3112e7c6febSBrett Creeley region_page_size); 3122e7c6febSBrett Creeley 3132e7c6febSBrett Creeley node = interval_tree_iter_next(node, 0, ULONG_MAX); 3142e7c6febSBrett Creeley } 315f232836aSBrett Creeley 316f232836aSBrett Creeley regions_dma = dma_map_single(pdsc_dev, (void *)region_info, len, 317f232836aSBrett Creeley DMA_BIDIRECTIONAL); 318f232836aSBrett Creeley if (dma_mapping_error(pdsc_dev, regions_dma)) { 319f232836aSBrett Creeley err = -ENOMEM; 320f232836aSBrett Creeley goto out_free_region_info; 321f232836aSBrett Creeley } 322f232836aSBrett Creeley 3232e7c6febSBrett Creeley err = pds_vfio_dirty_enable_cmd(pds_vfio, regions_dma, num_ranges); 324f232836aSBrett Creeley dma_unmap_single(pdsc_dev, regions_dma, len, DMA_BIDIRECTIONAL); 325f232836aSBrett Creeley if (err) 326f232836aSBrett Creeley goto out_free_region_info; 327f232836aSBrett Creeley 3282e7c6febSBrett Creeley err = pds_vfio_dirty_alloc_regions(pds_vfio, region_info, 3292e7c6febSBrett Creeley region_page_size, num_ranges); 330f232836aSBrett Creeley if (err) { 3312e7c6febSBrett Creeley dev_err(&pdev->dev, 3322e7c6febSBrett Creeley "Failed to allocate %d regions for tracking dirty regions: %pe\n", 3332e7c6febSBrett Creeley num_regions, ERR_PTR(err)); 3342e7c6febSBrett Creeley goto out_dirty_disable; 335f232836aSBrett Creeley } 336f232836aSBrett Creeley 337f232836aSBrett Creeley pds_vfio_dirty_set_enabled(pds_vfio); 338f232836aSBrett Creeley 339f232836aSBrett Creeley pds_vfio_print_guest_region_info(pds_vfio, max_regions); 340f232836aSBrett Creeley 341f232836aSBrett Creeley kfree(region_info); 342f232836aSBrett Creeley 343f232836aSBrett Creeley return 0; 344f232836aSBrett Creeley 3452e7c6febSBrett Creeley out_dirty_disable: 3462e7c6febSBrett Creeley pds_vfio_dirty_disable_cmd(pds_vfio); 347f232836aSBrett Creeley out_free_region_info: 348f232836aSBrett Creeley kfree(region_info); 349f232836aSBrett Creeley return err; 350f232836aSBrett Creeley } 351f232836aSBrett Creeley 352f232836aSBrett Creeley void pds_vfio_dirty_disable(struct pds_vfio_pci_device *pds_vfio, bool send_cmd) 353f232836aSBrett Creeley { 354f232836aSBrett Creeley if (pds_vfio_dirty_is_enabled(pds_vfio)) { 355f232836aSBrett Creeley pds_vfio_dirty_set_disabled(pds_vfio); 356f232836aSBrett Creeley if (send_cmd) 357f232836aSBrett Creeley pds_vfio_dirty_disable_cmd(pds_vfio); 358f232836aSBrett Creeley pds_vfio_dirty_free_sgl(pds_vfio); 359f232836aSBrett Creeley pds_vfio_dirty_free_bitmaps(&pds_vfio->dirty); 3602e7c6febSBrett Creeley pds_vfio_dirty_free_regions(&pds_vfio->dirty); 361f232836aSBrett Creeley } 362f232836aSBrett Creeley 363f232836aSBrett Creeley if (send_cmd) 364f232836aSBrett Creeley pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_NONE); 365f232836aSBrett Creeley } 366f232836aSBrett Creeley 367f232836aSBrett Creeley static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio, 36887bdf980SBrett Creeley struct pds_vfio_region *region, 3690c320f22SBrett Creeley unsigned long *seq_ack_bmp, u32 offset, 3700c320f22SBrett Creeley u32 bmp_bytes, bool read_seq) 371f232836aSBrett Creeley { 372f232836aSBrett Creeley const char *bmp_type_str = read_seq ? "read_seq" : "write_ack"; 373f232836aSBrett Creeley u8 dma_dir = read_seq ? DMA_FROM_DEVICE : DMA_TO_DEVICE; 374f232836aSBrett Creeley struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev; 375f232836aSBrett Creeley struct device *pdsc_dev = &pci_physfn(pdev)->dev; 376f232836aSBrett Creeley unsigned long long npages; 377f232836aSBrett Creeley struct sg_table sg_table; 378f232836aSBrett Creeley struct scatterlist *sg; 379f232836aSBrett Creeley struct page **pages; 380f232836aSBrett Creeley u32 page_offset; 381f232836aSBrett Creeley const void *bmp; 382f232836aSBrett Creeley size_t size; 383f232836aSBrett Creeley u16 num_sge; 384f232836aSBrett Creeley int err; 385f232836aSBrett Creeley int i; 386f232836aSBrett Creeley 3870c320f22SBrett Creeley bmp = (void *)((u64)seq_ack_bmp + offset); 388f232836aSBrett Creeley page_offset = offset_in_page(bmp); 389f232836aSBrett Creeley bmp -= page_offset; 390f232836aSBrett Creeley 391f232836aSBrett Creeley /* 392f232836aSBrett Creeley * Start and end of bitmap section to seq/ack might not be page 393f232836aSBrett Creeley * aligned, so use the page_offset to account for that so there 394f232836aSBrett Creeley * will be enough pages to represent the bmp_bytes 395f232836aSBrett Creeley */ 396f232836aSBrett Creeley npages = DIV_ROUND_UP_ULL(bmp_bytes + page_offset, PAGE_SIZE); 397f232836aSBrett Creeley pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL); 398f232836aSBrett Creeley if (!pages) 399f232836aSBrett Creeley return -ENOMEM; 400f232836aSBrett Creeley 401f232836aSBrett Creeley for (unsigned long long i = 0; i < npages; i++) { 402f232836aSBrett Creeley struct page *page = vmalloc_to_page(bmp); 403f232836aSBrett Creeley 404f232836aSBrett Creeley if (!page) { 405f232836aSBrett Creeley err = -EFAULT; 406f232836aSBrett Creeley goto out_free_pages; 407f232836aSBrett Creeley } 408f232836aSBrett Creeley 409f232836aSBrett Creeley pages[i] = page; 410f232836aSBrett Creeley bmp += PAGE_SIZE; 411f232836aSBrett Creeley } 412f232836aSBrett Creeley 413f232836aSBrett Creeley err = sg_alloc_table_from_pages(&sg_table, pages, npages, page_offset, 414f232836aSBrett Creeley bmp_bytes, GFP_KERNEL); 415f232836aSBrett Creeley if (err) 416f232836aSBrett Creeley goto out_free_pages; 417f232836aSBrett Creeley 418f232836aSBrett Creeley err = dma_map_sgtable(pdsc_dev, &sg_table, dma_dir, 0); 419f232836aSBrett Creeley if (err) 420f232836aSBrett Creeley goto out_free_sg_table; 421f232836aSBrett Creeley 422f232836aSBrett Creeley for_each_sgtable_dma_sg(&sg_table, sg, i) { 4233f589813SBrett Creeley struct pds_lm_sg_elem *sg_elem = ®ion->sgl[i]; 424f232836aSBrett Creeley 425f232836aSBrett Creeley sg_elem->addr = cpu_to_le64(sg_dma_address(sg)); 426f232836aSBrett Creeley sg_elem->len = cpu_to_le32(sg_dma_len(sg)); 427f232836aSBrett Creeley } 428f232836aSBrett Creeley 429f232836aSBrett Creeley num_sge = sg_table.nents; 430f232836aSBrett Creeley size = num_sge * sizeof(struct pds_lm_sg_elem); 4312e7c6febSBrett Creeley offset += region->dev_bmp_offset_start_byte; 4323f589813SBrett Creeley dma_sync_single_for_device(pdsc_dev, region->sgl_addr, size, dma_dir); 4333f589813SBrett Creeley err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, region->sgl_addr, num_sge, 434f232836aSBrett Creeley offset, bmp_bytes, read_seq); 435f232836aSBrett Creeley if (err) 436f232836aSBrett Creeley dev_err(&pdev->dev, 437f232836aSBrett Creeley "Dirty bitmap %s failed offset %u bmp_bytes %u num_sge %u DMA 0x%llx: %pe\n", 438f232836aSBrett Creeley bmp_type_str, offset, bmp_bytes, 4393f589813SBrett Creeley num_sge, region->sgl_addr, ERR_PTR(err)); 4403f589813SBrett Creeley dma_sync_single_for_cpu(pdsc_dev, region->sgl_addr, size, dma_dir); 441f232836aSBrett Creeley 442f232836aSBrett Creeley dma_unmap_sgtable(pdsc_dev, &sg_table, dma_dir, 0); 443f232836aSBrett Creeley out_free_sg_table: 444f232836aSBrett Creeley sg_free_table(&sg_table); 445f232836aSBrett Creeley out_free_pages: 446f232836aSBrett Creeley kfree(pages); 447f232836aSBrett Creeley 448f232836aSBrett Creeley return err; 449f232836aSBrett Creeley } 450f232836aSBrett Creeley 451f232836aSBrett Creeley static int pds_vfio_dirty_write_ack(struct pds_vfio_pci_device *pds_vfio, 45287bdf980SBrett Creeley struct pds_vfio_region *region, 453f232836aSBrett Creeley u32 offset, u32 len) 454f232836aSBrett Creeley { 4553f589813SBrett Creeley 4560c320f22SBrett Creeley return pds_vfio_dirty_seq_ack(pds_vfio, region, region->host_ack, 457f232836aSBrett Creeley offset, len, WRITE_ACK); 458f232836aSBrett Creeley } 459f232836aSBrett Creeley 460f232836aSBrett Creeley static int pds_vfio_dirty_read_seq(struct pds_vfio_pci_device *pds_vfio, 46187bdf980SBrett Creeley struct pds_vfio_region *region, 462f232836aSBrett Creeley u32 offset, u32 len) 463f232836aSBrett Creeley { 4640c320f22SBrett Creeley return pds_vfio_dirty_seq_ack(pds_vfio, region, region->host_seq, 465f232836aSBrett Creeley offset, len, READ_SEQ); 466f232836aSBrett Creeley } 467f232836aSBrett Creeley 468f232836aSBrett Creeley static int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio, 46987bdf980SBrett Creeley struct pds_vfio_region *region, 470f232836aSBrett Creeley struct iova_bitmap *dirty_bitmap, 471f232836aSBrett Creeley u32 bmp_offset, u32 len_bytes) 472f232836aSBrett Creeley { 47387bdf980SBrett Creeley u64 page_size = region->page_size; 47487bdf980SBrett Creeley u64 region_start = region->start; 475f232836aSBrett Creeley u32 bmp_offset_bit; 476f232836aSBrett Creeley __le64 *seq, *ack; 477f232836aSBrett Creeley int dword_count; 478f232836aSBrett Creeley 479f232836aSBrett Creeley dword_count = len_bytes / sizeof(u64); 4800c320f22SBrett Creeley seq = (__le64 *)((u64)region->host_seq + bmp_offset); 4810c320f22SBrett Creeley ack = (__le64 *)((u64)region->host_ack + bmp_offset); 482f232836aSBrett Creeley bmp_offset_bit = bmp_offset * 8; 483f232836aSBrett Creeley 484f232836aSBrett Creeley for (int i = 0; i < dword_count; i++) { 485f232836aSBrett Creeley u64 xor = le64_to_cpu(seq[i]) ^ le64_to_cpu(ack[i]); 486f232836aSBrett Creeley 487f232836aSBrett Creeley /* prepare for next write_ack call */ 488f232836aSBrett Creeley ack[i] = seq[i]; 489f232836aSBrett Creeley 490f232836aSBrett Creeley for (u8 bit_i = 0; bit_i < BITS_PER_TYPE(u64); ++bit_i) { 491f232836aSBrett Creeley if (xor & BIT(bit_i)) { 492f232836aSBrett Creeley u64 abs_bit_i = bmp_offset_bit + 493f232836aSBrett Creeley i * BITS_PER_TYPE(u64) + bit_i; 494f232836aSBrett Creeley u64 addr = abs_bit_i * page_size + region_start; 495f232836aSBrett Creeley 496f232836aSBrett Creeley iova_bitmap_set(dirty_bitmap, addr, page_size); 497f232836aSBrett Creeley } 498f232836aSBrett Creeley } 499f232836aSBrett Creeley } 500f232836aSBrett Creeley 501f232836aSBrett Creeley return 0; 502f232836aSBrett Creeley } 503f232836aSBrett Creeley 5042e7c6febSBrett Creeley static struct pds_vfio_region * 5052e7c6febSBrett Creeley pds_vfio_get_region(struct pds_vfio_pci_device *pds_vfio, unsigned long iova) 5062e7c6febSBrett Creeley { 5072e7c6febSBrett Creeley struct pds_vfio_dirty *dirty = &pds_vfio->dirty; 5082e7c6febSBrett Creeley 5092e7c6febSBrett Creeley for (int i = 0; i < dirty->num_regions; i++) { 5102e7c6febSBrett Creeley struct pds_vfio_region *region = &dirty->regions[i]; 5112e7c6febSBrett Creeley 5122e7c6febSBrett Creeley if (iova >= region->start && 5132e7c6febSBrett Creeley iova < (region->start + region->size)) 5142e7c6febSBrett Creeley return region; 5152e7c6febSBrett Creeley } 5162e7c6febSBrett Creeley 5172e7c6febSBrett Creeley return NULL; 5182e7c6febSBrett Creeley } 5192e7c6febSBrett Creeley 520f232836aSBrett Creeley static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio, 521f232836aSBrett Creeley struct iova_bitmap *dirty_bitmap, 522f232836aSBrett Creeley unsigned long iova, unsigned long length) 523f232836aSBrett Creeley { 524f232836aSBrett Creeley struct device *dev = &pds_vfio->vfio_coredev.pdev->dev; 5252e7c6febSBrett Creeley struct pds_vfio_region *region; 526f232836aSBrett Creeley u64 bmp_offset, bmp_bytes; 527f232836aSBrett Creeley u64 bitmap_size, pages; 528f232836aSBrett Creeley int err; 529f232836aSBrett Creeley 530f232836aSBrett Creeley dev_dbg(dev, "vf%u: Get dirty page bitmap\n", pds_vfio->vf_id); 531f232836aSBrett Creeley 532f232836aSBrett Creeley if (!pds_vfio_dirty_is_enabled(pds_vfio)) { 533f232836aSBrett Creeley dev_err(dev, "vf%u: Sync failed, dirty tracking is disabled\n", 534f232836aSBrett Creeley pds_vfio->vf_id); 535f232836aSBrett Creeley return -EINVAL; 536f232836aSBrett Creeley } 537f232836aSBrett Creeley 5382e7c6febSBrett Creeley region = pds_vfio_get_region(pds_vfio, iova); 5392e7c6febSBrett Creeley if (!region) { 5402e7c6febSBrett Creeley dev_err(dev, "vf%u: Failed to find region that contains iova 0x%lx length 0x%lx\n", 5412e7c6febSBrett Creeley pds_vfio->vf_id, iova, length); 5422e7c6febSBrett Creeley return -EINVAL; 5432e7c6febSBrett Creeley } 5442e7c6febSBrett Creeley 54587bdf980SBrett Creeley pages = DIV_ROUND_UP(length, region->page_size); 546f232836aSBrett Creeley bitmap_size = 547f232836aSBrett Creeley round_up(pages, sizeof(u64) * BITS_PER_BYTE) / BITS_PER_BYTE; 548f232836aSBrett Creeley 549f232836aSBrett Creeley dev_dbg(dev, 550f232836aSBrett Creeley "vf%u: iova 0x%lx length %lu page_size %llu pages %llu bitmap_size %llu\n", 55187bdf980SBrett Creeley pds_vfio->vf_id, iova, length, region->page_size, 552f232836aSBrett Creeley pages, bitmap_size); 553f232836aSBrett Creeley 55487bdf980SBrett Creeley if (!length || ((iova - region->start + length) > region->size)) { 555f232836aSBrett Creeley dev_err(dev, "Invalid iova 0x%lx and/or length 0x%lx to sync\n", 556f232836aSBrett Creeley iova, length); 557f232836aSBrett Creeley return -EINVAL; 558f232836aSBrett Creeley } 559f232836aSBrett Creeley 560f232836aSBrett Creeley /* bitmap is modified in 64 bit chunks */ 56187bdf980SBrett Creeley bmp_bytes = ALIGN(DIV_ROUND_UP(length / region->page_size, 5623f589813SBrett Creeley sizeof(u64)), sizeof(u64)); 563f232836aSBrett Creeley if (bmp_bytes != bitmap_size) { 564f232836aSBrett Creeley dev_err(dev, 565f232836aSBrett Creeley "Calculated bitmap bytes %llu not equal to bitmap size %llu\n", 566f232836aSBrett Creeley bmp_bytes, bitmap_size); 567f232836aSBrett Creeley return -EINVAL; 568f232836aSBrett Creeley } 569f232836aSBrett Creeley 5700c320f22SBrett Creeley if (bmp_bytes > region->bmp_bytes) { 5710c320f22SBrett Creeley dev_err(dev, 5720c320f22SBrett Creeley "Calculated bitmap bytes %llu larger than region's cached bmp_bytes %llu\n", 5730c320f22SBrett Creeley bmp_bytes, region->bmp_bytes); 5740c320f22SBrett Creeley return -EINVAL; 5750c320f22SBrett Creeley } 5760c320f22SBrett Creeley 57787bdf980SBrett Creeley bmp_offset = DIV_ROUND_UP((iova - region->start) / 57887bdf980SBrett Creeley region->page_size, sizeof(u64)); 579f232836aSBrett Creeley 580f232836aSBrett Creeley dev_dbg(dev, 581f232836aSBrett Creeley "Syncing dirty bitmap, iova 0x%lx length 0x%lx, bmp_offset %llu bmp_bytes %llu\n", 582f232836aSBrett Creeley iova, length, bmp_offset, bmp_bytes); 583f232836aSBrett Creeley 58487bdf980SBrett Creeley err = pds_vfio_dirty_read_seq(pds_vfio, region, bmp_offset, bmp_bytes); 585f232836aSBrett Creeley if (err) 586f232836aSBrett Creeley return err; 587f232836aSBrett Creeley 58887bdf980SBrett Creeley err = pds_vfio_dirty_process_bitmaps(pds_vfio, region, dirty_bitmap, 58987bdf980SBrett Creeley bmp_offset, bmp_bytes); 590f232836aSBrett Creeley if (err) 591f232836aSBrett Creeley return err; 592f232836aSBrett Creeley 59387bdf980SBrett Creeley err = pds_vfio_dirty_write_ack(pds_vfio, region, bmp_offset, bmp_bytes); 594f232836aSBrett Creeley if (err) 595f232836aSBrett Creeley return err; 596f232836aSBrett Creeley 597f232836aSBrett Creeley return 0; 598f232836aSBrett Creeley } 599f232836aSBrett Creeley 600f232836aSBrett Creeley int pds_vfio_dma_logging_report(struct vfio_device *vdev, unsigned long iova, 601f232836aSBrett Creeley unsigned long length, struct iova_bitmap *dirty) 602f232836aSBrett Creeley { 603f232836aSBrett Creeley struct pds_vfio_pci_device *pds_vfio = 604f232836aSBrett Creeley container_of(vdev, struct pds_vfio_pci_device, 605f232836aSBrett Creeley vfio_coredev.vdev); 606f232836aSBrett Creeley int err; 607f232836aSBrett Creeley 608f232836aSBrett Creeley mutex_lock(&pds_vfio->state_mutex); 609f232836aSBrett Creeley err = pds_vfio_dirty_sync(pds_vfio, dirty, iova, length); 610*6a7e448cSBrett Creeley mutex_unlock(&pds_vfio->state_mutex); 611f232836aSBrett Creeley 612f232836aSBrett Creeley return err; 613f232836aSBrett Creeley } 614f232836aSBrett Creeley 615f232836aSBrett Creeley int pds_vfio_dma_logging_start(struct vfio_device *vdev, 616f232836aSBrett Creeley struct rb_root_cached *ranges, u32 nnodes, 617f232836aSBrett Creeley u64 *page_size) 618f232836aSBrett Creeley { 619f232836aSBrett Creeley struct pds_vfio_pci_device *pds_vfio = 620f232836aSBrett Creeley container_of(vdev, struct pds_vfio_pci_device, 621f232836aSBrett Creeley vfio_coredev.vdev); 622f232836aSBrett Creeley int err; 623f232836aSBrett Creeley 624f232836aSBrett Creeley mutex_lock(&pds_vfio->state_mutex); 625f232836aSBrett Creeley pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_IN_PROGRESS); 626f232836aSBrett Creeley err = pds_vfio_dirty_enable(pds_vfio, ranges, nnodes, page_size); 627*6a7e448cSBrett Creeley mutex_unlock(&pds_vfio->state_mutex); 628f232836aSBrett Creeley 629f232836aSBrett Creeley return err; 630f232836aSBrett Creeley } 631f232836aSBrett Creeley 632f232836aSBrett Creeley int pds_vfio_dma_logging_stop(struct vfio_device *vdev) 633f232836aSBrett Creeley { 634f232836aSBrett Creeley struct pds_vfio_pci_device *pds_vfio = 635f232836aSBrett Creeley container_of(vdev, struct pds_vfio_pci_device, 636f232836aSBrett Creeley vfio_coredev.vdev); 637f232836aSBrett Creeley 638f232836aSBrett Creeley mutex_lock(&pds_vfio->state_mutex); 639f232836aSBrett Creeley pds_vfio_dirty_disable(pds_vfio, true); 640*6a7e448cSBrett Creeley mutex_unlock(&pds_vfio->state_mutex); 641f232836aSBrett Creeley 642f232836aSBrett Creeley return 0; 643f232836aSBrett Creeley } 644