// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2023 Advanced Micro Devices, Inc. */

#include <linux/interval_tree.h>
#include <linux/vfio.h>

#include <linux/pds/pds_common.h>
#include <linux/pds/pds_core_if.h>
#include <linux/pds/pds_adminq.h>

#include "vfio_dev.h"
#include "cmds.h"
#include "dirty.h"

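/*
 * Direction selector for pds_vfio_dirty_seq_ack(): READ_SEQ pulls the
 * device-written sequence bitmap to the host, WRITE_ACK pushes the host's
 * acknowledgment bitmap back to the device.
 */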
#define READ_SEQ true
#define WRITE_ACK false

bool pds_vfio_dirty_is_enabled(struct pds_vfio_pci_device *pds_vfio)
{
	return pds_vfio->dirty.is_enabled;
}

void pds_vfio_dirty_set_enabled(struct pds_vfio_pci_device *pds_vfio)
{
	pds_vfio->dirty.is_enabled = true;
}

void pds_vfio_dirty_set_disabled(struct pds_vfio_pci_device *pds_vfio)
{
	pds_vfio->dirty.is_enabled = false;
}

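/*
 * Query the device's dirty-tracking status and log the region layout it
 * reports (DMA base, page count, page size) via dev_dbg. Debug-only output;
 * any failure here is silently ignored.
 */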
static void
pds_vfio_print_guest_region_info(struct pds_vfio_pci_device *pds_vfio,
				 u8 max_regions)
{
	int len = max_regions * sizeof(struct pds_lm_dirty_region_info);
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
	struct pds_lm_dirty_region_info *region_info;
	dma_addr_t regions_dma;
	u8 num_regions;
	int err;

	region_info = kcalloc(max_regions,
			      sizeof(struct pds_lm_dirty_region_info),
			      GFP_KERNEL);
	if (!region_info)
		return;

	regions_dma =
		dma_map_single(pdsc_dev, region_info, len, DMA_FROM_DEVICE);
	if (dma_mapping_error(pdsc_dev, regions_dma))
		goto out_free_region_info;

	err = pds_vfio_dirty_status_cmd(pds_vfio, regions_dma, &max_regions,
					&num_regions);
	dma_unmap_single(pdsc_dev, regions_dma, len, DMA_FROM_DEVICE);
	if (err)
		goto out_free_region_info;

	for (unsigned int i = 0; i < num_regions; i++)
		dev_dbg(&pdev->dev,
			"region_info[%d]: dma_base 0x%llx page_count %u page_size_log2 %u\n",
			i, le64_to_cpu(region_info[i].dma_base),
			le32_to_cpu(region_info[i].page_count),
			region_info[i].page_size_log2);

out_free_region_info:
	kfree(region_info);
}

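/*
 * Allocate the host-side sequence and acknowledgment bitmaps for one
 * tracking region. Both are vzalloc()'d so large bitmaps don't require
 * physically contiguous pages.
 */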
static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_region *region,
					unsigned long bytes)
{
	unsigned long *host_seq_bmp, *host_ack_bmp;

	host_seq_bmp = vzalloc(bytes);
	if (!host_seq_bmp)
		return -ENOMEM;

	host_ack_bmp = vzalloc(bytes);
	if (!host_ack_bmp) {
		vfree(host_seq_bmp);
		return -ENOMEM;
	}

	region->host_seq = host_seq_bmp;
	region->host_ack = host_ack_bmp;
	region->bmp_bytes = bytes;

	return 0;
}

static void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty)
{
	if (!dirty->regions)
		return;

	for (int i = 0; i < dirty->num_regions; i++) {
		struct pds_vfio_region *region = &dirty->regions[i];

		vfree(region->host_seq);
		vfree(region->host_ack);
		region->host_seq = NULL;
		region->host_ack = NULL;
		region->bmp_bytes = 0;
	}
}

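/*
 * Tear down one region's scatter-gather list: unmap it from the device and
 * release the host allocation used for seq/ack bitmap transfers.
 */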
static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio,
				      struct pds_vfio_region *region)
{
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;

	dma_unmap_single(pdsc_dev, region->sgl_addr,
			 region->num_sge * sizeof(struct pds_lm_sg_elem),
			 DMA_BIDIRECTIONAL);
	kfree(region->sgl);

	region->num_sge = 0;
	region->sgl = NULL;
	region->sgl_addr = 0;
}

static void pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio)
{
	struct pds_vfio_dirty *dirty = &pds_vfio->dirty;

	if (!dirty->regions)
		return;

	for (int i = 0; i < dirty->num_regions; i++) {
		struct pds_vfio_region *region = &dirty->regions[i];

		if (region->sgl)
			__pds_vfio_dirty_free_sgl(pds_vfio, region);
	}
}

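/*
 * Size and map a scatter-gather list big enough to cover a region's dirty
 * bitmap: one SG element per page of bitmap data, where one bitmap page
 * covers PAGE_SIZE * 8 tracked pages.
 */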
static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio,
				    struct pds_vfio_region *region,
				    u32 page_count)
{
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
	struct pds_lm_sg_elem *sgl;
	dma_addr_t sgl_addr;
	size_t sgl_size;
	u32 max_sge;

	max_sge = DIV_ROUND_UP(page_count, PAGE_SIZE * 8);
	sgl_size = max_sge * sizeof(struct pds_lm_sg_elem);

	sgl = kzalloc(sgl_size, GFP_KERNEL);
	if (!sgl)
		return -ENOMEM;

	sgl_addr = dma_map_single(pdsc_dev, sgl, sgl_size, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(pdsc_dev, sgl_addr)) {
		kfree(sgl);
		return -EIO;
	}

	region->sgl = sgl;
	region->num_sge = max_sge;
	region->sgl_addr = sgl_addr;

	return 0;
}

static void pds_vfio_dirty_free_regions(struct pds_vfio_dirty *dirty)
{
	vfree(dirty->regions);
	dirty->regions = NULL;
	dirty->num_regions = 0;
}

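/*
 * Build the driver's per-region tracking state from the region layout the
 * device accepted: one struct pds_vfio_region per device region, each with
 * host seq/ack bitmaps, an SG list, and its byte offset into the device's
 * combined dirty bitmap.
 */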
static int pds_vfio_dirty_alloc_regions(struct pds_vfio_pci_device *pds_vfio,
					struct pds_lm_dirty_region_info *region_info,
					u64 region_page_size, u8 num_regions)
{
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct pds_vfio_dirty *dirty = &pds_vfio->dirty;
	u32 dev_bmp_offset_byte = 0;
	int err;

	dirty->regions = vcalloc(num_regions, sizeof(struct pds_vfio_region));
	if (!dirty->regions)
		return -ENOMEM;
	dirty->num_regions = num_regions;

	for (int i = 0; i < num_regions; i++) {
		struct pds_lm_dirty_region_info *ri = &region_info[i];
		struct pds_vfio_region *region = &dirty->regions[i];
		u64 region_size, region_start;
		u32 page_count;

		/* page_count might be adjusted by the device */
		page_count = le32_to_cpu(ri->page_count);
		region_start = le64_to_cpu(ri->dma_base);
		region_size = page_count * region_page_size;

		err = pds_vfio_dirty_alloc_bitmaps(region,
						   page_count / BITS_PER_BYTE);
		if (err) {
			dev_err(&pdev->dev, "Failed to alloc dirty bitmaps: %pe\n",
				ERR_PTR(err));
			goto out_free_regions;
		}

		err = pds_vfio_dirty_alloc_sgl(pds_vfio, region, page_count);
		if (err) {
			dev_err(&pdev->dev, "Failed to alloc dirty sg lists: %pe\n",
				ERR_PTR(err));
			goto out_free_regions;
		}

		region->size = region_size;
		region->start = region_start;
		region->page_size = region_page_size;
		region->dev_bmp_offset_start_byte = dev_bmp_offset_byte;

		dev_bmp_offset_byte += page_count / BITS_PER_BYTE;
		if (dev_bmp_offset_byte % BITS_PER_BYTE) {
			dev_err(&pdev->dev, "Device bitmap offset is mis-aligned\n");
			err = -EINVAL;
			goto out_free_regions;
		}
	}

	return 0;

out_free_regions:
	pds_vfio_dirty_free_bitmaps(dirty);
	pds_vfio_dirty_free_sgl(pds_vfio);
	pds_vfio_dirty_free_regions(dirty);

	return err;
}

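/*
 * Enable dirty page tracking for the requested IOVA ranges. The ranges are
 * collapsed to at most the number of regions the device supports, described
 * to the device via the dirty-enable admin command, and then mirrored in the
 * driver's region state so later syncs can read/ack the device bitmaps.
 */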
static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio,
				 struct rb_root_cached *ranges, u32 nnodes,
				 u64 *page_size)
{
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
	struct pds_lm_dirty_region_info *region_info;
	struct interval_tree_node *node = NULL;
	u64 region_page_size = *page_size;
	u8 max_regions = 0, num_regions;
	dma_addr_t regions_dma = 0;
	u32 num_ranges = nnodes;
	int err;
	u16 len;

	dev_dbg(&pdev->dev, "vf%u: Start dirty page tracking\n",
		pds_vfio->vf_id);

	if (pds_vfio_dirty_is_enabled(pds_vfio))
		return -EINVAL;

	/* find if dirty tracking is disabled, i.e. num_regions == 0 */
	err = pds_vfio_dirty_status_cmd(pds_vfio, 0, &max_regions,
					&num_regions);
	if (err < 0) {
		dev_err(&pdev->dev, "Failed to get dirty status, err %pe\n",
			ERR_PTR(err));
		return err;
	} else if (num_regions) {
		dev_err(&pdev->dev,
			"Dirty tracking already enabled for %d regions\n",
			num_regions);
		return -EEXIST;
	} else if (!max_regions) {
		dev_err(&pdev->dev,
			"Device doesn't support dirty tracking, max_regions %d\n",
			max_regions);
		return -EOPNOTSUPP;
	}

	if (num_ranges > max_regions) {
		vfio_combine_iova_ranges(ranges, nnodes, max_regions);
		num_ranges = max_regions;
	}

	region_info = kcalloc(num_ranges, sizeof(*region_info), GFP_KERNEL);
	if (!region_info)
		return -ENOMEM;
	len = num_ranges * sizeof(*region_info);

	node = interval_tree_iter_first(ranges, 0, ULONG_MAX);
	if (!node) {
		err = -EINVAL;
		goto out_free_region_info;
	}
	for (int i = 0; i < num_ranges; i++) {
		struct pds_lm_dirty_region_info *ri = &region_info[i];
		u64 region_size = node->last - node->start + 1;
		u64 region_start = node->start;
		u32 page_count;

		page_count = DIV_ROUND_UP(region_size, region_page_size);

		ri->dma_base = cpu_to_le64(region_start);
		ri->page_count = cpu_to_le32(page_count);
		ri->page_size_log2 = ilog2(region_page_size);

		dev_dbg(&pdev->dev,
			"region_info[%d]: region_start 0x%llx region_end 0x%lx region_size 0x%llx page_count %u page_size %llu\n",
			i, region_start, node->last, region_size, page_count,
			region_page_size);

		node = interval_tree_iter_next(node, 0, ULONG_MAX);
	}

	regions_dma = dma_map_single(pdsc_dev, (void *)region_info, len,
				     DMA_BIDIRECTIONAL);
	if (dma_mapping_error(pdsc_dev, regions_dma)) {
		err = -ENOMEM;
		goto out_free_region_info;
	}

	err = pds_vfio_dirty_enable_cmd(pds_vfio, regions_dma, num_ranges);
	dma_unmap_single(pdsc_dev, regions_dma, len, DMA_BIDIRECTIONAL);
	if (err)
		goto out_free_region_info;

	err = pds_vfio_dirty_alloc_regions(pds_vfio, region_info,
					   region_page_size, num_ranges);
	if (err) {
		dev_err(&pdev->dev,
			"Failed to allocate %d regions for tracking dirty regions: %pe\n",
			num_ranges, ERR_PTR(err));
		goto out_dirty_disable;
	}

	pds_vfio_dirty_set_enabled(pds_vfio);

	pds_vfio_print_guest_region_info(pds_vfio, max_regions);

	kfree(region_info);

	return 0;

out_dirty_disable:
	pds_vfio_dirty_disable_cmd(pds_vfio);
out_free_region_info:
	kfree(region_info);
	return err;
}

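/*
 * Disable dirty tracking and release all host-side tracking state. When
 * send_cmd is false the device is not told to stop tracking; callers use
 * this when the device state is assumed to be already cleared.
 */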
void pds_vfio_dirty_disable(struct pds_vfio_pci_device *pds_vfio, bool send_cmd)
{
	if (pds_vfio_dirty_is_enabled(pds_vfio)) {
		pds_vfio_dirty_set_disabled(pds_vfio);
		if (send_cmd)
			pds_vfio_dirty_disable_cmd(pds_vfio);
		pds_vfio_dirty_free_sgl(pds_vfio);
		pds_vfio_dirty_free_bitmaps(&pds_vfio->dirty);
		pds_vfio_dirty_free_regions(&pds_vfio->dirty);
	}

	if (send_cmd)
		pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_NONE);
}

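/*
 * Transfer a window of a region's dirty bitmap between host and device.
 * The host bitmap pages are gathered into the region's SG list and handed
 * to the device with a seq/ack admin command: READ_SEQ DMAs the device's
 * sequence bitmap into host memory, WRITE_ACK DMAs the host's ack bitmap
 * back out.
 */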
static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio,
				  struct pds_vfio_region *region,
				  unsigned long *seq_ack_bmp, u32 offset,
				  u32 bmp_bytes, bool read_seq)
{
	const char *bmp_type_str = read_seq ? "read_seq" : "write_ack";
	u8 dma_dir = read_seq ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
	unsigned long long npages;
	struct sg_table sg_table;
	struct scatterlist *sg;
	struct page **pages;
	u32 page_offset;
	const void *bmp;
	size_t size;
	u16 num_sge;
	int err;
	int i;

	bmp = (void *)((u64)seq_ack_bmp + offset);
	page_offset = offset_in_page(bmp);
	bmp -= page_offset;

	/*
	 * Start and end of bitmap section to seq/ack might not be page
	 * aligned, so use the page_offset to account for that so there
	 * will be enough pages to represent the bmp_bytes
	 */
	npages = DIV_ROUND_UP_ULL(bmp_bytes + page_offset, PAGE_SIZE);
	pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	for (unsigned long long i = 0; i < npages; i++) {
		struct page *page = vmalloc_to_page(bmp);

		if (!page) {
			err = -EFAULT;
			goto out_free_pages;
		}

		pages[i] = page;
		bmp += PAGE_SIZE;
	}

	err = sg_alloc_table_from_pages(&sg_table, pages, npages, page_offset,
					bmp_bytes, GFP_KERNEL);
	if (err)
		goto out_free_pages;

	err = dma_map_sgtable(pdsc_dev, &sg_table, dma_dir, 0);
	if (err)
		goto out_free_sg_table;

	for_each_sgtable_dma_sg(&sg_table, sg, i) {
		struct pds_lm_sg_elem *sg_elem = &region->sgl[i];

		sg_elem->addr = cpu_to_le64(sg_dma_address(sg));
		sg_elem->len = cpu_to_le32(sg_dma_len(sg));
	}

	num_sge = sg_table.nents;
	size = num_sge * sizeof(struct pds_lm_sg_elem);
	offset += region->dev_bmp_offset_start_byte;
	dma_sync_single_for_device(pdsc_dev, region->sgl_addr, size, dma_dir);
	err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, region->sgl_addr, num_sge,
					 offset, bmp_bytes, read_seq);
	if (err)
		dev_err(&pdev->dev,
			"Dirty bitmap %s failed offset %u bmp_bytes %u num_sge %u DMA 0x%llx: %pe\n",
			bmp_type_str, offset, bmp_bytes,
			num_sge, region->sgl_addr, ERR_PTR(err));
	dma_sync_single_for_cpu(pdsc_dev, region->sgl_addr, size, dma_dir);

	dma_unmap_sgtable(pdsc_dev, &sg_table, dma_dir, 0);
out_free_sg_table:
	sg_free_table(&sg_table);
out_free_pages:
	kfree(pages);

	return err;
}

static int pds_vfio_dirty_write_ack(struct pds_vfio_pci_device *pds_vfio,
				    struct pds_vfio_region *region,
				    u32 offset, u32 len)
{
	return pds_vfio_dirty_seq_ack(pds_vfio, region, region->host_ack,
				      offset, len, WRITE_ACK);
}

static int pds_vfio_dirty_read_seq(struct pds_vfio_pci_device *pds_vfio,
				   struct pds_vfio_region *region,
				   u32 offset, u32 len)
{
	return pds_vfio_dirty_seq_ack(pds_vfio, region, region->host_seq,
				      offset, len, READ_SEQ);
}

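/*
 * Walk the seq and ack bitmaps 64 bits at a time; any bit that differs is a
 * page the device dirtied since the last sync. Report it to the VFIO
 * iova_bitmap and copy seq into ack so the same pages are not reported again
 * until they are re-dirtied.
 */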
static int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio,
					  struct pds_vfio_region *region,
					  struct iova_bitmap *dirty_bitmap,
					  u32 bmp_offset, u32 len_bytes)
{
	u64 page_size = region->page_size;
	u64 region_start = region->start;
	u32 bmp_offset_bit;
	__le64 *seq, *ack;
	int dword_count;

	dword_count = len_bytes / sizeof(u64);
	seq = (__le64 *)((u64)region->host_seq + bmp_offset);
	ack = (__le64 *)((u64)region->host_ack + bmp_offset);
	bmp_offset_bit = bmp_offset * 8;

	for (int i = 0; i < dword_count; i++) {
		u64 xor = le64_to_cpu(seq[i]) ^ le64_to_cpu(ack[i]);

		/* prepare for next write_ack call */
		ack[i] = seq[i];

		for (u8 bit_i = 0; bit_i < BITS_PER_TYPE(u64); ++bit_i) {
			if (xor & BIT(bit_i)) {
				u64 abs_bit_i = bmp_offset_bit +
						i * BITS_PER_TYPE(u64) + bit_i;
				u64 addr = abs_bit_i * page_size + region_start;

				iova_bitmap_set(dirty_bitmap, addr, page_size);
			}
		}
	}

	return 0;
}

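/* Find the tracking region whose IOVA range contains the given address. */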
static struct pds_vfio_region *
pds_vfio_get_region(struct pds_vfio_pci_device *pds_vfio, unsigned long iova)
{
	struct pds_vfio_dirty *dirty = &pds_vfio->dirty;

	for (int i = 0; i < dirty->num_regions; i++) {
		struct pds_vfio_region *region = &dirty->regions[i];

		if (iova >= region->start &&
		    iova < (region->start + region->size))
			return region;
	}

	return NULL;
}

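/*
 * Sync one IOVA window into the caller's iova_bitmap: read the device's
 * sequence bitmap for the covering region, report seq^ack differences, then
 * write the updated ack bitmap back to the device. Offsets and lengths are
 * validated and rounded to whole u64 chunks of the bitmap.
 */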
static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio,
			       struct iova_bitmap *dirty_bitmap,
			       unsigned long iova, unsigned long length)
{
	struct device *dev = &pds_vfio->vfio_coredev.pdev->dev;
	struct pds_vfio_region *region;
	u64 bmp_offset, bmp_bytes;
	u64 bitmap_size, pages;
	int err;

	dev_dbg(dev, "vf%u: Get dirty page bitmap\n", pds_vfio->vf_id);

	if (!pds_vfio_dirty_is_enabled(pds_vfio)) {
		dev_err(dev, "vf%u: Sync failed, dirty tracking is disabled\n",
			pds_vfio->vf_id);
		return -EINVAL;
	}

	region = pds_vfio_get_region(pds_vfio, iova);
	if (!region) {
		dev_err(dev, "vf%u: Failed to find region that contains iova 0x%lx length 0x%lx\n",
			pds_vfio->vf_id, iova, length);
		return -EINVAL;
	}

	pages = DIV_ROUND_UP(length, region->page_size);
	bitmap_size =
		round_up(pages, sizeof(u64) * BITS_PER_BYTE) / BITS_PER_BYTE;

	dev_dbg(dev,
		"vf%u: iova 0x%lx length %lu page_size %llu pages %llu bitmap_size %llu\n",
		pds_vfio->vf_id, iova, length, region->page_size,
		pages, bitmap_size);

	if (!length || ((iova - region->start + length) > region->size)) {
		dev_err(dev, "Invalid iova 0x%lx and/or length 0x%lx to sync\n",
			iova, length);
		return -EINVAL;
	}

	/* bitmap is modified in 64 bit chunks */
	bmp_bytes = ALIGN(DIV_ROUND_UP(length / region->page_size,
				       sizeof(u64)), sizeof(u64));
	if (bmp_bytes != bitmap_size) {
		dev_err(dev,
			"Calculated bitmap bytes %llu not equal to bitmap size %llu\n",
			bmp_bytes, bitmap_size);
		return -EINVAL;
	}

	if (bmp_bytes > region->bmp_bytes) {
		dev_err(dev,
			"Calculated bitmap bytes %llu larger than region's cached bmp_bytes %llu\n",
			bmp_bytes, region->bmp_bytes);
		return -EINVAL;
	}

	bmp_offset = DIV_ROUND_UP((iova - region->start) /
				  region->page_size, sizeof(u64));

	dev_dbg(dev,
		"Syncing dirty bitmap, iova 0x%lx length 0x%lx, bmp_offset %llu bmp_bytes %llu\n",
		iova, length, bmp_offset, bmp_bytes);

	err = pds_vfio_dirty_read_seq(pds_vfio, region, bmp_offset, bmp_bytes);
	if (err)
		return err;

	err = pds_vfio_dirty_process_bitmaps(pds_vfio, region, dirty_bitmap,
					     bmp_offset, bmp_bytes);
	if (err)
		return err;

	err = pds_vfio_dirty_write_ack(pds_vfio, region, bmp_offset, bmp_bytes);
	if (err)
		return err;

	return 0;
}

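/*
 * VFIO dirty-logging entry points. All of them serialize on state_mutex
 * with the rest of the driver's migration state handling.
 */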
int pds_vfio_dma_logging_report(struct vfio_device *vdev, unsigned long iova,
				unsigned long length, struct iova_bitmap *dirty)
{
	struct pds_vfio_pci_device *pds_vfio =
		container_of(vdev, struct pds_vfio_pci_device,
			     vfio_coredev.vdev);
	int err;

	mutex_lock(&pds_vfio->state_mutex);
	err = pds_vfio_dirty_sync(pds_vfio, dirty, iova, length);
	mutex_unlock(&pds_vfio->state_mutex);

	return err;
}

int pds_vfio_dma_logging_start(struct vfio_device *vdev,
			       struct rb_root_cached *ranges, u32 nnodes,
			       u64 *page_size)
{
	struct pds_vfio_pci_device *pds_vfio =
		container_of(vdev, struct pds_vfio_pci_device,
			     vfio_coredev.vdev);
	int err;

	mutex_lock(&pds_vfio->state_mutex);
	pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_IN_PROGRESS);
	err = pds_vfio_dirty_enable(pds_vfio, ranges, nnodes, page_size);
	mutex_unlock(&pds_vfio->state_mutex);

	return err;
}

int pds_vfio_dma_logging_stop(struct vfio_device *vdev)
{
	struct pds_vfio_pci_device *pds_vfio =
		container_of(vdev, struct pds_vfio_pci_device,
			     vfio_coredev.vdev);

	mutex_lock(&pds_vfio->state_mutex);
	pds_vfio_dirty_disable(pds_vfio, true);
	mutex_unlock(&pds_vfio->state_mutex);

	return 0;
}