xref: /linux/drivers/iommu/iommufd/iova_bitmap.c (revision f94c1a114ac209977bdf5ca841b98424295ab1f0)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2022, Oracle and/or its affiliates.
4  * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
5  */
6 #include <linux/highmem.h>
7 #include <linux/iova_bitmap.h>
8 #include <linux/mm.h>
9 #include <linux/slab.h>
10 
11 #define BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE)
12 
/*
 * struct iova_bitmap_map - A bitmap representing an IOVA range
 *
 * Main data structure for tracking mapped user pages of bitmap data.
 *
 * For example, for something recording dirty IOVAs, it will be provided a
 * struct iova_bitmap structure, as a general structure for iterating the
 * total IOVA range. The struct iova_bitmap_map, though, represents the
 * subset of said IOVA space that is pinned by its parent structure (struct
 * iova_bitmap).
 *
 * The user does not need to know the exact location of the bits in the
 * bitmap. From the user's perspective the only API available is
 * iova_bitmap_set(), which records an IOVA *range* in the bitmap by setting
 * the corresponding bits.
 *
 * The bitmap is an array of u64 where each bit represents an IOVA range of
 * (1 << pgshift). With bit = (iova - base_iova) >> pgshift, where base_iova
 * is the IOVA that bit 0 stands for, the bitmap data to be set is:
 *
 *   data[bit / 64] & (1ULL << (bit % 64))
 */
struct iova_bitmap_map {
	/* base IOVA representing bit 0 of the first page */
	unsigned long iova;

	/* length of the IOVA range covered by the pinned pages */
	unsigned long length;

	/* page size order (log2) of the IOVA granule each bit stands for */
	unsigned long pgshift;

	/* byte offset into the first pinned page where the bitmap data starts */
	unsigned long pgoff;

	/* number of pages pinned */
	unsigned long npages;

	/* pinned pages representing the bitmap data */
	struct page **pages;
};
53 
54 /*
55  * struct iova_bitmap - The IOVA bitmap object
56  *
57  * Main data structure for iterating over the bitmap data.
58  *
59  * Abstracts the pinning work and iterates in IOVA ranges.
60  * It uses a windowing scheme and pins the bitmap in relatively
61  * big ranges e.g.
62  *
63  * The bitmap object uses one base page to store all the pinned pages
64  * pointers related to the bitmap. For sizeof(struct page*) == 8 it stores
65  * 512 struct page pointers which, if the base page size is 4K, it means
66  * 2M of bitmap data is pinned at a time. If the iova_bitmap page size is
67  * also 4K then the range window to iterate is 64G.
68  *
69  * For example iterating on a total IOVA range of 4G..128G, it will walk
70  * through this set of ranges:
71  *
72  *    4G  -  68G-1 (64G)
73  *    68G - 128G-1 (64G)
74  *
75  * An example of the APIs on how to use/iterate over the IOVA bitmap:
76  *
77  *   bitmap = iova_bitmap_alloc(iova, length, page_size, data);
78  *   if (IS_ERR(bitmap))
79  *       return PTR_ERR(bitmap);
80  *
81  *   ret = iova_bitmap_for_each(bitmap, arg, dirty_reporter_fn);
82  *
83  *   iova_bitmap_free(bitmap);
84  *
85  * Each iteration of the @dirty_reporter_fn is called with a unique @iova
86  * and @length argument, indicating the current range available through the
87  * iova_bitmap. The @dirty_reporter_fn uses iova_bitmap_set() to mark dirty
88  * areas (@iova_length) within that provided range, as following:
89  *
90  *   iova_bitmap_set(bitmap, iova, iova_length);
91  *
92  * The internals of the object uses an index @mapped_base_index that indexes
93  * which u64 word of the bitmap is mapped, up to @mapped_total_index.
94  * Those keep being incremented until @mapped_total_index is reached while
95  * mapping up to PAGE_SIZE / sizeof(struct page*) maximum of pages.
96  *
97  * The IOVA bitmap is usually located on what tracks DMA mapped ranges or
98  * some form of IOVA range tracking that co-relates to the user passed
99  * bitmap.
100  */
101 struct iova_bitmap {
102 	/* IOVA range representing the currently mapped bitmap data */
103 	struct iova_bitmap_map mapped;
104 
105 	/* userspace address of the bitmap */
106 	u8 __user *bitmap;
107 
108 	/* u64 index that @mapped points to */
109 	unsigned long mapped_base_index;
110 
111 	/* how many u64 can we walk in total */
112 	unsigned long mapped_total_index;
113 
114 	/* base IOVA of the whole bitmap */
115 	unsigned long iova;
116 
117 	/* length of the IOVA range for the whole bitmap */
118 	size_t length;
119 };
120 
121 /*
122  * Converts a relative IOVA to a bitmap index.
123  * This function provides the index into the u64 array (bitmap::bitmap)
124  * for a given IOVA offset.
125  * Relative IOVA means relative to the bitmap::mapped base IOVA
126  * (stored in mapped::iova). All computations in this file are done using
127  * relative IOVAs and thus avoid an extra subtraction against mapped::iova.
128  * The user API iova_bitmap_set() always uses a regular absolute IOVAs.
129  */
130 static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap,
131 						 unsigned long iova)
132 {
133 	return (iova >> bitmap->mapped.pgshift) /
134 	       BITS_PER_TYPE(*bitmap->bitmap);
135 }
136 
137 /*
138  * Converts a bitmap index to a *relative* IOVA.
139  */
140 static unsigned long iova_bitmap_index_to_offset(struct iova_bitmap *bitmap,
141 						 unsigned long index)
142 {
143 	unsigned long pgshift = bitmap->mapped.pgshift;
144 
145 	return (index * BITS_PER_TYPE(*bitmap->bitmap)) << pgshift;
146 }
147 
148 /*
149  * Returns the base IOVA of the mapped range.
150  */
151 static unsigned long iova_bitmap_mapped_iova(struct iova_bitmap *bitmap)
152 {
153 	unsigned long skip = bitmap->mapped_base_index;
154 
155 	return bitmap->iova + iova_bitmap_index_to_offset(bitmap, skip);
156 }
157 
158 static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap);
159 
160 /*
161  * Pins the bitmap user pages for the current range window.
162  * This is internal to IOVA bitmap and called when advancing the
163  * index (@mapped_base_index) or allocating the bitmap.
164  */
165 static int iova_bitmap_get(struct iova_bitmap *bitmap)
166 {
167 	struct iova_bitmap_map *mapped = &bitmap->mapped;
168 	unsigned long npages;
169 	u8 __user *addr;
170 	long ret;
171 
172 	/*
173 	 * @mapped_base_index is the index of the currently mapped u64 words
174 	 * that we have access. Anything before @mapped_base_index is not
175 	 * mapped. The range @mapped_base_index .. @mapped_total_index-1 is
176 	 * mapped but capped at a maximum number of pages.
177 	 */
178 	npages = DIV_ROUND_UP((bitmap->mapped_total_index -
179 			       bitmap->mapped_base_index) *
180 			       sizeof(*bitmap->bitmap), PAGE_SIZE);
181 
182 	/*
183 	 * Bitmap address to be pinned is calculated via pointer arithmetic
184 	 * with bitmap u64 word index.
185 	 */
186 	addr = bitmap->bitmap + bitmap->mapped_base_index;
187 
188 	/*
189 	 * We always cap at max number of 'struct page' a base page can fit.
190 	 * This is, for example, on x86 means 2M of bitmap data max.
191 	 */
192 	npages = min(npages + !!offset_in_page(addr),
193 		     PAGE_SIZE / sizeof(struct page *));
194 
195 	ret = pin_user_pages_fast((unsigned long)addr, npages,
196 				  FOLL_WRITE, mapped->pages);
197 	if (ret <= 0)
198 		return -EFAULT;
199 
200 	mapped->npages = (unsigned long)ret;
201 	/* Base IOVA where @pages point to i.e. bit 0 of the first page */
202 	mapped->iova = iova_bitmap_mapped_iova(bitmap);
203 
204 	/*
205 	 * offset of the page where pinned pages bit 0 is located.
206 	 * This handles the case where the bitmap is not PAGE_SIZE
207 	 * aligned.
208 	 */
209 	mapped->pgoff = offset_in_page(addr);
210 	mapped->length = iova_bitmap_mapped_length(bitmap);
211 	return 0;
212 }
213 
214 /*
215  * Unpins the bitmap user pages and clears @npages
216  * (un)pinning is abstracted from API user and it's done when advancing
217  * the index or freeing the bitmap.
218  */
219 static void iova_bitmap_put(struct iova_bitmap *bitmap)
220 {
221 	struct iova_bitmap_map *mapped = &bitmap->mapped;
222 
223 	if (mapped->npages) {
224 		unpin_user_pages(mapped->pages, mapped->npages);
225 		mapped->npages = 0;
226 	}
227 }
228 
229 /**
230  * iova_bitmap_alloc() - Allocates an IOVA bitmap object
231  * @iova: Start address of the IOVA range
232  * @length: Length of the IOVA range
233  * @page_size: Page size of the IOVA bitmap. It defines what each bit
234  *             granularity represents
235  * @data: Userspace address of the bitmap
236  *
237  * Allocates an IOVA object and initializes all its fields including the
238  * first user pages of @data.
239  *
240  * Return: A pointer to a newly allocated struct iova_bitmap
241  * or ERR_PTR() on error.
242  */
243 struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length,
244 				      unsigned long page_size, u64 __user *data)
245 {
246 	struct iova_bitmap_map *mapped;
247 	struct iova_bitmap *bitmap;
248 	int rc;
249 
250 	bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
251 	if (!bitmap)
252 		return ERR_PTR(-ENOMEM);
253 
254 	mapped = &bitmap->mapped;
255 	mapped->pgshift = __ffs(page_size);
256 	bitmap->bitmap = (u8 __user *)data;
257 	bitmap->mapped_total_index =
258 		iova_bitmap_offset_to_index(bitmap, length - 1) + 1;
259 	bitmap->iova = iova;
260 	bitmap->length = length;
261 	mapped->iova = iova;
262 	mapped->pages = (struct page **)__get_free_page(GFP_KERNEL);
263 	if (!mapped->pages) {
264 		rc = -ENOMEM;
265 		goto err;
266 	}
267 
268 	return bitmap;
269 
270 err:
271 	iova_bitmap_free(bitmap);
272 	return ERR_PTR(rc);
273 }
274 EXPORT_SYMBOL_NS_GPL(iova_bitmap_alloc, "IOMMUFD");
275 
276 /**
277  * iova_bitmap_free() - Frees an IOVA bitmap object
278  * @bitmap: IOVA bitmap to free
279  *
280  * It unpins and releases pages array memory and clears any leftover
281  * state.
282  */
283 void iova_bitmap_free(struct iova_bitmap *bitmap)
284 {
285 	struct iova_bitmap_map *mapped = &bitmap->mapped;
286 
287 	iova_bitmap_put(bitmap);
288 
289 	if (mapped->pages) {
290 		free_page((unsigned long)mapped->pages);
291 		mapped->pages = NULL;
292 	}
293 
294 	kfree(bitmap);
295 }
296 EXPORT_SYMBOL_NS_GPL(iova_bitmap_free, "IOMMUFD");
297 
298 /*
299  * Returns the remaining bitmap indexes from mapped_total_index to process for
300  * the currently pinned bitmap pages.
301  */
302 static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap)
303 {
304 	unsigned long remaining, bytes;
305 
306 	bytes = (bitmap->mapped.npages << PAGE_SHIFT) - bitmap->mapped.pgoff;
307 
308 	remaining = bitmap->mapped_total_index - bitmap->mapped_base_index;
309 	remaining = min_t(unsigned long, remaining,
310 			  DIV_ROUND_UP(bytes, sizeof(*bitmap->bitmap)));
311 
312 	return remaining;
313 }
314 
315 /*
316  * Returns the length of the mapped IOVA range.
317  */
318 static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap)
319 {
320 	unsigned long max_iova = bitmap->iova + bitmap->length - 1;
321 	unsigned long iova = iova_bitmap_mapped_iova(bitmap);
322 	unsigned long remaining;
323 
324 	/*
325 	 * iova_bitmap_mapped_remaining() returns a number of indexes which
326 	 * when converted to IOVA gives us a max length that the bitmap
327 	 * pinned data can cover. Afterwards, that is capped to
328 	 * only cover the IOVA range in @bitmap::iova .. @bitmap::length.
329 	 */
330 	remaining = iova_bitmap_index_to_offset(bitmap,
331 			iova_bitmap_mapped_remaining(bitmap));
332 
333 	if (iova + remaining - 1 > max_iova)
334 		remaining -= ((iova + remaining - 1) - max_iova);
335 
336 	return remaining;
337 }
338 
339 /*
340  * Returns true if [@iova..@iova+@length-1] is part of the mapped IOVA range.
341  */
342 static bool iova_bitmap_mapped_range(struct iova_bitmap_map *mapped,
343 				     unsigned long iova, size_t length)
344 {
345 	return mapped->npages &&
346 		(iova >= mapped->iova &&
347 		 (iova + length - 1) <= (mapped->iova + mapped->length - 1));
348 }
349 
350 /*
351  * Advances to a selected range, releases the current pinned
352  * pages and pins the next set of bitmap pages.
353  * Returns 0 on success or otherwise errno.
354  */
355 static int iova_bitmap_advance_to(struct iova_bitmap *bitmap,
356 				  unsigned long iova)
357 {
358 	unsigned long index;
359 
360 	index = iova_bitmap_offset_to_index(bitmap, iova - bitmap->iova);
361 	if (index >= bitmap->mapped_total_index)
362 		return -EINVAL;
363 	bitmap->mapped_base_index = index;
364 
365 	iova_bitmap_put(bitmap);
366 
367 	/* Pin the next set of bitmap pages */
368 	return iova_bitmap_get(bitmap);
369 }
370 
371 /**
372  * iova_bitmap_for_each() - Iterates over the bitmap
373  * @bitmap: IOVA bitmap to iterate
374  * @opaque: Additional argument to pass to the callback
375  * @fn: Function that gets called for each IOVA range
376  *
377  * Helper function to iterate over bitmap data representing a portion of IOVA
378  * space. It hides the complexity of iterating bitmaps and translating the
379  * mapped bitmap user pages into IOVA ranges to process.
380  *
381  * Return: 0 on success, and an error on failure either upon
382  * iteration or when the callback returns an error.
383  */
384 int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque,
385 			 iova_bitmap_fn_t fn)
386 {
387 	return fn(bitmap, bitmap->iova, bitmap->length, opaque);
388 }
389 EXPORT_SYMBOL_NS_GPL(iova_bitmap_for_each, "IOMMUFD");
390 
391 /**
392  * iova_bitmap_set() - Records an IOVA range in bitmap
393  * @bitmap: IOVA bitmap
394  * @iova: IOVA to start
395  * @length: IOVA range length
396  *
397  * Set the bits corresponding to the range [iova .. iova+length-1] in
398  * the user bitmap.
399  *
400  */
401 void iova_bitmap_set(struct iova_bitmap *bitmap,
402 		     unsigned long iova, size_t length)
403 {
404 	struct iova_bitmap_map *mapped = &bitmap->mapped;
405 	unsigned long cur_bit, last_bit, last_page_idx;
406 
407 update_indexes:
408 	if (unlikely(!iova_bitmap_mapped_range(mapped, iova, length))) {
409 		/*
410 		 * The attempt to advance the base index to @iova
411 		 * may fail if it's out of bounds, or pinning the pages
412 		 * returns an error.
413 		 */
414 		if (iova_bitmap_advance_to(bitmap, iova))
415 			return;
416 	}
417 
418 	last_page_idx = mapped->npages - 1;
419 	cur_bit = ((iova - mapped->iova) >>
420 		mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
421 	last_bit = (((iova + length - 1) - mapped->iova) >>
422 		mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
423 
424 	do {
425 		unsigned int page_idx = cur_bit / BITS_PER_PAGE;
426 		unsigned int offset = cur_bit % BITS_PER_PAGE;
427 		unsigned int nbits = min(BITS_PER_PAGE - offset,
428 					 last_bit - cur_bit + 1);
429 		void *kaddr;
430 
431 		if (unlikely(page_idx > last_page_idx)) {
432 			unsigned long left =
433 				((last_bit - cur_bit + 1) << mapped->pgshift);
434 
435 			iova += (length - left);
436 			length = left;
437 			goto update_indexes;
438 		}
439 
440 		kaddr = kmap_local_page(mapped->pages[page_idx]);
441 		bitmap_set(kaddr, offset, nbits);
442 		kunmap_local(kaddr);
443 		cur_bit += nbits;
444 	} while (cur_bit <= last_bit);
445 }
446 EXPORT_SYMBOL_NS_GPL(iova_bitmap_set, "IOMMUFD");
447