1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2022, Oracle and/or its affiliates.
4 * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
5 */
6 #include <linux/highmem.h>
7 #include <linux/iova_bitmap.h>
8 #include <linux/mm.h>
9 #include <linux/slab.h>
10
/* Number of bitmap bits that fit in one page (PAGE_SIZE bytes) of bitmap data */
#define BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE)
12
/*
 * struct iova_bitmap_map - A bitmap representing an IOVA range
 *
 * Main data structure for tracking mapped user pages of bitmap data.
 *
 * For example, for something recording dirty IOVAs, it will be provided a
 * struct iova_bitmap structure, as a general structure for iterating the
 * total IOVA range. The struct iova_bitmap_map, though, represents the
 * subset of said IOVA space that is pinned by its parent structure (struct
 * iova_bitmap).
 *
 * The user does not need to know the exact location of the bits in the
 * bitmap. From the user's perspective the only API available is
 * iova_bitmap_set(), which records the IOVA *range* in the bitmap by
 * setting the corresponding bits.
 *
 * Each bit of the bitmap represents an IOVA range of size (1 << pgshift).
 * Viewing the user bitmap as an array of u64 (the type iova_bitmap_alloc()
 * takes) and taking @iova relative to the base IOVA of the bitmap, the bit
 * recording a given IOVA is:
 *
 *   data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64))
 */
struct iova_bitmap_map {
	/* base IOVA representing bit 0 of the first page */
	unsigned long iova;

	/* length of the IOVA range covered by the pinned pages */
	unsigned long length;

	/* log2 of the IOVA page size that each bit represents */
	unsigned long pgshift;

	/* byte offset of the bitmap data within the first pinned user page */
	unsigned long pgoff;

	/* number of pages pinned */
	unsigned long npages;

	/* pinned pages representing the bitmap data */
	struct page **pages;
};
53
/*
 * struct iova_bitmap - The IOVA bitmap object
 *
 * Main data structure for iterating over the bitmap data.
 *
 * Abstracts the pinning work and iterates in IOVA ranges.
 * It uses a windowing scheme and pins the bitmap in relatively
 * big ranges, as described below.
 *
 * The bitmap object uses one base page to store all the pinned pages
 * pointers related to the bitmap. For sizeof(struct page*) == 8 it stores
 * 512 struct page pointers which, if the base page size is 4K, it means
 * 2M of bitmap data is pinned at a time. If the iova_bitmap page size is
 * also 4K then the range window to iterate is 64G.
 *
 * For example iterating on a total IOVA range of 4G..128G, it will walk
 * through this set of ranges:
 *
 *    4G -  68G-1 (64G)
 *   68G - 128G-1 (60G)
 *
 * An example of the APIs on how to use/iterate over the IOVA bitmap:
 *
 *   bitmap = iova_bitmap_alloc(iova, length, page_size, data);
 *   if (IS_ERR(bitmap))
 *           return PTR_ERR(bitmap);
 *
 *   ret = iova_bitmap_for_each(bitmap, arg, dirty_reporter_fn);
 *
 *   iova_bitmap_free(bitmap);
 *
 * iova_bitmap_for_each() calls @dirty_reporter_fn with the @iova and
 * @length of the range described by the iova_bitmap. The
 * @dirty_reporter_fn uses iova_bitmap_set() to mark dirty areas
 * (@iova_length) within that provided range, as following:
 *
 *   iova_bitmap_set(bitmap, iova, iova_length);
 *
 * iova_bitmap_set() pins the window of the user bitmap that covers the
 * range being set whenever that range is not already pinned.
 *
 * The internals of the object use an index @mapped_base_index that records
 * at which element of the user bitmap (an array of u8, see @bitmap) the
 * pinned window starts, bounded by @mapped_total_index. A window pins at
 * most PAGE_SIZE / sizeof(struct page*) pages.
 *
 * The IOVA bitmap is usually located on what tracks DMA mapped ranges or
 * some form of IOVA range tracking that correlates to the user passed
 * bitmap.
 */
struct iova_bitmap {
	/* IOVA range representing the currently mapped bitmap data */
	struct iova_bitmap_map mapped;

	/* userspace address of the bitmap */
	u8 __user *bitmap;

	/* index into @bitmap at which @mapped starts */
	unsigned long mapped_base_index;

	/* number of @bitmap elements needed to cover the whole IOVA range */
	unsigned long mapped_total_index;

	/* base IOVA of the whole bitmap */
	unsigned long iova;

	/* length of the IOVA range for the whole bitmap */
	size_t length;
};
120
121 /*
122 * Converts a relative IOVA to a bitmap index.
123 * This function provides the index into the u64 array (bitmap::bitmap)
124 * for a given IOVA offset.
125 * Relative IOVA means relative to the bitmap::mapped base IOVA
126 * (stored in mapped::iova). All computations in this file are done using
127 * relative IOVAs and thus avoid an extra subtraction against mapped::iova.
128 * The user API iova_bitmap_set() always uses a regular absolute IOVAs.
129 */
iova_bitmap_offset_to_index(struct iova_bitmap * bitmap,unsigned long iova)130 static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap,
131 unsigned long iova)
132 {
133 unsigned long pgsize = 1 << bitmap->mapped.pgshift;
134
135 return iova / (BITS_PER_TYPE(*bitmap->bitmap) * pgsize);
136 }
137
138 /*
139 * Converts a bitmap index to a *relative* IOVA.
140 */
iova_bitmap_index_to_offset(struct iova_bitmap * bitmap,unsigned long index)141 static unsigned long iova_bitmap_index_to_offset(struct iova_bitmap *bitmap,
142 unsigned long index)
143 {
144 unsigned long pgshift = bitmap->mapped.pgshift;
145
146 return (index * BITS_PER_TYPE(*bitmap->bitmap)) << pgshift;
147 }
148
149 /*
150 * Returns the base IOVA of the mapped range.
151 */
iova_bitmap_mapped_iova(struct iova_bitmap * bitmap)152 static unsigned long iova_bitmap_mapped_iova(struct iova_bitmap *bitmap)
153 {
154 unsigned long skip = bitmap->mapped_base_index;
155
156 return bitmap->iova + iova_bitmap_index_to_offset(bitmap, skip);
157 }
158
/* Forward declaration: iova_bitmap_get() uses it ahead of its definition */
static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap);
160
161 /*
162 * Pins the bitmap user pages for the current range window.
163 * This is internal to IOVA bitmap and called when advancing the
164 * index (@mapped_base_index) or allocating the bitmap.
165 */
iova_bitmap_get(struct iova_bitmap * bitmap)166 static int iova_bitmap_get(struct iova_bitmap *bitmap)
167 {
168 struct iova_bitmap_map *mapped = &bitmap->mapped;
169 unsigned long npages;
170 u8 __user *addr;
171 long ret;
172
173 /*
174 * @mapped_base_index is the index of the currently mapped u64 words
175 * that we have access. Anything before @mapped_base_index is not
176 * mapped. The range @mapped_base_index .. @mapped_total_index-1 is
177 * mapped but capped at a maximum number of pages.
178 */
179 npages = DIV_ROUND_UP((bitmap->mapped_total_index -
180 bitmap->mapped_base_index) *
181 sizeof(*bitmap->bitmap), PAGE_SIZE);
182
183 /*
184 * Bitmap address to be pinned is calculated via pointer arithmetic
185 * with bitmap u64 word index.
186 */
187 addr = bitmap->bitmap + bitmap->mapped_base_index;
188
189 /*
190 * We always cap at max number of 'struct page' a base page can fit.
191 * This is, for example, on x86 means 2M of bitmap data max.
192 */
193 npages = min(npages + !!offset_in_page(addr),
194 PAGE_SIZE / sizeof(struct page *));
195
196 ret = pin_user_pages_fast((unsigned long)addr, npages,
197 FOLL_WRITE, mapped->pages);
198 if (ret <= 0)
199 return -EFAULT;
200
201 mapped->npages = (unsigned long)ret;
202 /* Base IOVA where @pages point to i.e. bit 0 of the first page */
203 mapped->iova = iova_bitmap_mapped_iova(bitmap);
204
205 /*
206 * offset of the page where pinned pages bit 0 is located.
207 * This handles the case where the bitmap is not PAGE_SIZE
208 * aligned.
209 */
210 mapped->pgoff = offset_in_page(addr);
211 mapped->length = iova_bitmap_mapped_length(bitmap);
212 return 0;
213 }
214
215 /*
216 * Unpins the bitmap user pages and clears @npages
217 * (un)pinning is abstracted from API user and it's done when advancing
218 * the index or freeing the bitmap.
219 */
iova_bitmap_put(struct iova_bitmap * bitmap)220 static void iova_bitmap_put(struct iova_bitmap *bitmap)
221 {
222 struct iova_bitmap_map *mapped = &bitmap->mapped;
223
224 if (mapped->npages) {
225 unpin_user_pages(mapped->pages, mapped->npages);
226 mapped->npages = 0;
227 }
228 }
229
230 /**
231 * iova_bitmap_alloc() - Allocates an IOVA bitmap object
232 * @iova: Start address of the IOVA range
233 * @length: Length of the IOVA range
234 * @page_size: Page size of the IOVA bitmap. It defines what each bit
235 * granularity represents
236 * @data: Userspace address of the bitmap
237 *
238 * Allocates an IOVA object and initializes all its fields including the
239 * first user pages of @data.
240 *
241 * Return: A pointer to a newly allocated struct iova_bitmap
242 * or ERR_PTR() on error.
243 */
iova_bitmap_alloc(unsigned long iova,size_t length,unsigned long page_size,u64 __user * data)244 struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length,
245 unsigned long page_size, u64 __user *data)
246 {
247 struct iova_bitmap_map *mapped;
248 struct iova_bitmap *bitmap;
249 int rc;
250
251 bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
252 if (!bitmap)
253 return ERR_PTR(-ENOMEM);
254
255 mapped = &bitmap->mapped;
256 mapped->pgshift = __ffs(page_size);
257 bitmap->bitmap = (u8 __user *)data;
258 bitmap->mapped_total_index =
259 iova_bitmap_offset_to_index(bitmap, length - 1) + 1;
260 bitmap->iova = iova;
261 bitmap->length = length;
262 mapped->iova = iova;
263 mapped->pages = (struct page **)__get_free_page(GFP_KERNEL);
264 if (!mapped->pages) {
265 rc = -ENOMEM;
266 goto err;
267 }
268
269 return bitmap;
270
271 err:
272 iova_bitmap_free(bitmap);
273 return ERR_PTR(rc);
274 }
275 EXPORT_SYMBOL_NS_GPL(iova_bitmap_alloc, "IOMMUFD");
276
277 /**
278 * iova_bitmap_free() - Frees an IOVA bitmap object
279 * @bitmap: IOVA bitmap to free
280 *
281 * It unpins and releases pages array memory and clears any leftover
282 * state.
283 */
iova_bitmap_free(struct iova_bitmap * bitmap)284 void iova_bitmap_free(struct iova_bitmap *bitmap)
285 {
286 struct iova_bitmap_map *mapped = &bitmap->mapped;
287
288 iova_bitmap_put(bitmap);
289
290 if (mapped->pages) {
291 free_page((unsigned long)mapped->pages);
292 mapped->pages = NULL;
293 }
294
295 kfree(bitmap);
296 }
297 EXPORT_SYMBOL_NS_GPL(iova_bitmap_free, "IOMMUFD");
298
299 /*
300 * Returns the remaining bitmap indexes from mapped_total_index to process for
301 * the currently pinned bitmap pages.
302 */
iova_bitmap_mapped_remaining(struct iova_bitmap * bitmap)303 static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap)
304 {
305 unsigned long remaining, bytes;
306
307 bytes = (bitmap->mapped.npages << PAGE_SHIFT) - bitmap->mapped.pgoff;
308
309 remaining = bitmap->mapped_total_index - bitmap->mapped_base_index;
310 remaining = min_t(unsigned long, remaining,
311 DIV_ROUND_UP(bytes, sizeof(*bitmap->bitmap)));
312
313 return remaining;
314 }
315
316 /*
317 * Returns the length of the mapped IOVA range.
318 */
iova_bitmap_mapped_length(struct iova_bitmap * bitmap)319 static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap)
320 {
321 unsigned long max_iova = bitmap->iova + bitmap->length - 1;
322 unsigned long iova = iova_bitmap_mapped_iova(bitmap);
323 unsigned long remaining;
324
325 /*
326 * iova_bitmap_mapped_remaining() returns a number of indexes which
327 * when converted to IOVA gives us a max length that the bitmap
328 * pinned data can cover. Afterwards, that is capped to
329 * only cover the IOVA range in @bitmap::iova .. @bitmap::length.
330 */
331 remaining = iova_bitmap_index_to_offset(bitmap,
332 iova_bitmap_mapped_remaining(bitmap));
333
334 if (iova + remaining - 1 > max_iova)
335 remaining -= ((iova + remaining - 1) - max_iova);
336
337 return remaining;
338 }
339
340 /*
341 * Returns true if [@iova..@iova+@length-1] is part of the mapped IOVA range.
342 */
iova_bitmap_mapped_range(struct iova_bitmap_map * mapped,unsigned long iova,size_t length)343 static bool iova_bitmap_mapped_range(struct iova_bitmap_map *mapped,
344 unsigned long iova, size_t length)
345 {
346 return mapped->npages &&
347 (iova >= mapped->iova &&
348 (iova + length - 1) <= (mapped->iova + mapped->length - 1));
349 }
350
351 /*
352 * Advances to a selected range, releases the current pinned
353 * pages and pins the next set of bitmap pages.
354 * Returns 0 on success or otherwise errno.
355 */
iova_bitmap_advance_to(struct iova_bitmap * bitmap,unsigned long iova)356 static int iova_bitmap_advance_to(struct iova_bitmap *bitmap,
357 unsigned long iova)
358 {
359 unsigned long index;
360
361 index = iova_bitmap_offset_to_index(bitmap, iova - bitmap->iova);
362 if (index >= bitmap->mapped_total_index)
363 return -EINVAL;
364 bitmap->mapped_base_index = index;
365
366 iova_bitmap_put(bitmap);
367
368 /* Pin the next set of bitmap pages */
369 return iova_bitmap_get(bitmap);
370 }
371
372 /**
373 * iova_bitmap_for_each() - Iterates over the bitmap
374 * @bitmap: IOVA bitmap to iterate
375 * @opaque: Additional argument to pass to the callback
376 * @fn: Function that gets called for each IOVA range
377 *
378 * Helper function to iterate over bitmap data representing a portion of IOVA
379 * space. It hides the complexity of iterating bitmaps and translating the
380 * mapped bitmap user pages into IOVA ranges to process.
381 *
382 * Return: 0 on success, and an error on failure either upon
383 * iteration or when the callback returns an error.
384 */
iova_bitmap_for_each(struct iova_bitmap * bitmap,void * opaque,iova_bitmap_fn_t fn)385 int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque,
386 iova_bitmap_fn_t fn)
387 {
388 return fn(bitmap, bitmap->iova, bitmap->length, opaque);
389 }
390 EXPORT_SYMBOL_NS_GPL(iova_bitmap_for_each, "IOMMUFD");
391
/**
 * iova_bitmap_set() - Records an IOVA range in bitmap
 * @bitmap: IOVA bitmap
 * @iova: IOVA to start
 * @length: IOVA range length
 *
 * Set the bits corresponding to the range [iova .. iova+length-1] in
 * the user bitmap.
 *
 * When the range is not covered by the currently pinned bitmap pages the
 * pinned window is moved with iova_bitmap_advance_to(). If that fails the
 * function returns without recording the part of the range that is left.
 */
void iova_bitmap_set(struct iova_bitmap *bitmap,
		     unsigned long iova, size_t length)
{
	struct iova_bitmap_map *mapped = &bitmap->mapped;
	unsigned long cur_bit, last_bit, last_page_idx;

	/* Also re-entered from the loop below with the tail left to record */
update_indexes:
	if (unlikely(!iova_bitmap_mapped_range(mapped, iova, length))) {

		/*
		 * The attempt to advance the base index to @iova
		 * may fail if it's out of bounds, or pinning the pages
		 * returns an error.
		 */
		if (iova_bitmap_advance_to(bitmap, iova))
			return;
	}

	/* Index of the last page that is actually pinned */
	last_page_idx = mapped->npages - 1;

	/*
	 * First and last bit to set, counted from bit 0 of the first pinned
	 * page: the distance of the IOVA from the window base in units of
	 * (1 << pgshift), plus the bits skipped by the in-page offset of the
	 * bitmap data.
	 */
	cur_bit = ((iova - mapped->iova) >>
			mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
	last_bit = (((iova + length - 1) - mapped->iova) >>
			mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;

	do {
		/* Pinned page holding cur_bit and the bit position inside it */
		unsigned int page_idx = cur_bit / BITS_PER_PAGE;
		unsigned int offset = cur_bit % BITS_PER_PAGE;
		/* Bits to set in this page: up to its end or up to last_bit */
		unsigned int nbits = min(BITS_PER_PAGE - offset,
					 last_bit - cur_bit + 1);
		void *kaddr;

		/*
		 * The range runs past the pinned pages: skip @iova over the
		 * part already recorded, shrink @length to what is left and
		 * start over so that the next window gets pinned.
		 */
		if (unlikely(page_idx > last_page_idx)) {
			unsigned long left =
				((last_bit - cur_bit + 1) << mapped->pgshift);

			iova += (length - left);
			length = left;
			goto update_indexes;
		}

		/* Map the pinned page just long enough to set its bits */
		kaddr = kmap_local_page(mapped->pages[page_idx]);
		bitmap_set(kaddr, offset, nbits);
		kunmap_local(kaddr);
		cur_bit += nbits;
	} while (cur_bit <= last_bit);
}
EXPORT_SYMBOL_NS_GPL(iova_bitmap_set, "IOMMUFD");
449