// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
 *
 * The io_pagetable is the top of the data structure that maps IOVAs to PFNs.
 * The PFNs can be placed into an iommu_domain, or returned to the caller as a
 * page list for access by an in-kernel user.
 *
 * The data structure uses the iopt_pages to optimize the storage of the PFNs
 * between the domains and the xarray.
 */
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/iommu.h>
#include <linux/iommufd.h>
#include <linux/lockdep.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <uapi/linux/iommufd.h>

#include "double_span.h"
#include "io_pagetable.h"

struct iopt_pages_list {
	struct iopt_pages *pages;
	struct iopt_area *area;
	struct list_head next;
	unsigned long start_byte;
	unsigned long length;
};

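/*
 * Iterate over the areas covering an IOVA range. iopt_area_contig_init()
 * positions the iterator on the area containing iova, and
 * iopt_area_contig_next() only advances when the next area begins exactly
 * where the previous one ended, so the walk stops at the first gap or at an
 * area whose pages are still being set up. Callers use
 * iopt_area_contig_done() to check whether the whole range was covered.
 */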
struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter,
					struct io_pagetable *iopt,
					unsigned long iova,
					unsigned long last_iova)
{
	lockdep_assert_held(&iopt->iova_rwsem);

	iter->cur_iova = iova;
	iter->last_iova = last_iova;
	iter->area = iopt_area_iter_first(iopt, iova, iova);
	if (!iter->area)
		return NULL;
	if (!iter->area->pages) {
		iter->area = NULL;
		return NULL;
	}
	return iter->area;
}

struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter)
{
	unsigned long last_iova;

	if (!iter->area)
		return NULL;
	last_iova = iopt_area_last_iova(iter->area);
	if (iter->last_iova <= last_iova)
		return NULL;

	iter->cur_iova = last_iova + 1;
	iter->area = iopt_area_iter_next(iter->area, iter->cur_iova,
					 iter->last_iova);
	if (!iter->area)
		return NULL;
	if (iter->cur_iova != iopt_area_iova(iter->area) ||
	    !iter->area->pages) {
		iter->area = NULL;
		return NULL;
	}
	return iter->area;
}

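/*
 * Check whether a candidate span can hold an allocation of the given length
 * once its start is rounded up to iova_alignment and offset so its low bits
 * match page_offset (keeping the IOVA congruent to the user VA modulo
 * PAGE_SIZE). The two variants below walk holes between reserved/used ranges
 * and spans inside the allowed ranges, respectively.
 */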
static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span,
				    unsigned long length,
				    unsigned long iova_alignment,
				    unsigned long page_offset)
{
	if (span->is_used || span->last_hole - span->start_hole < length - 1)
		return false;

	span->start_hole = ALIGN(span->start_hole, iova_alignment) |
			   page_offset;
	if (span->start_hole > span->last_hole ||
	    span->last_hole - span->start_hole < length - 1)
		return false;
	return true;
}

static bool __alloc_iova_check_used(struct interval_tree_span_iter *span,
				    unsigned long length,
				    unsigned long iova_alignment,
				    unsigned long page_offset)
{
	if (span->is_hole || span->last_used - span->start_used < length - 1)
		return false;

	span->start_used = ALIGN(span->start_used, iova_alignment) |
			   page_offset;
	if (span->start_used > span->last_used ||
	    span->last_used - span->start_used < length - 1)
		return false;
	return true;
}

/*
 * Automatically find a block of IOVA that is not being used and not reserved.
 * Does not return a 0 IOVA even if it is valid.
 */
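/*
 * Illustrative example (assumed values, HPAGE_SIZE taken as 2MiB): mapping
 * length 0x3000 from a page aligned uptr of 0x7f4324600000 gives
 * roundup_pow_of_two(0x3000) == 0x4000 and 1UL << __ffs64(uptr) == 0x200000,
 * so iova_alignment becomes 0x4000 and page_offset is 0. The returned IOVA
 * keeps its low bit alignment in step with the user VA, which increases the
 * chance the range can be mapped with larger IOPTEs.
 */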
static int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova,
			   unsigned long uptr, unsigned long length)
{
	unsigned long page_offset = uptr % PAGE_SIZE;
	struct interval_tree_double_span_iter used_span;
	struct interval_tree_span_iter allowed_span;
	unsigned long max_alignment = PAGE_SIZE;
	unsigned long iova_alignment;

	lockdep_assert_held(&iopt->iova_rwsem);

	/* Protect roundup_pow_of_two() from overflow */
	if (length == 0 || length >= ULONG_MAX / 2)
		return -EOVERFLOW;

	/*
	 * Keep the alignment present in the uptr when building the IOVA; this
	 * increases the chance we can map a THP.
	 */
	if (!uptr)
		iova_alignment = roundup_pow_of_two(length);
	else
		iova_alignment = min_t(unsigned long,
				       roundup_pow_of_two(length),
				       1UL << __ffs64(uptr));

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	max_alignment = HPAGE_SIZE;
#endif
	/* Protect against ALIGN() overflow */
	if (iova_alignment >= max_alignment)
		iova_alignment = max_alignment;

	if (iova_alignment < iopt->iova_alignment)
		return -EINVAL;

	interval_tree_for_each_span(&allowed_span, &iopt->allowed_itree,
				    PAGE_SIZE, ULONG_MAX - PAGE_SIZE) {
		if (RB_EMPTY_ROOT(&iopt->allowed_itree.rb_root)) {
			allowed_span.start_used = PAGE_SIZE;
			allowed_span.last_used = ULONG_MAX - PAGE_SIZE;
			allowed_span.is_hole = false;
		}

		if (!__alloc_iova_check_used(&allowed_span, length,
					     iova_alignment, page_offset))
			continue;

		interval_tree_for_each_double_span(
			&used_span, &iopt->reserved_itree, &iopt->area_itree,
			allowed_span.start_used, allowed_span.last_used) {
			if (!__alloc_iova_check_hole(&used_span, length,
						     iova_alignment,
						     page_offset))
				continue;

			*iova = used_span.start_hole;
			return 0;
		}
	}
	return -ENOSPC;
}

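/*
 * Validate a caller supplied IOVA range: it must be aligned to
 * iova_alignment, must not wrap around, must not intersect any reserved
 * range and must not overlap an already existing area.
 */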
static int iopt_check_iova(struct io_pagetable *iopt, unsigned long iova,
			   unsigned long length)
{
	unsigned long last;

	lockdep_assert_held(&iopt->iova_rwsem);

	if ((iova & (iopt->iova_alignment - 1)))
		return -EINVAL;

	if (check_add_overflow(iova, length - 1, &last))
		return -EOVERFLOW;

	/* No reserved IOVA intersects the range */
	if (iopt_reserved_iter_first(iopt, iova, last))
		return -EINVAL;

	/* Check that there is not already a mapping in the range */
	if (iopt_area_iter_first(iopt, iova, last))
		return -EEXIST;
	return 0;
}

/*
 * The area takes a slice of the pages from start_byte to start_byte + length
 */
static int iopt_insert_area(struct io_pagetable *iopt, struct iopt_area *area,
			    struct iopt_pages *pages, unsigned long iova,
			    unsigned long start_byte, unsigned long length,
			    int iommu_prot)
{
	lockdep_assert_held_write(&iopt->iova_rwsem);

	if ((iommu_prot & IOMMU_WRITE) && !pages->writable)
		return -EPERM;

	area->iommu_prot = iommu_prot;
	area->page_offset = start_byte % PAGE_SIZE;
	if (area->page_offset & (iopt->iova_alignment - 1))
		return -EINVAL;

	area->node.start = iova;
	if (check_add_overflow(iova, length - 1, &area->node.last))
		return -EOVERFLOW;

	area->pages_node.start = start_byte / PAGE_SIZE;
	if (check_add_overflow(start_byte, length - 1, &area->pages_node.last))
		return -EOVERFLOW;
	area->pages_node.last = area->pages_node.last / PAGE_SIZE;
	if (WARN_ON(area->pages_node.last >= pages->npages))
		return -EOVERFLOW;

	/*
	 * The area is inserted with a NULL pages indicating it is not fully
	 * initialized yet.
	 */
	area->iopt = iopt;
	interval_tree_insert(&area->node, &iopt->area_itree);
	return 0;
}

static struct iopt_area *iopt_area_alloc(void)
{
	struct iopt_area *area;

	area = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT);
	if (!area)
		return NULL;
	RB_CLEAR_NODE(&area->node.rb);
	RB_CLEAR_NODE(&area->pages_node.rb);
	return area;
}

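/*
 * Allocate (or validate) the destination IOVA and insert one area per entry
 * of pages_list. The areas are inserted with area->pages still NULL, so only
 * the IOVA space is claimed here; the PFNs are loaded later by
 * iopt_map_pages() while the list elements retain ownership for error
 * unwinding.
 */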
static int iopt_alloc_area_pages(struct io_pagetable *iopt,
				 struct list_head *pages_list,
				 unsigned long length, unsigned long *dst_iova,
				 int iommu_prot, unsigned int flags)
{
	struct iopt_pages_list *elm;
	unsigned long iova;
	int rc = 0;

	list_for_each_entry(elm, pages_list, next) {
		elm->area = iopt_area_alloc();
		if (!elm->area)
			return -ENOMEM;
	}

	down_write(&iopt->iova_rwsem);
	if ((length & (iopt->iova_alignment - 1)) || !length) {
		rc = -EINVAL;
		goto out_unlock;
	}

	if (flags & IOPT_ALLOC_IOVA) {
		/* Use the first entry to guess the ideal IOVA alignment */
		elm = list_first_entry(pages_list, struct iopt_pages_list,
				       next);
		rc = iopt_alloc_iova(
			iopt, dst_iova,
			(uintptr_t)elm->pages->uptr + elm->start_byte, length);
		if (rc)
			goto out_unlock;
		if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
		    WARN_ON(iopt_check_iova(iopt, *dst_iova, length))) {
			rc = -EINVAL;
			goto out_unlock;
		}
	} else {
		rc = iopt_check_iova(iopt, *dst_iova, length);
		if (rc)
			goto out_unlock;
	}

	/*
	 * Areas are created with a NULL pages so that the IOVA space is
	 * reserved and we can unlock the iova_rwsem.
	 */
	iova = *dst_iova;
	list_for_each_entry(elm, pages_list, next) {
		rc = iopt_insert_area(iopt, elm->area, elm->pages, iova,
				      elm->start_byte, elm->length, iommu_prot);
		if (rc)
			goto out_unlock;
		iova += elm->length;
	}

out_unlock:
	up_write(&iopt->iova_rwsem);
	return rc;
}

static void iopt_abort_area(struct iopt_area *area)
{
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		WARN_ON(area->pages);
	if (area->iopt) {
		down_write(&area->iopt->iova_rwsem);
		interval_tree_remove(&area->node, &area->iopt->area_itree);
		up_write(&area->iopt->iova_rwsem);
	}
	kfree(area);
}

void iopt_free_pages_list(struct list_head *pages_list)
{
	struct iopt_pages_list *elm;

	while ((elm = list_first_entry_or_null(pages_list,
					       struct iopt_pages_list, next))) {
		if (elm->area)
			iopt_abort_area(elm->area);
		if (elm->pages)
			iopt_put_pages(elm->pages);
		list_del(&elm->next);
		kfree(elm);
	}
}

static int iopt_fill_domains_pages(struct list_head *pages_list)
{
	struct iopt_pages_list *undo_elm;
	struct iopt_pages_list *elm;
	int rc;

	list_for_each_entry(elm, pages_list, next) {
		rc = iopt_area_fill_domains(elm->area, elm->pages);
		if (rc)
			goto err_undo;
	}
	return 0;

err_undo:
	list_for_each_entry(undo_elm, pages_list, next) {
		if (undo_elm == elm)
			break;
		iopt_area_unfill_domains(undo_elm->area, undo_elm->pages);
	}
	return rc;
}

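/**
 * iopt_map_pages() - Map a list of iopt_pages into the io page table
 * @iopt: io_pagetable to act on
 * @pages_list: List of iopt_pages_list elements describing what to map
 * @length: Total number of bytes covered by the list
 * @dst_iova: IOVA to map to, or the allocated IOVA if IOPT_ALLOC_IOVA is set
 * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping
 * @flags: IOPT_ALLOC_IOVA or zero
 *
 * On success ownership of the pages and areas in pages_list moves into the
 * io_pagetable; on failure the entries remain owned by the caller.
 */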
int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
		   unsigned long length, unsigned long *dst_iova,
		   int iommu_prot, unsigned int flags)
{
	struct iopt_pages_list *elm;
	int rc;

	rc = iopt_alloc_area_pages(iopt, pages_list, length, dst_iova,
				   iommu_prot, flags);
	if (rc)
		return rc;

	down_read(&iopt->domains_rwsem);
	rc = iopt_fill_domains_pages(pages_list);
	if (rc)
		goto out_unlock_domains;

	down_write(&iopt->iova_rwsem);
	list_for_each_entry(elm, pages_list, next) {
		/*
		 * area->pages must be set inside the domains_rwsem to ensure
		 * any newly added domains will get filled. Moves the reference
		 * in from the list.
		 */
		elm->area->pages = elm->pages;
		elm->pages = NULL;
		elm->area = NULL;
	}
	up_write(&iopt->iova_rwsem);
out_unlock_domains:
	up_read(&iopt->domains_rwsem);
	return rc;
}

/**
 * iopt_map_user_pages() - Map a user VA to an iova in the io page table
 * @ictx: iommufd_ctx the iopt is part of
 * @iopt: io_pagetable to act on
 * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains
 *        the chosen iova on output. Otherwise it is the iova to map to on
 *        input
 * @uptr: User VA to map
 * @length: Number of bytes to map
 * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping
 * @flags: IOPT_ALLOC_IOVA or zero
 *
 * iova, uptr, and length must be aligned to iova_alignment. For domain backed
 * page tables this will pin the pages and load them into the domain at iova.
 * For non-domain page tables this will only set up a lazy reference and the
 * caller must use iopt_access_pages() to touch them.
 *
 * iopt_unmap_iova() must be called to undo this before the io_pagetable can be
 * destroyed.
 */
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, void __user *uptr,
			unsigned long length, int iommu_prot,
			unsigned int flags)
{
	struct iopt_pages_list elm = {};
	LIST_HEAD(pages_list);
	int rc;

	elm.pages = iopt_alloc_pages(uptr, length, iommu_prot & IOMMU_WRITE);
	if (IS_ERR(elm.pages))
		return PTR_ERR(elm.pages);
	if (ictx->account_mode == IOPT_PAGES_ACCOUNT_MM &&
	    elm.pages->account_mode == IOPT_PAGES_ACCOUNT_USER)
		elm.pages->account_mode = IOPT_PAGES_ACCOUNT_MM;
	elm.start_byte = uptr - elm.pages->uptr;
	elm.length = length;
	list_add(&elm.next, &pages_list);

	rc = iopt_map_pages(iopt, &pages_list, length, iova, iommu_prot, flags);
	if (rc) {
		if (elm.area)
			iopt_abort_area(elm.area);
		if (elm.pages)
			iopt_put_pages(elm.pages);
		return rc;
	}
	return 0;
}

struct iova_bitmap_fn_arg {
	unsigned long flags;
	struct io_pagetable *iopt;
	struct iommu_domain *domain;
	struct iommu_dirty_bitmap *dirty;
};

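/*
 * Called back for each chunk of the user supplied bitmap. Only IOVA that is
 * covered by contiguous areas is read from the domain; a gap in the areas
 * fails the walk with -EINVAL.
 */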
static int __iommu_read_and_clear_dirty(struct iova_bitmap *bitmap,
					unsigned long iova, size_t length,
					void *opaque)
{
	struct iopt_area *area;
	struct iopt_area_contig_iter iter;
	struct iova_bitmap_fn_arg *arg = opaque;
	struct iommu_domain *domain = arg->domain;
	struct iommu_dirty_bitmap *dirty = arg->dirty;
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	unsigned long last_iova = iova + length - 1;
	unsigned long flags = arg->flags;
	int ret;

	iopt_for_each_contig_area(&iter, area, arg->iopt, iova, last_iova) {
		unsigned long last = min(last_iova, iopt_area_last_iova(area));

		ret = ops->read_and_clear_dirty(domain, iter.cur_iova,
						last - iter.cur_iova + 1, flags,
						dirty);
		if (ret)
			return ret;
	}

	if (!iopt_area_contig_done(&iter))
		return -EINVAL;
	return 0;
}

static int
iommu_read_and_clear_dirty(struct iommu_domain *domain,
			   struct io_pagetable *iopt, unsigned long flags,
			   struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	struct iommu_iotlb_gather gather;
	struct iommu_dirty_bitmap dirty;
	struct iova_bitmap_fn_arg arg;
	struct iova_bitmap *iter;
	int ret = 0;

	if (!ops || !ops->read_and_clear_dirty)
		return -EOPNOTSUPP;

	iter = iova_bitmap_alloc(bitmap->iova, bitmap->length,
				 bitmap->page_size,
				 u64_to_user_ptr(bitmap->data));
	if (IS_ERR(iter))
		return -ENOMEM;

	iommu_dirty_bitmap_init(&dirty, iter, &gather);

	arg.flags = flags;
	arg.iopt = iopt;
	arg.domain = domain;
	arg.dirty = &dirty;
	iova_bitmap_for_each(iter, &arg, __iommu_read_and_clear_dirty);

	if (!(flags & IOMMU_DIRTY_NO_CLEAR))
		iommu_iotlb_sync(domain, &gather);

	iova_bitmap_free(iter);

	return ret;
}

int iommufd_check_iova_range(struct io_pagetable *iopt,
			     struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	size_t iommu_pgsize = iopt->iova_alignment;
	u64 last_iova;

	if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova))
		return -EOVERFLOW;

	if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX)
		return -EOVERFLOW;

	if ((bitmap->iova & (iommu_pgsize - 1)) ||
	    ((last_iova + 1) & (iommu_pgsize - 1)))
		return -EINVAL;

	if (!bitmap->page_size)
		return -EINVAL;

	if ((bitmap->iova & (bitmap->page_size - 1)) ||
	    ((last_iova + 1) & (bitmap->page_size - 1)))
		return -EINVAL;

	return 0;
}

int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
				   struct iommu_domain *domain,
				   unsigned long flags,
				   struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	int ret;

	ret = iommufd_check_iova_range(iopt, bitmap);
	if (ret)
		return ret;

	down_read(&iopt->iova_rwsem);
	ret = iommu_read_and_clear_dirty(domain, iopt, flags, bitmap);
	up_read(&iopt->iova_rwsem);

	return ret;
}

static int iopt_clear_dirty_data(struct io_pagetable *iopt,
				 struct iommu_domain *domain)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	struct iommu_iotlb_gather gather;
	struct iommu_dirty_bitmap dirty;
	struct iopt_area *area;
	int ret = 0;

	lockdep_assert_held_read(&iopt->iova_rwsem);

	iommu_dirty_bitmap_init(&dirty, NULL, &gather);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		if (!area->pages)
			continue;

		ret = ops->read_and_clear_dirty(domain, iopt_area_iova(area),
						iopt_area_length(area), 0,
						&dirty);
		if (ret)
			break;
	}

	iommu_iotlb_sync(domain, &gather);
	return ret;
}

int iopt_set_dirty_tracking(struct io_pagetable *iopt,
			    struct iommu_domain *domain, bool enable)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	int ret = 0;

	if (!ops)
		return -EOPNOTSUPP;

	down_read(&iopt->iova_rwsem);

	/* Clear dirty bits from PTEs to ensure a clean snapshot */
	if (enable) {
		ret = iopt_clear_dirty_data(iopt, domain);
		if (ret)
			goto out_unlock;
	}

	ret = ops->set_dirty_tracking(domain, enable);

out_unlock:
	up_read(&iopt->iova_rwsem);
	return ret;
}

int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
		   unsigned long length, struct list_head *pages_list)
{
	struct iopt_area_contig_iter iter;
	unsigned long last_iova;
	struct iopt_area *area;
	int rc;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		struct iopt_pages_list *elm;
		unsigned long last = min(last_iova, iopt_area_last_iova(area));

		elm = kzalloc(sizeof(*elm), GFP_KERNEL_ACCOUNT);
		if (!elm) {
			rc = -ENOMEM;
			goto err_free;
		}
		elm->start_byte = iopt_area_start_byte(area, iter.cur_iova);
		elm->pages = area->pages;
		elm->length = (last - iter.cur_iova) + 1;
		kref_get(&elm->pages->kref);
		list_add_tail(&elm->next, pages_list);
	}
	if (!iopt_area_contig_done(&iter)) {
		rc = -ENOENT;
		goto err_free;
	}
	up_read(&iopt->iova_rwsem);
	return 0;
err_free:
	up_read(&iopt->iova_rwsem);
	iopt_free_pages_list(pages_list);
	return rc;
}

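/*
 * Remove every area fully contained in [start, last]. If an in-kernel access
 * still has an area pinned, the locks are dropped, the access is notified to
 * unmap its range, and the walk restarts until the area can be torn down.
 */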
static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start,
				 unsigned long last, unsigned long *unmapped)
{
	struct iopt_area *area;
	unsigned long unmapped_bytes = 0;
	unsigned int tries = 0;
	int rc = -ENOENT;

	/*
	 * The domains_rwsem must be held in read mode any time any area->pages
	 * is NULL. This prevents domain attach/detach from running
	 * concurrently with cleaning up the area.
	 */
again:
	down_read(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	while ((area = iopt_area_iter_first(iopt, start, last))) {
		unsigned long area_last = iopt_area_last_iova(area);
		unsigned long area_first = iopt_area_iova(area);
		struct iopt_pages *pages;

		/* Userspace should not race map/unmap of the same area */
		if (!area->pages) {
			rc = -EBUSY;
			goto out_unlock_iova;
		}

		if (area_first < start || area_last > last) {
			rc = -ENOENT;
			goto out_unlock_iova;
		}

		if (area_first != start)
			tries = 0;

		/*
		 * num_accesses writers must hold the iova_rwsem too, so we can
		 * safely read it under the write side of the iova_rwsem
		 * without the pages->mutex.
		 */
		if (area->num_accesses) {
			size_t length = iopt_area_length(area);

			start = area_first;
			area->prevent_access = true;
			up_write(&iopt->iova_rwsem);
			up_read(&iopt->domains_rwsem);

			iommufd_access_notify_unmap(iopt, area_first, length);
			/* Something is not responding to unmap requests. */
			tries++;
			if (WARN_ON(tries > 100))
				return -EDEADLOCK;
			goto again;
		}

		pages = area->pages;
		area->pages = NULL;
		up_write(&iopt->iova_rwsem);

		iopt_area_unfill_domains(area, pages);
		iopt_abort_area(area);
		iopt_put_pages(pages);

		unmapped_bytes += area_last - area_first + 1;

		down_write(&iopt->iova_rwsem);
	}
	if (unmapped_bytes)
		rc = 0;

out_unlock_iova:
	up_write(&iopt->iova_rwsem);
	up_read(&iopt->domains_rwsem);
	if (unmapped)
		*unmapped = unmapped_bytes;
	return rc;
}

/**
 * iopt_unmap_iova() - Remove a range of iova
 * @iopt: io_pagetable to act on
 * @iova: Starting iova to unmap
 * @length: Number of bytes to unmap
 * @unmapped: Return number of bytes unmapped
 *
 * The requested range must be a superset of every area it intersects.
 * Splitting or truncating IOVA mappings is not allowed.
 */
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
		    unsigned long length, unsigned long *unmapped)
{
	unsigned long iova_last;

	if (!length)
		return -EINVAL;

	if (check_add_overflow(iova, length - 1, &iova_last))
		return -EOVERFLOW;

	return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped);
}

int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped)
{
	int rc;

	rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped);
	/* If the IOVAs are empty then unmap all succeeds */
	if (rc == -ENOENT)
		return 0;
	return rc;
}

/* The caller must always free all the nodes in the allowed_iova rb_root. */
int iopt_set_allow_iova(struct io_pagetable *iopt,
			struct rb_root_cached *allowed_iova)
{
	struct iopt_allowed *allowed;

	down_write(&iopt->iova_rwsem);
	swap(*allowed_iova, iopt->allowed_itree);

	for (allowed = iopt_allowed_iter_first(iopt, 0, ULONG_MAX); allowed;
	     allowed = iopt_allowed_iter_next(allowed, 0, ULONG_MAX)) {
		if (iopt_reserved_iter_first(iopt, allowed->node.start,
					     allowed->node.last)) {
			swap(*allowed_iova, iopt->allowed_itree);
			up_write(&iopt->iova_rwsem);
			return -EADDRINUSE;
		}
	}
	up_write(&iopt->iova_rwsem);
	return 0;
}

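/*
 * Reserve [start, last] so it can neither be allocated nor mapped. Fails if
 * the range already intersects an existing area or an allowed range.
 */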
int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
		      unsigned long last, void *owner)
{
	struct iopt_reserved *reserved;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	if (iopt_area_iter_first(iopt, start, last) ||
	    iopt_allowed_iter_first(iopt, start, last))
		return -EADDRINUSE;

	reserved = kzalloc(sizeof(*reserved), GFP_KERNEL_ACCOUNT);
	if (!reserved)
		return -ENOMEM;
	reserved->node.start = start;
	reserved->node.last = last;
	reserved->owner = owner;
	interval_tree_insert(&reserved->node, &iopt->reserved_itree);
	return 0;
}

static void __iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
	struct iopt_reserved *reserved, *next;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	for (reserved = iopt_reserved_iter_first(iopt, 0, ULONG_MAX); reserved;
	     reserved = next) {
		next = iopt_reserved_iter_next(reserved, 0, ULONG_MAX);

		if (reserved->owner == owner) {
			interval_tree_remove(&reserved->node,
					     &iopt->reserved_itree);
			kfree(reserved);
		}
	}
}

void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
	down_write(&iopt->iova_rwsem);
	__iopt_remove_reserved_iova(iopt, owner);
	up_write(&iopt->iova_rwsem);
}

void iopt_init_table(struct io_pagetable *iopt)
{
	init_rwsem(&iopt->iova_rwsem);
	init_rwsem(&iopt->domains_rwsem);
	iopt->area_itree = RB_ROOT_CACHED;
	iopt->allowed_itree = RB_ROOT_CACHED;
	iopt->reserved_itree = RB_ROOT_CACHED;
	xa_init_flags(&iopt->domains, XA_FLAGS_ACCOUNT);
	xa_init_flags(&iopt->access_list, XA_FLAGS_ALLOC);

	/*
	 * iopts start as SW tables that can use the entire size_t IOVA space
	 * due to the use of size_t in the APIs. They have no alignment
	 * restriction.
	 */
	iopt->iova_alignment = 1;
}

void iopt_destroy_table(struct io_pagetable *iopt)
{
	struct interval_tree_node *node;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		iopt_remove_reserved_iova(iopt, NULL);

	while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0,
						ULONG_MAX))) {
		interval_tree_remove(node, &iopt->allowed_itree);
		kfree(container_of(node, struct iopt_allowed, node));
	}

	WARN_ON(!RB_EMPTY_ROOT(&iopt->reserved_itree.rb_root));
	WARN_ON(!xa_empty(&iopt->domains));
	WARN_ON(!xa_empty(&iopt->access_list));
	WARN_ON(!RB_EMPTY_ROOT(&iopt->area_itree.rb_root));
}

/**
 * iopt_unfill_domain() - Unfill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to unfill
 *
 * This is used when removing a domain from the iopt. Every area in the iopt
 * will be unmapped from the domain. The domain must already be removed from
 * the domains xarray.
 */
static void iopt_unfill_domain(struct io_pagetable *iopt,
			       struct iommu_domain *domain)
{
	struct iopt_area *area;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held_write(&iopt->domains_rwsem);

	/*
	 * Some other domain is still holding all the pfns, so rapidly unmap
	 * this domain.
	 */
	if (iopt->next_domain_id != 0) {
		/* Pick an arbitrary remaining domain to act as storage */
		struct iommu_domain *storage_domain =
			xa_load(&iopt->domains, 0);

		for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
		     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
			struct iopt_pages *pages = area->pages;

			if (!pages)
				continue;

			mutex_lock(&pages->mutex);
			if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
				WARN_ON(!area->storage_domain);
			if (area->storage_domain == domain)
				area->storage_domain = storage_domain;
			mutex_unlock(&pages->mutex);

			iopt_area_unmap_domain(area, domain);
		}
		return;
	}

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (!pages)
			continue;

		mutex_lock(&pages->mutex);
		interval_tree_remove(&area->pages_node, &pages->domains_itree);
		WARN_ON(area->storage_domain != domain);
		area->storage_domain = NULL;
		iopt_area_unfill_domain(area, pages, domain);
		mutex_unlock(&pages->mutex);
	}
}

/**
 * iopt_fill_domain() - Fill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to fill
 *
 * Fill the domain with PFNs from every area in the iopt. On failure the domain
 * is left unchanged.
 */
static int iopt_fill_domain(struct io_pagetable *iopt,
			    struct iommu_domain *domain)
{
	struct iopt_area *end_area;
	struct iopt_area *area;
	int rc;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held_write(&iopt->domains_rwsem);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (!pages)
			continue;

		mutex_lock(&pages->mutex);
		rc = iopt_area_fill_domain(area, domain);
		if (rc) {
			mutex_unlock(&pages->mutex);
			goto out_unfill;
		}
		if (!area->storage_domain) {
			WARN_ON(iopt->next_domain_id != 0);
			area->storage_domain = domain;
			interval_tree_insert(&area->pages_node,
					     &pages->domains_itree);
		}
		mutex_unlock(&pages->mutex);
	}
	return 0;

out_unfill:
	end_area = area;
	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (area == end_area)
			break;
		if (!pages)
			continue;
		mutex_lock(&pages->mutex);
		if (iopt->next_domain_id == 0) {
			interval_tree_remove(&area->pages_node,
					     &pages->domains_itree);
			area->storage_domain = NULL;
		}
		iopt_area_unfill_domain(area, pages, domain);
		mutex_unlock(&pages->mutex);
	}
	return rc;
}

/* All existing areas must conform to the increased page size */
static int iopt_check_iova_alignment(struct io_pagetable *iopt,
				     unsigned long new_iova_alignment)
{
	unsigned long align_mask = new_iova_alignment - 1;
	struct iopt_area *area;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held(&iopt->domains_rwsem);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX))
		if ((iopt_area_iova(area) & align_mask) ||
		    (iopt_area_length(area) & align_mask) ||
		    (area->page_offset & align_mask))
			return -EADDRINUSE;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) {
		struct iommufd_access *access;
		unsigned long index;

		xa_for_each(&iopt->access_list, index, access)
			if (WARN_ON(access->iova_alignment >
				    new_iova_alignment))
				return -EADDRINUSE;
	}
	return 0;
}

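/*
 * Attach an iommu_domain to the io_pagetable: reserve the IOVA outside the
 * domain's aperture, raise iova_alignment to the domain's minimum page size,
 * and replay every existing area into the new domain before publishing it in
 * the domains xarray.
 */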
int iopt_table_add_domain(struct io_pagetable *iopt,
			  struct iommu_domain *domain)
{
	const struct iommu_domain_geometry *geometry = &domain->geometry;
	struct iommu_domain *iter_domain;
	unsigned int new_iova_alignment;
	unsigned long index;
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);

	xa_for_each(&iopt->domains, index, iter_domain) {
		if (WARN_ON(iter_domain == domain)) {
			rc = -EEXIST;
			goto out_unlock;
		}
	}

	/*
	 * The io page size drives the iova_alignment. Internally the iopt_pages
	 * works in PAGE_SIZE units and we adjust when mapping sub-PAGE_SIZE
	 * objects into the iommu_domain.
	 *
	 * An iommu_domain must always be able to accept PAGE_SIZE to be
	 * compatible as we can't guarantee higher contiguity.
	 */
	new_iova_alignment = max_t(unsigned long,
				   1UL << __ffs(domain->pgsize_bitmap),
				   iopt->iova_alignment);
	if (new_iova_alignment > PAGE_SIZE) {
		rc = -EINVAL;
		goto out_unlock;
	}
	if (new_iova_alignment != iopt->iova_alignment) {
		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
		if (rc)
			goto out_unlock;
	}

	/* No area exists that is outside the allowed domain aperture */
	if (geometry->aperture_start != 0) {
		rc = iopt_reserve_iova(iopt, 0, geometry->aperture_start - 1,
				       domain);
		if (rc)
			goto out_reserved;
	}
	if (geometry->aperture_end != ULONG_MAX) {
		rc = iopt_reserve_iova(iopt, geometry->aperture_end + 1,
				       ULONG_MAX, domain);
		if (rc)
			goto out_reserved;
	}

	rc = xa_reserve(&iopt->domains, iopt->next_domain_id, GFP_KERNEL);
	if (rc)
		goto out_reserved;

	rc = iopt_fill_domain(iopt, domain);
	if (rc)
		goto out_release;

	iopt->iova_alignment = new_iova_alignment;
	xa_store(&iopt->domains, iopt->next_domain_id, domain, GFP_KERNEL);
	iopt->next_domain_id++;
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return 0;
out_release:
	xa_release(&iopt->domains, iopt->next_domain_id);
out_reserved:
	__iopt_remove_reserved_iova(iopt, domain);
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}

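/*
 * Recompute iova_alignment as the largest minimum page size of any attached
 * domain or registered access. The alignment is only allowed to grow if no
 * existing area would violate the new value.
 */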
static int iopt_calculate_iova_alignment(struct io_pagetable *iopt)
{
	unsigned long new_iova_alignment;
	struct iommufd_access *access;
	struct iommu_domain *domain;
	unsigned long index;

	lockdep_assert_held_write(&iopt->iova_rwsem);
	lockdep_assert_held(&iopt->domains_rwsem);

	/* See batch_iommu_map_small() */
	if (iopt->disable_large_pages)
		new_iova_alignment = PAGE_SIZE;
	else
		new_iova_alignment = 1;

	xa_for_each(&iopt->domains, index, domain)
		new_iova_alignment = max_t(unsigned long,
					   1UL << __ffs(domain->pgsize_bitmap),
					   new_iova_alignment);
	xa_for_each(&iopt->access_list, index, access)
		new_iova_alignment = max_t(unsigned long,
					   access->iova_alignment,
					   new_iova_alignment);

	if (new_iova_alignment > iopt->iova_alignment) {
		int rc;

		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
		if (rc)
			return rc;
	}
	iopt->iova_alignment = new_iova_alignment;
	return 0;
}

void iopt_table_remove_domain(struct io_pagetable *iopt,
			      struct iommu_domain *domain)
{
	struct iommu_domain *iter_domain = NULL;
	unsigned long index;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);

	xa_for_each(&iopt->domains, index, iter_domain)
		if (iter_domain == domain)
			break;
	if (WARN_ON(iter_domain != domain) || index >= iopt->next_domain_id)
		goto out_unlock;

	/*
	 * Compress the xarray to keep it linear by swapping the entry to erase
	 * with the tail entry and shrinking the tail.
	 */
	iopt->next_domain_id--;
	iter_domain = xa_erase(&iopt->domains, iopt->next_domain_id);
	if (index != iopt->next_domain_id)
		xa_store(&iopt->domains, index, iter_domain, GFP_KERNEL);

	iopt_unfill_domain(iopt, domain);
	__iopt_remove_reserved_iova(iopt, domain);

	WARN_ON(iopt_calculate_iova_alignment(iopt));
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}

/**
 * iopt_area_split() - Split an area into two parts at iova
 * @area: The area to split
 * @iova: Becomes the last of a new area
 *
 * This splits an area into two. It is part of the VFIO compatibility to allow
 * poking a hole in the mapping. The two areas continue to point at the same
 * iopt_pages, just with different starting bytes.
 */
static int iopt_area_split(struct iopt_area *area, unsigned long iova)
{
	unsigned long alignment = area->iopt->iova_alignment;
	unsigned long last_iova = iopt_area_last_iova(area);
	unsigned long start_iova = iopt_area_iova(area);
	unsigned long new_start = iova + 1;
	struct io_pagetable *iopt = area->iopt;
	struct iopt_pages *pages = area->pages;
	struct iopt_area *lhs;
	struct iopt_area *rhs;
	int rc;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	if (iova == start_iova || iova == last_iova)
		return 0;

	if (!pages || area->prevent_access)
		return -EBUSY;

	if (new_start & (alignment - 1) ||
	    iopt_area_start_byte(area, new_start) & (alignment - 1))
		return -EINVAL;

	lhs = iopt_area_alloc();
	if (!lhs)
		return -ENOMEM;

	rhs = iopt_area_alloc();
	if (!rhs) {
		rc = -ENOMEM;
		goto err_free_lhs;
	}

	mutex_lock(&pages->mutex);
	/*
	 * Splitting is not permitted if an access exists; we don't track
	 * enough information to split existing accesses.
	 */
	if (area->num_accesses) {
		rc = -EINVAL;
		goto err_unlock;
	}

	/*
	 * Splitting is not permitted if a domain could have been mapped with
	 * huge pages.
	 */
	if (area->storage_domain && !iopt->disable_large_pages) {
		rc = -EINVAL;
		goto err_unlock;
	}

	interval_tree_remove(&area->node, &iopt->area_itree);
	rc = iopt_insert_area(iopt, lhs, area->pages, start_iova,
			      iopt_area_start_byte(area, start_iova),
			      (new_start - 1) - start_iova + 1,
			      area->iommu_prot);
	if (WARN_ON(rc))
		goto err_insert;

	rc = iopt_insert_area(iopt, rhs, area->pages, new_start,
			      iopt_area_start_byte(area, new_start),
			      last_iova - new_start + 1, area->iommu_prot);
	if (WARN_ON(rc))
		goto err_remove_lhs;

	/*
	 * If the original area has filled a domain, domains_itree has to be
	 * updated.
	 */
	if (area->storage_domain) {
		interval_tree_remove(&area->pages_node, &pages->domains_itree);
		interval_tree_insert(&lhs->pages_node, &pages->domains_itree);
		interval_tree_insert(&rhs->pages_node, &pages->domains_itree);
	}

	lhs->storage_domain = area->storage_domain;
	lhs->pages = area->pages;
	rhs->storage_domain = area->storage_domain;
	rhs->pages = area->pages;
	kref_get(&rhs->pages->kref);
	kfree(area);
	mutex_unlock(&pages->mutex);

	/*
	 * No change to domains or accesses because the iopt_pages has not
	 * changed.
	 */
	return 0;

err_remove_lhs:
	interval_tree_remove(&lhs->node, &iopt->area_itree);
err_insert:
	interval_tree_insert(&area->node, &iopt->area_itree);
err_unlock:
	mutex_unlock(&pages->mutex);
	kfree(rhs);
err_free_lhs:
	kfree(lhs);
	return rc;
}

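/*
 * Split areas so that each IOVA in iovas[] becomes the last byte of an area,
 * allowing the VFIO compatibility path to later unmap only part of what was
 * originally mapped. IOVAs that do not land inside an area are skipped.
 */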
int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
		  size_t num_iovas)
{
	int rc = 0;
	int i;

	down_write(&iopt->iova_rwsem);
	for (i = 0; i < num_iovas; i++) {
		struct iopt_area *area;

		area = iopt_area_iter_first(iopt, iovas[i], iovas[i]);
		if (!area)
			continue;
		rc = iopt_area_split(area, iovas[i]);
		if (rc)
			break;
	}
	up_write(&iopt->iova_rwsem);
	return rc;
}

void iopt_enable_large_pages(struct io_pagetable *iopt)
{
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	WRITE_ONCE(iopt->disable_large_pages, false);
	rc = iopt_calculate_iova_alignment(iopt);
	WARN_ON(rc);
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}

int iopt_disable_large_pages(struct io_pagetable *iopt)
{
	int rc = 0;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	if (iopt->disable_large_pages)
		goto out_unlock;

	/* Won't do it if domains already have pages mapped in them */
	if (!xa_empty(&iopt->domains) &&
	    !RB_EMPTY_ROOT(&iopt->area_itree.rb_root)) {
		rc = -EINVAL;
		goto out_unlock;
	}

	WRITE_ONCE(iopt->disable_large_pages, true);
	rc = iopt_calculate_iova_alignment(iopt);
	if (rc)
		WRITE_ONCE(iopt->disable_large_pages, false);
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}

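/*
 * Register an in-kernel access with the io_pagetable. The access may impose
 * its own iova_alignment, so the table wide alignment is recomputed and the
 * registration is undone if an existing area would conflict with it.
 */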
int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access)
{
	u32 new_id;
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	rc = xa_alloc(&iopt->access_list, &new_id, access, xa_limit_16b,
		      GFP_KERNEL_ACCOUNT);

	if (rc)
		goto out_unlock;

	rc = iopt_calculate_iova_alignment(iopt);
	if (rc) {
		xa_erase(&iopt->access_list, new_id);
		goto out_unlock;
	}
	access->iopt_access_list_id = new_id;

out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}

void iopt_remove_access(struct io_pagetable *iopt,
			struct iommufd_access *access,
			u32 iopt_access_list_id)
{
	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	WARN_ON(xa_erase(&iopt->access_list, iopt_access_list_id) != access);
	WARN_ON(iopt_calculate_iova_alignment(iopt));
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}

/* Narrow the usable IOVA by adding a device's reserved regions to the reserved_itree. */
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
					struct device *dev,
					phys_addr_t *sw_msi_start)
{
	struct iommu_resv_region *resv;
	LIST_HEAD(resv_regions);
	unsigned int num_hw_msi = 0;
	unsigned int num_sw_msi = 0;
	int rc;

	if (iommufd_should_fail())
		return -EINVAL;

	down_write(&iopt->iova_rwsem);
	/* FIXME: drivers allocate memory but there is no failure propagated */
	iommu_get_resv_regions(dev, &resv_regions);

	list_for_each_entry(resv, &resv_regions, list) {
		if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
			continue;

		if (sw_msi_start && resv->type == IOMMU_RESV_MSI)
			num_hw_msi++;
		if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) {
			*sw_msi_start = resv->start;
			num_sw_msi++;
		}

		rc = iopt_reserve_iova(iopt, resv->start,
				       resv->length - 1 + resv->start, dev);
		if (rc)
			goto out_reserved;
	}

	/* Drivers must offer sane combinations of regions */
	if (WARN_ON(num_sw_msi && num_hw_msi) || WARN_ON(num_sw_msi > 1)) {
		rc = -EINVAL;
		goto out_reserved;
	}

	rc = 0;
	goto out_free_resv;

out_reserved:
	__iopt_remove_reserved_iova(iopt, dev);
out_free_resv:
	iommu_put_resv_regions(dev, &resv_regions);
	up_write(&iopt->iova_rwsem);
	return rc;
}