1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
3 */
4 #include <linux/iommu.h>
5 #include <linux/iommufd.h>
6 #include <linux/pci-ats.h>
7 #include <linux/slab.h>
8 #include <uapi/linux/iommufd.h>
9
10 #include "../iommu-priv.h"
11 #include "io_pagetable.h"
12 #include "iommufd_private.h"
13
/*
 * Module parameter, false unless set by the administrator.
 * iommufd_device_bind() checks it when a device's iommu_group does not have
 * isolated MSI: if unset the bind is refused, if set the bind proceeds with a
 * warning.
 */
static bool allow_unsafe_interrupts;
module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(
	allow_unsafe_interrupts,
	"Allow IOMMUFD to bind to devices even if the platform cannot isolate "
	"the MSI interrupt window. Enabling this is a security weakness.");
20
/*
 * Attachment state for one PASID of an iommufd_group. Instances are stored in
 * iommufd_group::pasid_attach, indexed by PASID.
 */
struct iommufd_attach {
	/* hwpt the group is attached to for this PASID */
	struct iommufd_hw_pagetable *hwpt;
	/* iommufd_device pointers attached so far, indexed by idev->obj.id */
	struct xarray device_array;
};
25
iommufd_group_release(struct kref * kref)26 static void iommufd_group_release(struct kref *kref)
27 {
28 struct iommufd_group *igroup =
29 container_of(kref, struct iommufd_group, ref);
30
31 WARN_ON(!xa_empty(&igroup->pasid_attach));
32
33 xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup,
34 NULL, GFP_KERNEL);
35 iommu_group_put(igroup->group);
36 mutex_destroy(&igroup->lock);
37 kfree(igroup);
38 }
39
/*
 * Drop one reference on @group. When the count reaches zero,
 * iommufd_group_release() runs and frees it.
 */
static void iommufd_put_group(struct iommufd_group *group)
{
	kref_put(&group->ref, iommufd_group_release);
}
44
iommufd_group_try_get(struct iommufd_group * igroup,struct iommu_group * group)45 static bool iommufd_group_try_get(struct iommufd_group *igroup,
46 struct iommu_group *group)
47 {
48 if (!igroup)
49 return false;
50 /*
51 * group ID's cannot be re-used until the group is put back which does
52 * not happen if we could get an igroup pointer under the xa_lock.
53 */
54 if (WARN_ON(igroup->group != group))
55 return false;
56 return kref_get_unless_zero(&igroup->ref);
57 }
58
/*
 * iommufd needs to store some more data for each iommu_group, we keep a
 * parallel xarray indexed by iommu_group id to hold this instead of putting it
 * in the core structure. To keep things simple the iommufd_group memory is
 * unique within the iommufd_ctx. This makes it easy to check there are no
 * memory leaks.
 *
 * Returns the iommufd_group for @dev's iommu_group with a reference held for
 * the caller (released with iommufd_put_group()), or an ERR_PTR: -ENODEV when
 * @dev has no iommu_group, -ENOMEM when the allocation fails, or the xarray
 * error when the new igroup cannot be stored.
 */
static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
					       struct device *dev)
{
	struct iommufd_group *new_igroup;
	struct iommufd_group *cur_igroup;
	struct iommufd_group *igroup;
	struct iommu_group *group;
	unsigned int id;

	group = iommu_group_get(dev);
	if (!group)
		return ERR_PTR(-ENODEV);

	id = iommu_group_id(group);

	/* Fast path: an igroup for this id already exists and is still live */
	xa_lock(&ictx->groups);
	igroup = xa_load(&ictx->groups, id);
	if (iommufd_group_try_get(igroup, group)) {
		xa_unlock(&ictx->groups);
		/* The existing igroup holds its own iommu_group reference */
		iommu_group_put(group);
		return igroup;
	}
	xa_unlock(&ictx->groups);

	/* Slow path: build a candidate igroup outside the xa_lock */
	new_igroup = kzalloc(sizeof(*new_igroup), GFP_KERNEL);
	if (!new_igroup) {
		iommu_group_put(group);
		return ERR_PTR(-ENOMEM);
	}

	kref_init(&new_igroup->ref);
	mutex_init(&new_igroup->lock);
	xa_init(&new_igroup->pasid_attach);
	/* PHYS_ADDR_MAX is treated as "no sw_msi window" by the MSI setup */
	new_igroup->sw_msi_start = PHYS_ADDR_MAX;
	/* group reference moves into new_igroup */
	new_igroup->group = group;

	/*
	 * The ictx is not additionally refcounted here because all objects
	 * using an igroup must put it before their destroy completes.
	 */
	new_igroup->ictx = ictx;

	/*
	 * We dropped the lock so igroup is invalid. NULL is a safe and likely
	 * value to assume for the xa_cmpxchg algorithm.
	 */
	cur_igroup = NULL;
	xa_lock(&ictx->groups);
	while (true) {
		igroup = __xa_cmpxchg(&ictx->groups, id, cur_igroup, new_igroup,
				      GFP_KERNEL);
		if (xa_is_err(igroup)) {
			/*
			 * Unlock before the put: the release callback calls
			 * xa_cmpxchg() on this same xarray. The put also drops
			 * the iommu_group reference that moved into new_igroup.
			 */
			xa_unlock(&ictx->groups);
			iommufd_put_group(new_igroup);
			return ERR_PTR(xa_err(igroup));
		}

		/* new_group was successfully installed */
		if (cur_igroup == igroup) {
			xa_unlock(&ictx->groups);
			return new_igroup;
		}

		/* Check again if the current group is any good */
		if (iommufd_group_try_get(igroup, group)) {
			xa_unlock(&ictx->groups);
			iommufd_put_group(new_igroup);
			return igroup;
		}
		/*
		 * The slot holds an igroup we could not get a reference on;
		 * retry the exchange expecting that value so it is replaced.
		 */
		cur_igroup = igroup;
	}
}
139
iommufd_device_destroy(struct iommufd_object * obj)140 void iommufd_device_destroy(struct iommufd_object *obj)
141 {
142 struct iommufd_device *idev =
143 container_of(obj, struct iommufd_device, obj);
144
145 iommu_device_release_dma_owner(idev->dev);
146 iommufd_put_group(idev->igroup);
147 if (!iommufd_selftest_is_mock_dev(idev->dev))
148 iommufd_ctx_put(idev->ictx);
149 }
150
/**
 * iommufd_device_bind - Bind a physical device to an iommu fd
 * @ictx: iommufd file descriptor
 * @dev: Pointer to a physical device struct
 * @id: Output ID number to return to userspace for this device
 *
 * A successful bind establishes an ownership over the device and returns
 * struct iommufd_device pointer, otherwise returns error pointer.
 *
 * A driver using this API must set driver_managed_dma and must not touch
 * the device until this routine succeeds and establishes ownership.
 *
 * Binding a PCI device places the entire RID under iommufd control.
 *
 * The caller must undo this with iommufd_device_unbind()
 *
 * Return: the new iommufd_device, or an ERR_PTR. -EINVAL is returned when the
 * device lacks IOMMU_CAP_CACHE_COHERENCY and -EPERM when MSI cannot be
 * isolated and allow_unsafe_interrupts is not set; other errors are passed
 * through from the group lookup, DMA ownership claim or object allocation.
 */
struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
					   struct device *dev, u32 *id)
{
	struct iommufd_device *idev;
	struct iommufd_group *igroup;
	int rc;

	/*
	 * iommufd always sets IOMMU_CACHE because we offer no way for userspace
	 * to restore cache coherency.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
		return ERR_PTR(-EINVAL);

	igroup = iommufd_get_group(ictx, dev);
	if (IS_ERR(igroup))
		return ERR_CAST(igroup);

	/*
	 * For historical compat with VFIO the insecure interrupt path is
	 * allowed if the module parameter is set. Secure/Isolated means that a
	 * MemWr operation from the device (eg a simple DMA) cannot trigger an
	 * interrupt outside this iommufd context.
	 */
	if (!iommufd_selftest_is_mock_dev(dev) &&
	    !iommu_group_has_isolated_msi(igroup->group)) {
		if (!allow_unsafe_interrupts) {
			rc = -EPERM;
			goto out_group_put;
		}

		dev_warn(
			dev,
			"MSI interrupts are not secure, they cannot be isolated by the platform. "
			"Check that platform features like interrupt remapping are enabled. "
			"Use the \"allow_unsafe_interrupts\" module parameter to override\n");
	}

	/* The ictx pointer is the owner cookie for the DMA ownership claim */
	rc = iommu_device_claim_dma_owner(dev, ictx);
	if (rc)
		goto out_group_put;

	idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE);
	if (IS_ERR(idev)) {
		rc = PTR_ERR(idev);
		goto out_release_owner;
	}
	idev->ictx = ictx;
	/* Paired with the iommufd_ctx_put() in iommufd_device_destroy() */
	if (!iommufd_selftest_is_mock_dev(dev))
		iommufd_ctx_get(ictx);
	idev->dev = dev;
	idev->enforce_cache_coherency =
		device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
	/* The calling driver is a user until iommufd_device_unbind() */
	refcount_inc(&idev->obj.users);
	/* igroup refcount moves into iommufd_device */
	idev->igroup = igroup;

	/*
	 * If the caller fails after this success it must call
	 * iommufd_device_unbind() which is safe since we hold this refcount.
	 * This also means the device is a leaf in the graph and no other object
	 * can take a reference on it.
	 */
	iommufd_object_finalize(ictx, &idev->obj);
	*id = idev->obj.id;
	return idev;

out_release_owner:
	iommu_device_release_dma_owner(dev);
out_group_put:
	iommufd_put_group(igroup);
	return ERR_PTR(rc);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, "IOMMUFD");
242
243 /**
244 * iommufd_ctx_has_group - True if any device within the group is bound
245 * to the ictx
246 * @ictx: iommufd file descriptor
247 * @group: Pointer to a physical iommu_group struct
248 *
249 * True if any device within the group has been bound to this ictx, ex. via
250 * iommufd_device_bind(), therefore implying ictx ownership of the group.
251 */
iommufd_ctx_has_group(struct iommufd_ctx * ictx,struct iommu_group * group)252 bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group)
253 {
254 struct iommufd_object *obj;
255 unsigned long index;
256
257 if (!ictx || !group)
258 return false;
259
260 xa_lock(&ictx->objects);
261 xa_for_each(&ictx->objects, index, obj) {
262 if (obj->type == IOMMUFD_OBJ_DEVICE &&
263 container_of(obj, struct iommufd_device, obj)
264 ->igroup->group == group) {
265 xa_unlock(&ictx->objects);
266 return true;
267 }
268 }
269 xa_unlock(&ictx->objects);
270 return false;
271 }
272 EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, "IOMMUFD");
273
/**
 * iommufd_device_unbind - Undo iommufd_device_bind()
 * @idev: Device returned by iommufd_device_bind()
 *
 * Release the device from iommufd control. The DMA ownership will return back
 * to unowned with DMA controlled by the DMA API. This invalidates the
 * iommufd_device pointer, other APIs that consume it must not be called
 * concurrently.
 */
void iommufd_device_unbind(struct iommufd_device *idev)
{
	/* Destroys the device object on behalf of the binding driver */
	iommufd_object_destroy_user(idev->ictx, &idev->obj);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, "IOMMUFD");
288
/* Accessor: return the iommufd_ctx that @idev was bound to */
struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev)
{
	return idev->ictx;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, "IOMMUFD");
294
/*
 * Accessor: return the object ID of @idev, the same value that
 * iommufd_device_bind() reports through its @id output.
 */
u32 iommufd_device_to_id(struct iommufd_device *idev)
{
	return idev->obj.id;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD");
300
/*
 * Count the devices of @igroup recorded as attached for @pasid. Returns 0
 * when the group has no attachment for that PASID. Caller holds igroup->lock.
 */
static unsigned int iommufd_group_device_num(struct iommufd_group *igroup,
					     ioasid_t pasid)
{
	struct iommufd_attach *attach;
	struct iommufd_device *idev;
	unsigned long index;
	unsigned int nr = 0;

	lockdep_assert_held(&igroup->lock);

	attach = xa_load(&igroup->pasid_attach, pasid);
	if (!attach)
		return 0;

	xa_for_each(&attach->device_array, index, idev)
		nr++;

	return nr;
}
317
#ifdef CONFIG_IRQ_MSI_IOMMU
/*
 * Install into @hwpt_paging every sw_msi mapping on the ictx's sw_msi_list
 * that has the same sw_msi_start as @igroup and whose id is set in the
 * group's required_sw_msi bitmap. Nothing is done when the group's
 * sw_msi_start is still PHYS_ADDR_MAX.
 *
 * Returns 0 on success or the first error from iommufd_sw_msi_install().
 */
static int iommufd_group_setup_msi(struct iommufd_group *igroup,
				   struct iommufd_hwpt_paging *hwpt_paging)
{
	struct iommufd_ctx *ictx = igroup->ictx;
	struct iommufd_sw_msi_map *cur;

	if (igroup->sw_msi_start == PHYS_ADDR_MAX)
		return 0;

	/*
	 * Install all the MSI pages the device has been using into the domain
	 */
	/* Scope-based lock: sw_msi_lock is dropped on every return below */
	guard(mutex)(&ictx->sw_msi_lock);
	list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) {
		int rc;

		/* Skip maps for another window or not needed by this group */
		if (cur->sw_msi_start != igroup->sw_msi_start ||
		    !test_bit(cur->id, igroup->required_sw_msi.bitmap))
			continue;

		rc = iommufd_sw_msi_install(ictx, hwpt_paging, cur);
		if (rc)
			return rc;
	}
	return 0;
}
#else
/* Without CONFIG_IRQ_MSI_IOMMU there is nothing to install */
static inline int
iommufd_group_setup_msi(struct iommufd_group *igroup,
			struct iommufd_hwpt_paging *hwpt_paging)
{
	return 0;
}
#endif
353
354 static bool
iommufd_group_first_attach(struct iommufd_group * igroup,ioasid_t pasid)355 iommufd_group_first_attach(struct iommufd_group *igroup, ioasid_t pasid)
356 {
357 lockdep_assert_held(&igroup->lock);
358 return !xa_load(&igroup->pasid_attach, pasid);
359 }
360
361 static int
iommufd_device_attach_reserved_iova(struct iommufd_device * idev,struct iommufd_hwpt_paging * hwpt_paging)362 iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
363 struct iommufd_hwpt_paging *hwpt_paging)
364 {
365 struct iommufd_group *igroup = idev->igroup;
366 int rc;
367
368 lockdep_assert_held(&igroup->lock);
369
370 rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt,
371 idev->dev,
372 &igroup->sw_msi_start);
373 if (rc)
374 return rc;
375
376 if (iommufd_group_first_attach(igroup, IOMMU_NO_PASID)) {
377 rc = iommufd_group_setup_msi(igroup, hwpt_paging);
378 if (rc) {
379 iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt,
380 idev->dev);
381 return rc;
382 }
383 }
384 return 0;
385 }
386
387 /* The device attach/detach/replace helpers for attach_handle */
388
iommufd_device_is_attached(struct iommufd_device * idev,ioasid_t pasid)389 static bool iommufd_device_is_attached(struct iommufd_device *idev,
390 ioasid_t pasid)
391 {
392 struct iommufd_attach *attach;
393
394 attach = xa_load(&idev->igroup->pasid_attach, pasid);
395 return xa_load(&attach->device_array, idev->obj.id);
396 }
397
/*
 * Check that attaching @hwpt for @pasid is consistent with the pasid_compat
 * flags of what the group already has attached. Caller holds igroup->lock.
 *
 * - IOMMU_NO_PASID attach: a hwpt that is not pasid_compat is refused when
 *   any entry is present in pasid_attach after index IOMMU_NO_PASID.
 * - PASID attach: @hwpt must be pasid_compat, and so must the hwpt currently
 *   attached for IOMMU_NO_PASID, if there is one.
 *
 * Returns 0 when compatible, -EINVAL otherwise.
 */
static int iommufd_hwpt_pasid_compat(struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_device *idev,
				     ioasid_t pasid)
{
	struct iommufd_group *igroup = idev->igroup;
	unsigned long index = IOMMU_NO_PASID;
	struct iommufd_attach *rid_attach;

	lockdep_assert_held(&igroup->lock);

	if (pasid != IOMMU_NO_PASID) {
		if (!hwpt->pasid_compat)
			return -EINVAL;

		rid_attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
		if (rid_attach && rid_attach->hwpt &&
		    !rid_attach->hwpt->pasid_compat)
			return -EINVAL;
		return 0;
	}

	if (hwpt->pasid_compat)
		return 0;

	if (xa_find_after(&igroup->pasid_attach, &index, UINT_MAX, XA_PRESENT))
		return -EINVAL;

	return 0;
}
426
iommufd_hwpt_compatible_device(struct iommufd_hw_pagetable * hwpt,struct iommufd_device * idev)427 static bool iommufd_hwpt_compatible_device(struct iommufd_hw_pagetable *hwpt,
428 struct iommufd_device *idev)
429 {
430 struct pci_dev *pdev;
431
432 if (!hwpt->fault || !dev_is_pci(idev->dev))
433 return true;
434
435 /*
436 * Once we turn on PCI/PRI support for VF, the response failure code
437 * should not be forwarded to the hardware due to PRI being a shared
438 * resource between PF and VFs. There is no coordination for this
439 * shared capability. This waits for a vPRI reset to recover.
440 */
441 pdev = to_pci_dev(idev->dev);
442
443 return (!pdev->is_virtfn || !pci_pri_supported(pdev));
444 }
445
/*
 * Attach @hwpt's domain using a freshly allocated attach handle that points
 * back at @idev. IOMMU_NO_PASID attaches the whole iommu_group, any other
 * @pasid attaches just that PASID of the device. The handle is freed here if
 * the attach fails.
 *
 * Returns 0 on success, -EINVAL if the hwpt is incompatible with the device
 * or the group's existing attachments, -ENOMEM, or the attach error.
 */
static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
				      struct iommufd_device *idev,
				      ioasid_t pasid)
{
	struct iommufd_attach_handle *new_handle;
	int rc;

	if (!iommufd_hwpt_compatible_device(hwpt, idev))
		return -EINVAL;

	rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid);
	if (rc)
		return rc;

	new_handle = kzalloc(sizeof(*new_handle), GFP_KERNEL);
	if (!new_handle)
		return -ENOMEM;
	new_handle->idev = idev;

	if (pasid != IOMMU_NO_PASID)
		rc = iommu_attach_device_pasid(hwpt->domain, idev->dev, pasid,
					       &new_handle->handle);
	else
		rc = iommu_attach_group_handle(hwpt->domain,
					       idev->igroup->group,
					       &new_handle->handle);

	/* On failure the handle was never published, so free it here */
	if (rc)
		kfree(new_handle);
	return rc;
}
480
481 static struct iommufd_attach_handle *
iommufd_device_get_attach_handle(struct iommufd_device * idev,ioasid_t pasid)482 iommufd_device_get_attach_handle(struct iommufd_device *idev, ioasid_t pasid)
483 {
484 struct iommu_attach_handle *handle;
485
486 lockdep_assert_held(&idev->igroup->lock);
487
488 handle =
489 iommu_attach_handle_get(idev->igroup->group, pasid, 0);
490 if (IS_ERR(handle))
491 return NULL;
492 return to_iommufd_handle(handle);
493 }
494
/*
 * Detach @hwpt's domain: from the whole iommu_group for IOMMU_NO_PASID, or
 * from just @pasid of the device otherwise. Afterwards the previous attach
 * handle is passed to iommufd_auto_response_faults() and then freed.
 */
static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
				       struct iommufd_device *idev,
				       ioasid_t pasid)
{
	struct iommufd_attach_handle *stale;

	/*
	 * Fetch the handle before detaching; presumably it can no longer be
	 * looked up once the detach has happened.
	 */
	stale = iommufd_device_get_attach_handle(idev, pasid);

	if (pasid != IOMMU_NO_PASID)
		iommu_detach_device_pasid(hwpt->domain, idev->dev, pasid);
	else
		iommu_detach_group_handle(hwpt->domain, idev->igroup->group);

	iommufd_auto_response_faults(hwpt, stale);
	kfree(stale);
}
510
/*
 * Replace the domain attached for @pasid with @hwpt's domain, installing a
 * new attach handle. For IOMMU_NO_PASID the whole iommu_group is replaced,
 * otherwise only that PASID of the device. On success the previous handle is
 * passed to iommufd_auto_response_faults() and freed; on failure the new
 * handle is freed and the previous one is left alone.
 *
 * Returns 0 on success, -EINVAL if @hwpt is incompatible, -ENOMEM, or the
 * error from the replace operation.
 *
 * NOTE(review): @old is not referenced in this function, and the faults of
 * the previous handle are auto-responded via @hwpt (the new hwpt) rather than
 * @old - confirm this is intended.
 */
static int iommufd_hwpt_replace_device(struct iommufd_device *idev,
				       ioasid_t pasid,
				       struct iommufd_hw_pagetable *hwpt,
				       struct iommufd_hw_pagetable *old)
{
	struct iommufd_attach_handle *handle, *old_handle;
	int rc;

	if (!iommufd_hwpt_compatible_device(hwpt, idev))
		return -EINVAL;

	rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid);
	if (rc)
		return rc;

	/* Remember the handle being replaced so it can be freed on success */
	old_handle = iommufd_device_get_attach_handle(idev, pasid);

	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
	if (!handle)
		return -ENOMEM;

	handle->idev = idev;
	if (pasid == IOMMU_NO_PASID)
		rc = iommu_replace_group_handle(idev->igroup->group,
						hwpt->domain, &handle->handle);
	else
		rc = iommu_replace_device_pasid(hwpt->domain, idev->dev,
						pasid, &handle->handle);
	if (rc)
		goto out_free_handle;

	iommufd_auto_response_faults(hwpt, old_handle);
	kfree(old_handle);

	return 0;

out_free_handle:
	kfree(handle);
	return rc;
}
551
/*
 * Attach @idev's @pasid to @hwpt, serialized by the igroup lock.
 *
 * The first attach for a PASID within the group allocates the iommufd_attach
 * and performs the real attach through iommufd_hwpt_attach_device(). Later
 * devices of the group only record themselves in the device_array and must
 * name the hwpt the group is already attached to, otherwise -EINVAL is
 * returned. Every successful call takes one reference on hwpt->obj.users.
 *
 * Returns 0 on success or a negative errno; on failure the reservations made
 * here are undone by the error labels at the bottom.
 */
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
				struct iommufd_device *idev, ioasid_t pasid)
{
	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
	/* Reserved IOVA is only managed for IOMMU_NO_PASID on a paging hwpt */
	bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID;
	struct iommufd_group *igroup = idev->igroup;
	struct iommufd_hw_pagetable *old_hwpt;
	struct iommufd_attach *attach;
	int rc;

	mutex_lock(&igroup->lock);

	/*
	 * Reserve the PASID slot if it is empty. A NULL return means the slot
	 * was empty (now reserved); otherwise it is the existing attach.
	 */
	attach = xa_cmpxchg(&igroup->pasid_attach, pasid, NULL,
			    XA_ZERO_ENTRY, GFP_KERNEL);
	if (xa_is_err(attach)) {
		rc = xa_err(attach);
		goto err_unlock;
	}

	if (!attach) {
		attach = kzalloc(sizeof(*attach), GFP_KERNEL);
		if (!attach) {
			rc = -ENOMEM;
			goto err_release_pasid;
		}
		xa_init(&attach->device_array);
	}

	/* NULL for a freshly allocated attach (kzalloc) */
	old_hwpt = attach->hwpt;

	/* Reserve this device's slot; fails if the device is already there */
	rc = xa_insert(&attach->device_array, idev->obj.id, XA_ZERO_ENTRY,
		       GFP_KERNEL);
	if (rc) {
		WARN_ON(rc == -EBUSY && !old_hwpt);
		goto err_free_attach;
	}

	/* The group is already attached to a different hwpt for this PASID */
	if (old_hwpt && old_hwpt != hwpt) {
		rc = -EINVAL;
		goto err_release_devid;
	}

	if (attach_resv) {
		rc = iommufd_device_attach_reserved_iova(idev, hwpt_paging);
		if (rc)
			goto err_release_devid;
	}

	/*
	 * Only attach to the group once for the first device that is in the
	 * group. All the other devices will follow this attachment. The user
	 * should attach every device individually to the hwpt as the per-device
	 * reserved regions are only updated during individual device
	 * attachment.
	 */
	if (iommufd_group_first_attach(igroup, pasid)) {
		rc = iommufd_hwpt_attach_device(hwpt, idev, pasid);
		if (rc)
			goto err_unresv;
		attach->hwpt = hwpt;
		/* Replace the reserved slot with the real attach pointer */
		WARN_ON(xa_is_err(xa_store(&igroup->pasid_attach, pasid, attach,
					   GFP_KERNEL)));
	}
	refcount_inc(&hwpt->obj.users);
	/* Replace the reserved device slot with the idev pointer */
	WARN_ON(xa_is_err(xa_store(&attach->device_array, idev->obj.id,
				   idev, GFP_KERNEL)));
	mutex_unlock(&igroup->lock);
	return 0;
err_unresv:
	if (attach_resv)
		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
err_release_devid:
	xa_release(&attach->device_array, idev->obj.id);
err_free_attach:
	/* Only free an attach allocated above, i.e. one not stored yet */
	if (iommufd_group_first_attach(igroup, pasid))
		kfree(attach);
err_release_pasid:
	if (iommufd_group_first_attach(igroup, pasid))
		xa_release(&igroup->pasid_attach, pasid);
err_unlock:
	mutex_unlock(&igroup->lock);
	return rc;
}
635
636 struct iommufd_hw_pagetable *
iommufd_hw_pagetable_detach(struct iommufd_device * idev,ioasid_t pasid)637 iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid)
638 {
639 struct iommufd_group *igroup = idev->igroup;
640 struct iommufd_hwpt_paging *hwpt_paging;
641 struct iommufd_hw_pagetable *hwpt;
642 struct iommufd_attach *attach;
643
644 mutex_lock(&igroup->lock);
645 attach = xa_load(&igroup->pasid_attach, pasid);
646 if (!attach) {
647 mutex_unlock(&igroup->lock);
648 return NULL;
649 }
650
651 hwpt = attach->hwpt;
652 hwpt_paging = find_hwpt_paging(hwpt);
653
654 xa_erase(&attach->device_array, idev->obj.id);
655 if (xa_empty(&attach->device_array)) {
656 iommufd_hwpt_detach_device(hwpt, idev, pasid);
657 xa_erase(&igroup->pasid_attach, pasid);
658 kfree(attach);
659 }
660 if (hwpt_paging && pasid == IOMMU_NO_PASID)
661 iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
662 mutex_unlock(&igroup->lock);
663
664 /* Caller must destroy hwpt */
665 return hwpt;
666 }
667
668 static struct iommufd_hw_pagetable *
iommufd_device_do_attach(struct iommufd_device * idev,ioasid_t pasid,struct iommufd_hw_pagetable * hwpt)669 iommufd_device_do_attach(struct iommufd_device *idev, ioasid_t pasid,
670 struct iommufd_hw_pagetable *hwpt)
671 {
672 int rc;
673
674 rc = iommufd_hw_pagetable_attach(hwpt, idev, pasid);
675 if (rc)
676 return ERR_PTR(rc);
677 return NULL;
678 }
679
680 static void
iommufd_group_remove_reserved_iova(struct iommufd_group * igroup,struct iommufd_hwpt_paging * hwpt_paging)681 iommufd_group_remove_reserved_iova(struct iommufd_group *igroup,
682 struct iommufd_hwpt_paging *hwpt_paging)
683 {
684 struct iommufd_attach *attach;
685 struct iommufd_device *cur;
686 unsigned long index;
687
688 lockdep_assert_held(&igroup->lock);
689
690 attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
691 xa_for_each(&attach->device_array, index, cur)
692 iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev);
693 }
694
/*
 * Prepare the IOAS behind @hwpt_paging for a replace of the group's
 * IOMMU_NO_PASID attachment. Unless the currently attached hwpt is a paging
 * hwpt on the same IOAS, the reserved regions of every attached device are
 * enforced on the new IOAS. Then the group's MSI pages are installed into
 * @hwpt_paging. Caller holds igroup->lock and the attachment must exist.
 *
 * Returns 0 on success or a negative errno. On failure the reserved IOVA of
 * all attached devices is removed from @hwpt_paging's IOAS.
 *
 * NOTE(review): the error path removes the reserved IOVA even when the
 * enforce loop was skipped because old and new hwpt share an IOAS - confirm
 * that is the intended rollback for an MSI setup failure in that case.
 */
static int
iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup,
				       struct iommufd_hwpt_paging *hwpt_paging)
{
	struct iommufd_hwpt_paging *old_hwpt_paging;
	struct iommufd_attach *attach;
	struct iommufd_device *cur;
	unsigned long index;
	int rc;

	lockdep_assert_held(&igroup->lock);

	attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
	old_hwpt_paging = find_hwpt_paging(attach->hwpt);
	if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) {
		xa_for_each(&attach->device_array, index, cur) {
			rc = iopt_table_enforce_dev_resv_regions(
				&hwpt_paging->ioas->iopt, cur->dev, NULL);
			if (rc)
				goto err_unresv;
		}
	}

	rc = iommufd_group_setup_msi(igroup, hwpt_paging);
	if (rc)
		goto err_unresv;
	return 0;

err_unresv:
	iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
	return rc;
}
727
/*
 * attach_fn implementation for a replace: switch the group's attachment for
 * @pasid from its current hwpt to @hwpt.
 *
 * Returns:
 *  - NULL when @hwpt is already the attached hwpt (nothing to do),
 *  - the old hwpt on success, with one reference left for the caller to put,
 *  - ERR_PTR(-EINVAL) when there is no attachment for @pasid or @idev is not
 *    part of it, or another ERR_PTR from the reserved IOVA / replace steps.
 */
static struct iommufd_hw_pagetable *
iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid,
			  struct iommufd_hw_pagetable *hwpt)
{
	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
	/* Reserved IOVA is only managed for IOMMU_NO_PASID on a paging hwpt */
	bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID;
	struct iommufd_hwpt_paging *old_hwpt_paging;
	struct iommufd_group *igroup = idev->igroup;
	struct iommufd_hw_pagetable *old_hwpt;
	struct iommufd_attach *attach;
	unsigned int num_devices;
	int rc;

	mutex_lock(&igroup->lock);

	/* Replace is only valid on top of an existing attachment */
	attach = xa_load(&igroup->pasid_attach, pasid);
	if (!attach) {
		rc = -EINVAL;
		goto err_unlock;
	}

	old_hwpt = attach->hwpt;

	WARN_ON(!old_hwpt || xa_empty(&attach->device_array));

	if (!iommufd_device_is_attached(idev, pasid)) {
		rc = -EINVAL;
		goto err_unlock;
	}

	if (hwpt == old_hwpt) {
		mutex_unlock(&igroup->lock);
		return NULL;
	}

	if (attach_resv) {
		rc = iommufd_group_do_replace_reserved_iova(igroup, hwpt_paging);
		if (rc)
			goto err_unlock;
	}

	rc = iommufd_hwpt_replace_device(idev, pasid, hwpt, old_hwpt);
	if (rc)
		goto err_unresv;

	/*
	 * The old IOAS no longer needs the devices' reserved regions unless
	 * the new hwpt is a paging hwpt on that same IOAS.
	 */
	old_hwpt_paging = find_hwpt_paging(old_hwpt);
	if (old_hwpt_paging && pasid == IOMMU_NO_PASID &&
	    (!hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas))
		iommufd_group_remove_reserved_iova(igroup, old_hwpt_paging);

	attach->hwpt = hwpt;

	num_devices = iommufd_group_device_num(igroup, pasid);
	/*
	 * Move the refcounts held by the device_array to the new hwpt. Retain a
	 * refcount for this thread as the caller will free it.
	 */
	refcount_add(num_devices, &hwpt->obj.users);
	if (num_devices > 1)
		WARN_ON(refcount_sub_and_test(num_devices - 1,
					      &old_hwpt->obj.users));
	mutex_unlock(&igroup->lock);

	/* Caller must destroy old_hwpt */
	return old_hwpt;
err_unresv:
	if (attach_resv)
		iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
err_unlock:
	mutex_unlock(&igroup->lock);
	return ERR_PTR(rc);
}
800
/*
 * Signature shared by iommufd_device_do_attach() and
 * iommufd_device_do_replace(). The return value is an ERR_PTR on failure,
 * NULL when there is nothing for the caller to release, or a hwpt that the
 * caller must put once all locks have been dropped.
 */
typedef struct iommufd_hw_pagetable *(*attach_fn)(
	struct iommufd_device *idev, ioasid_t pasid,
	struct iommufd_hw_pagetable *hwpt);
804
/*
 * When automatically managing the domains we search for a compatible domain in
 * the iopt and if one is found use it, otherwise create a new domain.
 * Automatic domain selection will never pick a manually created domain.
 *
 * On success *@pt_id is set to the ID of the hwpt that was used and the value
 * returned by @do_attach is passed back (NULL, or a hwpt the caller must
 * put). On failure an ERR_PTR is returned.
 */
static struct iommufd_hw_pagetable *
iommufd_device_auto_get_domain(struct iommufd_device *idev, ioasid_t pasid,
			       struct iommufd_ioas *ioas, u32 *pt_id,
			       attach_fn do_attach)
{
	/*
	 * iommufd_hw_pagetable_attach() is called by
	 * iommufd_hw_pagetable_alloc() in immediate attachment mode, same as
	 * iommufd_device_do_attach(). So if we are in this mode then we prefer
	 * to use the immediate_attach path as it supports drivers that can't
	 * directly allocate a domain.
	 */
	bool immediate_attach = do_attach == iommufd_device_do_attach;
	struct iommufd_hw_pagetable *destroy_hwpt;
	struct iommufd_hwpt_paging *hwpt_paging;
	struct iommufd_hw_pagetable *hwpt;

	/*
	 * There is no differentiation when domains are allocated, so any domain
	 * that is willing to attach to the device is interchangeable with any
	 * other.
	 */
	mutex_lock(&ioas->mutex);
	list_for_each_entry(hwpt_paging, &ioas->hwpt_list, hwpt_item) {
		/* Manually created domains are never auto-selected */
		if (!hwpt_paging->auto_domain)
			continue;

		hwpt = &hwpt_paging->common;
		/* Skip hwpts whose object cannot be locked */
		if (!iommufd_lock_obj(&hwpt->obj))
			continue;
		destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
		if (IS_ERR(destroy_hwpt)) {
			iommufd_put_object(idev->ictx, &hwpt->obj);
			/*
			 * -EINVAL means the domain is incompatible with the
			 * device. Other error codes should propagate to
			 * userspace as failure. Success means the domain is
			 * attached.
			 */
			if (PTR_ERR(destroy_hwpt) == -EINVAL)
				continue;
			goto out_unlock;
		}
		*pt_id = hwpt->obj.id;
		iommufd_put_object(idev->ictx, &hwpt->obj);
		goto out_unlock;
	}

	/* No existing auto domain accepted the device: allocate a new one */
	hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, pasid,
						0, immediate_attach, NULL);
	if (IS_ERR(hwpt_paging)) {
		destroy_hwpt = ERR_CAST(hwpt_paging);
		goto out_unlock;
	}
	hwpt = &hwpt_paging->common;

	if (!immediate_attach) {
		destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
		if (IS_ERR(destroy_hwpt))
			goto out_abort;
	} else {
		/* The allocation already attached; nothing old to destroy */
		destroy_hwpt = NULL;
	}

	hwpt_paging->auto_domain = true;
	*pt_id = hwpt->obj.id;

	iommufd_object_finalize(idev->ictx, &hwpt->obj);
	mutex_unlock(&ioas->mutex);
	return destroy_hwpt;

out_abort:
	iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj);
out_unlock:
	mutex_unlock(&ioas->mutex);
	return destroy_hwpt;
}
887
/*
 * Common body of attach and replace. Resolves *@pt_id to an object and hands
 * it to @do_attach: a paging or nested hwpt is used directly, while an IOAS
 * goes through automatic domain selection, which may update *@pt_id. Any
 * other object type is rejected with -EINVAL.
 *
 * Returns 0 on success or a negative errno. A hwpt handed back by @do_attach
 * is put here after the pt object reference has been dropped.
 */
static int iommufd_device_change_pt(struct iommufd_device *idev,
				    ioasid_t pasid,
				    u32 *pt_id, attach_fn do_attach)
{
	struct iommufd_hw_pagetable *destroy_hwpt;
	struct iommufd_object *pt_obj;

	pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY);
	if (IS_ERR(pt_obj))
		return PTR_ERR(pt_obj);

	switch (pt_obj->type) {
	case IOMMUFD_OBJ_HWPT_NESTED:
	case IOMMUFD_OBJ_HWPT_PAGING:
		destroy_hwpt = (*do_attach)(
			idev, pasid,
			container_of(pt_obj, struct iommufd_hw_pagetable, obj));
		break;
	case IOMMUFD_OBJ_IOAS:
		destroy_hwpt = iommufd_device_auto_get_domain(
			idev, pasid,
			container_of(pt_obj, struct iommufd_ioas, obj), pt_id,
			do_attach);
		break;
	default:
		destroy_hwpt = ERR_PTR(-EINVAL);
		break;
	}

	/* The pt object reference is dropped on success and failure alike */
	iommufd_put_object(idev->ictx, pt_obj);

	if (IS_ERR(destroy_hwpt))
		return PTR_ERR(destroy_hwpt);

	/* This destruction has to be after we unlock everything */
	if (destroy_hwpt)
		iommufd_hw_pagetable_put(idev->ictx, destroy_hwpt);
	return 0;
}
935
936 /**
937 * iommufd_device_attach - Connect a device/pasid to an iommu_domain
938 * @idev: device to attach
939 * @pasid: pasid to attach
940 * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
941 * Output the IOMMUFD_OBJ_HWPT_PAGING ID
942 *
943 * This connects the device/pasid to an iommu_domain, either automatically
944 * or manually selected. Once this completes the device could do DMA with
945 * @pasid. @pasid is IOMMU_NO_PASID if this attach is for no pasid usage.
946 *
947 * The caller should return the resulting pt_id back to userspace.
948 * This function is undone by calling iommufd_device_detach().
949 */
iommufd_device_attach(struct iommufd_device * idev,ioasid_t pasid,u32 * pt_id)950 int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid,
951 u32 *pt_id)
952 {
953 int rc;
954
955 rc = iommufd_device_change_pt(idev, pasid, pt_id,
956 &iommufd_device_do_attach);
957 if (rc)
958 return rc;
959
960 /*
961 * Pairs with iommufd_device_detach() - catches caller bugs attempting
962 * to destroy a device with an attachment.
963 */
964 refcount_inc(&idev->obj.users);
965 return 0;
966 }
967 EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD");
968
/**
 * iommufd_device_replace - Change the device/pasid's iommu_domain
 * @idev: device to change
 * @pasid: pasid to change
 * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
 *         Output the IOMMUFD_OBJ_HWPT_PAGING ID
 *
 * This is the same as::
 *
 *   iommufd_device_detach();
 *   iommufd_device_attach();
 *
 * If it fails then no change is made to the attachment. The iommu driver may
 * implement this so there is no disruption in translation. This can only be
 * called if iommufd_device_attach() has already succeeded. @pasid is
 * IOMMU_NO_PASID for no pasid usage.
 *
 * Return: 0 on success, negative errno on failure.
 */
int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid,
			   u32 *pt_id)
{
	/*
	 * Unlike iommufd_device_attach() no extra reference is taken on the
	 * device object here.
	 */
	return iommufd_device_change_pt(idev, pasid, pt_id,
					&iommufd_device_do_replace);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, "IOMMUFD");
993
994 /**
 * iommufd_device_detach - Disconnect a device/pasid from an iommu_domain
996 * @idev: device to detach
997 * @pasid: pasid to detach
998 *
999 * Undo iommufd_device_attach(). This disconnects the idev from the previously
1000 * attached pt_id. The device returns back to a blocked DMA translation.
1001 * @pasid is IOMMU_NO_PASID for no pasid usage.
1002 */
void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid)
{
	struct iommufd_hw_pagetable *old_hwpt =
		iommufd_hw_pagetable_detach(idev, pasid);

	/* A NULL result means there was nothing to release for this pasid */
	if (old_hwpt) {
		iommufd_hw_pagetable_put(idev->ictx, old_hwpt);
		/* Drops the user reference taken by iommufd_device_attach() */
		refcount_dec(&idev->obj.users);
	}
}
1013 EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, "IOMMUFD");
1014
1015 /*
1016 * On success, it will refcount_inc() at a valid new_ioas and refcount_dec() at
1017 * a valid cur_ioas (access->ioas). A caller passing in a valid new_ioas should
1018 * call iommufd_put_object() if it does an iommufd_get_object() for a new_ioas.
1019 */
static int iommufd_access_change_ioas(struct iommufd_access *access,
				      struct iommufd_ioas *new_ioas)
{
	/*
	 * NOTE(review): the list id is captured up front, presumably because
	 * iopt_add_access() below may assign a new id to the access before the
	 * old entry is removed - confirm against iopt_add_access().
	 */
	u32 iopt_access_list_id = access->iopt_access_list_id;
	struct iommufd_ioas *cur_ioas = access->ioas;
	int rc;

	/* The caller must hold access->ioas_lock */
	lockdep_assert_held(&access->ioas_lock);

	/*
	 * We are racing with a concurrent detach, bail.
	 *
	 * ioas and ioas_unpin only differ while another caller of this
	 * function has cleared access->ioas and dropped the lock around the
	 * unmap callback below.
	 */
	if (cur_ioas != access->ioas_unpin)
		return -EBUSY;

	/* Nothing to change; no reference counts are touched */
	if (cur_ioas == new_ioas)
		return 0;

	/*
	 * Set ioas to NULL to block any further iommufd_access_pin_pages().
	 * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
	 */
	access->ioas = NULL;

	if (new_ioas) {
		rc = iopt_add_access(&new_ioas->iopt, access);
		if (rc) {
			/* Roll back so the access is left on its old IOAS */
			access->ioas = cur_ioas;
			return rc;
		}
		/* The access is a user of the new IOAS while attached */
		refcount_inc(&new_ioas->obj.users);
	}

	if (cur_ioas) {
		/*
		 * The unmap op is optional. When present it is called for the
		 * whole address range with ioas_lock dropped; presumably so the
		 * driver can call iommufd_access_unpin_pages(), which takes
		 * ioas_lock itself.
		 */
		if (access->ops->unmap) {
			mutex_unlock(&access->ioas_lock);
			access->ops->unmap(access->data, 0, ULONG_MAX);
			mutex_lock(&access->ioas_lock);
		}
		iopt_remove_access(&cur_ioas->iopt, access, iopt_access_list_id);
		refcount_dec(&cur_ioas->obj.users);
	}

	/* Publish the new IOAS for both the pin and the unpin paths */
	access->ioas = new_ioas;
	access->ioas_unpin = new_ioas;

	return 0;
}
1066
/*
 * Look up the IOAS with the given object id in the access's context and switch
 * the access over to it. Returns 0 or a negative errno.
 */
static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id)
{
	struct iommufd_ioas *ioas;
	int rc;

	ioas = iommufd_get_ioas(access->ictx, id);
	if (IS_ERR(ioas))
		return PTR_ERR(ioas);

	rc = iommufd_access_change_ioas(access, ioas);

	/* Balance the lookup above whether or not the change succeeded */
	iommufd_put_object(access->ictx, &ioas->obj);
	return rc;
}
1078
iommufd_access_destroy_object(struct iommufd_object * obj)1079 void iommufd_access_destroy_object(struct iommufd_object *obj)
1080 {
1081 struct iommufd_access *access =
1082 container_of(obj, struct iommufd_access, obj);
1083
1084 mutex_lock(&access->ioas_lock);
1085 if (access->ioas)
1086 WARN_ON(iommufd_access_change_ioas(access, NULL));
1087 mutex_unlock(&access->ioas_lock);
1088 iommufd_ctx_put(access->ictx);
1089 }
1090
1091 /**
1092 * iommufd_access_create - Create an iommufd_access
1093 * @ictx: iommufd file descriptor
1094 * @ops: Driver's ops to associate with the access
1095 * @data: Opaque data to pass into ops functions
1096 * @id: Output ID number to return to userspace for this access
1097 *
1098 * An iommufd_access allows a driver to read/write to the IOAS without using
1099 * DMA. The underlying CPU memory can be accessed using the
1100 * iommufd_access_pin_pages() or iommufd_access_rw() functions.
1101 *
1102 * The provided ops are required to use iommufd_access_pin_pages().
1103 */
1104 struct iommufd_access *
iommufd_access_create(struct iommufd_ctx * ictx,const struct iommufd_access_ops * ops,void * data,u32 * id)1105 iommufd_access_create(struct iommufd_ctx *ictx,
1106 const struct iommufd_access_ops *ops, void *data, u32 *id)
1107 {
1108 struct iommufd_access *access;
1109
1110 /*
1111 * There is no uAPI for the access object, but to keep things symmetric
1112 * use the object infrastructure anyhow.
1113 */
1114 access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
1115 if (IS_ERR(access))
1116 return access;
1117
1118 access->data = data;
1119 access->ops = ops;
1120
1121 if (ops->needs_pin_pages)
1122 access->iova_alignment = PAGE_SIZE;
1123 else
1124 access->iova_alignment = 1;
1125
1126 /* The calling driver is a user until iommufd_access_destroy() */
1127 refcount_inc(&access->obj.users);
1128 access->ictx = ictx;
1129 iommufd_ctx_get(ictx);
1130 iommufd_object_finalize(ictx, &access->obj);
1131 *id = access->obj.id;
1132 mutex_init(&access->ioas_lock);
1133 return access;
1134 }
1135 EXPORT_SYMBOL_NS_GPL(iommufd_access_create, "IOMMUFD");
1136
1137 /**
1138 * iommufd_access_destroy - Destroy an iommufd_access
1139 * @access: The access to destroy
1140 *
1141 * The caller must stop using the access before destroying it.
1142 */
iommufd_access_destroy(struct iommufd_access * access)1143 void iommufd_access_destroy(struct iommufd_access *access)
1144 {
1145 iommufd_object_destroy_user(access->ictx, &access->obj);
1146 }
1147 EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, "IOMMUFD");
1148
iommufd_access_detach(struct iommufd_access * access)1149 void iommufd_access_detach(struct iommufd_access *access)
1150 {
1151 mutex_lock(&access->ioas_lock);
1152 if (WARN_ON(!access->ioas)) {
1153 mutex_unlock(&access->ioas_lock);
1154 return;
1155 }
1156 WARN_ON(iommufd_access_change_ioas(access, NULL));
1157 mutex_unlock(&access->ioas_lock);
1158 }
1159 EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, "IOMMUFD");
1160
/*
 * Attach the access to the IOAS identified by @ioas_id. The access must not
 * already be attached; that case triggers a WARN and returns -EINVAL.
 * Otherwise returns the result of switching to the new IOAS.
 */
int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
{
	int rc = -EINVAL;

	mutex_lock(&access->ioas_lock);
	if (!WARN_ON(access->ioas))
		rc = iommufd_access_change_ioas_id(access, ioas_id);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
1175 EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, "IOMMUFD");
1176
/*
 * Switch an attached access to the IOAS identified by @ioas_id. Returns
 * -ENOENT if the access is not currently attached, otherwise the result of
 * switching to the new IOAS.
 */
int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id)
{
	int rc = -ENOENT;

	mutex_lock(&access->ioas_lock);
	if (access->ioas)
		rc = iommufd_access_change_ioas_id(access, ioas_id);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
1190 EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, "IOMMUFD");
1191
1192 /**
1193 * iommufd_access_notify_unmap - Notify users of an iopt to stop using it
1194 * @iopt: iopt to work on
1195 * @iova: Starting iova in the iopt
1196 * @length: Number of bytes
1197 *
1198 * After this function returns there should be no users attached to the pages
1199 * linked to this iopt that intersect with iova,length. Anyone that has attached
1200 * a user through iopt_access_pages() needs to detach it through
1201 * iommufd_access_unpin_pages() before this function returns.
1202 *
 * iommufd_access_destroy() will wait for any outstanding unmap callback to
 * complete. Once iommufd_access_destroy() returns, no unmap ops are running or
 * will run in the future. Due to this a driver must not create locking that
 * prevents unmap from completing while iommufd_access_destroy() is running.
1207 */
iommufd_access_notify_unmap(struct io_pagetable * iopt,unsigned long iova,unsigned long length)1208 void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
1209 unsigned long length)
1210 {
1211 struct iommufd_ioas *ioas =
1212 container_of(iopt, struct iommufd_ioas, iopt);
1213 struct iommufd_access *access;
1214 unsigned long index;
1215
1216 xa_lock(&ioas->iopt.access_list);
1217 xa_for_each(&ioas->iopt.access_list, index, access) {
1218 if (!iommufd_lock_obj(&access->obj))
1219 continue;
1220 xa_unlock(&ioas->iopt.access_list);
1221
1222 access->ops->unmap(access->data, iova, length);
1223
1224 iommufd_put_object(access->ictx, &access->obj);
1225 xa_lock(&ioas->iopt.access_list);
1226 }
1227 xa_unlock(&ioas->iopt.access_list);
1228 }
1229
1230 /**
1231 * iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages
1232 * @access: IOAS access to act on
1233 * @iova: Starting IOVA
1234 * @length: Number of bytes to access
1235 *
 * Return the struct pages. The caller must stop accessing them before calling
 * this. The iova/length must exactly match the one provided to
 * iommufd_access_pin_pages().
1238 */
iommufd_access_unpin_pages(struct iommufd_access * access,unsigned long iova,unsigned long length)1239 void iommufd_access_unpin_pages(struct iommufd_access *access,
1240 unsigned long iova, unsigned long length)
1241 {
1242 struct iopt_area_contig_iter iter;
1243 struct io_pagetable *iopt;
1244 unsigned long last_iova;
1245 struct iopt_area *area;
1246
1247 if (WARN_ON(!length) ||
1248 WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
1249 return;
1250
1251 mutex_lock(&access->ioas_lock);
1252 /*
1253 * The driver must be doing something wrong if it calls this before an
1254 * iommufd_access_attach() or after an iommufd_access_detach().
1255 */
1256 if (WARN_ON(!access->ioas_unpin)) {
1257 mutex_unlock(&access->ioas_lock);
1258 return;
1259 }
1260 iopt = &access->ioas_unpin->iopt;
1261
1262 down_read(&iopt->iova_rwsem);
1263 iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
1264 iopt_area_remove_access(
1265 area, iopt_area_iova_to_index(area, iter.cur_iova),
1266 iopt_area_iova_to_index(
1267 area,
1268 min(last_iova, iopt_area_last_iova(area))));
1269 WARN_ON(!iopt_area_contig_done(&iter));
1270 up_read(&iopt->iova_rwsem);
1271 mutex_unlock(&access->ioas_lock);
1272 }
1273 EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, "IOMMUFD");
1274
iopt_area_contig_is_aligned(struct iopt_area_contig_iter * iter)1275 static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter)
1276 {
1277 if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE)
1278 return false;
1279
1280 if (!iopt_area_contig_done(iter) &&
1281 (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) %
1282 PAGE_SIZE) != (PAGE_SIZE - 1))
1283 return false;
1284 return true;
1285 }
1286
check_area_prot(struct iopt_area * area,unsigned int flags)1287 static bool check_area_prot(struct iopt_area *area, unsigned int flags)
1288 {
1289 if (flags & IOMMUFD_ACCESS_RW_WRITE)
1290 return area->iommu_prot & IOMMU_WRITE;
1291 return area->iommu_prot & IOMMU_READ;
1292 }
1293
1294 /**
1295 * iommufd_access_pin_pages() - Return a list of pages under the iova
1296 * @access: IOAS access to act on
1297 * @iova: Starting IOVA
1298 * @length: Number of bytes to access
1299 * @out_pages: Output page list
 * @flags: IOMMUFD_ACCESS_RW_* flags
1301 *
1302 * Reads @length bytes starting at iova and returns the struct page * pointers.
1303 * These can be kmap'd by the caller for CPU access.
1304 *
1305 * The caller must perform iommufd_access_unpin_pages() when done to balance
1306 * this.
1307 *
1308 * This API always requires a page aligned iova. This happens naturally if the
1309 * ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. However
1310 * smaller alignments have corner cases where this API can fail on otherwise
1311 * aligned iova.
1312 */
int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
			     unsigned long length, struct page **out_pages,
			     unsigned int flags)
{
	struct iopt_area_contig_iter iter;
	struct io_pagetable *iopt;
	unsigned long last_iova;
	struct iopt_area *area;
	int rc;

	/*
	 * Driver's ops don't support pin_pages. This check is only compiled
	 * in with CONFIG_IOMMUFD_TEST.
	 */
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
	    WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap))
		return -EINVAL;

	/* Reject an empty range and a range that overflows */
	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	mutex_lock(&access->ioas_lock);
	/* No IOAS attached, or a change of IOAS is in progress */
	if (!access->ioas) {
		mutex_unlock(&access->ioas_lock);
		return -ENOENT;
	}
	iopt = &access->ioas->iopt;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		/* End of the request clamped to the end of this area */
		unsigned long last = min(last_iova, iopt_area_last_iova(area));
		unsigned long last_index = iopt_area_iova_to_index(area, last);
		unsigned long index =
			iopt_area_iova_to_index(area, iter.cur_iova);

		if (area->prevent_access ||
		    !iopt_area_contig_is_aligned(&iter)) {
			rc = -EINVAL;
			goto err_remove;
		}

		if (!check_area_prot(area, flags)) {
			rc = -EPERM;
			goto err_remove;
		}

		rc = iopt_area_add_access(area, index, last_index, out_pages,
					  flags);
		if (rc)
			goto err_remove;
		/* Step past the output slots for the indexes just added */
		out_pages += last_index - index + 1;
	}
	/* The areas did not cover the whole requested range */
	if (!iopt_area_contig_done(&iter)) {
		rc = -ENOENT;
		goto err_remove;
	}

	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
	return 0;

err_remove:
	/*
	 * Undo the accesses added so far, i.e. [iova, iter.cur_iova - 1].
	 * NOTE(review): this relies on iter.cur_iova being the start of the
	 * step that failed - confirm against the iterator implementation.
	 * The iterator is re-initialised by the loop below.
	 */
	if (iova < iter.cur_iova) {
		last_iova = iter.cur_iova - 1;
		iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
			iopt_area_remove_access(
				area,
				iopt_area_iova_to_index(area, iter.cur_iova),
				iopt_area_iova_to_index(
					area, min(last_iova,
						  iopt_area_last_iova(area))));
	}
	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
1388 EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, "IOMMUFD");
1389
1390 /**
1391 * iommufd_access_rw - Read or write data under the iova
1392 * @access: IOAS access to act on
1393 * @iova: Starting IOVA
1394 * @data: Kernel buffer to copy to/from
1395 * @length: Number of bytes to access
1396 * @flags: IOMMUFD_ACCESS_RW_* flags
1397 *
 * Copy between the kernel buffer @data and the range given by @iova/@length.
 * If flags indicates IOMMUFD_ACCESS_RW_KTHREAD then a large copy can be
 * optimized by changing it into copy_to/from_user().
1401 */
int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
		      void *data, size_t length, unsigned int flags)
{
	struct iopt_area_contig_iter iter;
	struct io_pagetable *iopt;
	struct iopt_area *area;
	unsigned long last_iova;
	int rc = -EINVAL;

	/* Reject an empty range and a range that overflows */
	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	mutex_lock(&access->ioas_lock);
	if (!access->ioas) {
		rc = -ENOENT;
		goto out_unlock;
	}
	iopt = &access->ioas->iopt;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		/* End of the request clamped to the end of this area */
		unsigned long area_last =
			min(last_iova, iopt_area_last_iova(area));
		unsigned long chunk = (area_last - iter.cur_iova) + 1;

		if (area->prevent_access)
			rc = -EINVAL;
		else if (!check_area_prot(area, flags))
			rc = -EPERM;
		else
			rc = iopt_pages_rw_access(
				area->pages,
				iopt_area_start_byte(area, iter.cur_iova), data,
				chunk, flags);
		if (rc)
			goto out_up_read;

		/* Advance the kernel buffer past the bytes just handled */
		data += chunk;
	}
	/* The areas did not cover the whole requested range */
	if (!iopt_area_contig_done(&iter))
		rc = -ENOENT;
out_up_read:
	up_read(&iopt->iova_rwsem);
out_unlock:
	mutex_unlock(&access->ioas_lock);
	return rc;
}
1452 EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, "IOMMUFD");
1453
iommufd_get_hw_info(struct iommufd_ucmd * ucmd)1454 int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
1455 {
1456 struct iommu_hw_info *cmd = ucmd->cmd;
1457 void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr);
1458 const struct iommu_ops *ops;
1459 struct iommufd_device *idev;
1460 unsigned int data_len;
1461 unsigned int copy_len;
1462 void *data;
1463 int rc;
1464
1465 if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] ||
1466 cmd->__reserved[2])
1467 return -EOPNOTSUPP;
1468
1469 idev = iommufd_get_device(ucmd, cmd->dev_id);
1470 if (IS_ERR(idev))
1471 return PTR_ERR(idev);
1472
1473 ops = dev_iommu_ops(idev->dev);
1474 if (ops->hw_info) {
1475 data = ops->hw_info(idev->dev, &data_len, &cmd->out_data_type);
1476 if (IS_ERR(data)) {
1477 rc = PTR_ERR(data);
1478 goto out_put;
1479 }
1480
1481 /*
1482 * drivers that have hw_info callback should have a unique
1483 * iommu_hw_info_type.
1484 */
1485 if (WARN_ON_ONCE(cmd->out_data_type ==
1486 IOMMU_HW_INFO_TYPE_NONE)) {
1487 rc = -ENODEV;
1488 goto out_free;
1489 }
1490 } else {
1491 cmd->out_data_type = IOMMU_HW_INFO_TYPE_NONE;
1492 data_len = 0;
1493 data = NULL;
1494 }
1495
1496 copy_len = min(cmd->data_len, data_len);
1497 if (copy_to_user(user_ptr, data, copy_len)) {
1498 rc = -EFAULT;
1499 goto out_free;
1500 }
1501
1502 /*
1503 * Zero the trailing bytes if the user buffer is bigger than the
1504 * data size kernel actually has.
1505 */
1506 if (copy_len < cmd->data_len) {
1507 if (clear_user(user_ptr + copy_len, cmd->data_len - copy_len)) {
1508 rc = -EFAULT;
1509 goto out_free;
1510 }
1511 }
1512
1513 /*
1514 * We return the length the kernel supports so userspace may know what
1515 * the kernel capability is. It could be larger than the input buffer.
1516 */
1517 cmd->data_len = data_len;
1518
1519 cmd->out_capabilities = 0;
1520 if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
1521 cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING;
1522
1523 cmd->out_max_pasid_log2 = 0;
1524 /*
1525 * Currently, all iommu drivers enable PASID in the probe_device()
1526 * op if iommu and device supports it. So the max_pasids stored in
1527 * dev->iommu indicates both PASID support and enable status. A
1528 * non-zero dev->iommu->max_pasids means PASID is supported and
1529 * enabled. The iommufd only reports PASID capability to userspace
1530 * if it's enabled.
1531 */
1532 if (idev->dev->iommu->max_pasids) {
1533 cmd->out_max_pasid_log2 = ilog2(idev->dev->iommu->max_pasids);
1534
1535 if (dev_is_pci(idev->dev)) {
1536 struct pci_dev *pdev = to_pci_dev(idev->dev);
1537 int ctrl;
1538
1539 ctrl = pci_pasid_status(pdev);
1540
1541 WARN_ON_ONCE(ctrl < 0 ||
1542 !(ctrl & PCI_PASID_CTRL_ENABLE));
1543
1544 if (ctrl & PCI_PASID_CTRL_EXEC)
1545 cmd->out_capabilities |=
1546 IOMMU_HW_CAP_PCI_PASID_EXEC;
1547 if (ctrl & PCI_PASID_CTRL_PRIV)
1548 cmd->out_capabilities |=
1549 IOMMU_HW_CAP_PCI_PASID_PRIV;
1550 }
1551 }
1552
1553 rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
1554 out_free:
1555 kfree(data);
1556 out_put:
1557 iommufd_put_object(ucmd->ictx, &idev->obj);
1558 return rc;
1559 }
1560