xref: /linux/drivers/iommu/iommufd/viommu.c (revision 03f76ddff5b04a808ae16c06418460151e2fdd4b)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
3  */
4 #include "iommufd_private.h"
5 
6 void iommufd_viommu_destroy(struct iommufd_object *obj)
7 {
8 	struct iommufd_viommu *viommu =
9 		container_of(obj, struct iommufd_viommu, obj);
10 
11 	if (viommu->ops && viommu->ops->destroy)
12 		viommu->ops->destroy(viommu);
13 	refcount_dec(&viommu->hwpt->common.obj.users);
14 	xa_destroy(&viommu->vdevs);
15 }
16 
17 int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
18 {
19 	struct iommu_viommu_alloc *cmd = ucmd->cmd;
20 	const struct iommu_user_data user_data = {
21 		.type = cmd->type,
22 		.uptr = u64_to_user_ptr(cmd->data_uptr),
23 		.len = cmd->data_len,
24 	};
25 	struct iommufd_hwpt_paging *hwpt_paging;
26 	struct iommufd_viommu *viommu;
27 	struct iommufd_device *idev;
28 	const struct iommu_ops *ops;
29 	size_t viommu_size;
30 	int rc;
31 
32 	if (cmd->flags || cmd->type == IOMMU_VIOMMU_TYPE_DEFAULT)
33 		return -EOPNOTSUPP;
34 
35 	idev = iommufd_get_device(ucmd, cmd->dev_id);
36 	if (IS_ERR(idev))
37 		return PTR_ERR(idev);
38 
39 	ops = dev_iommu_ops(idev->dev);
40 	if (!ops->get_viommu_size || !ops->viommu_init) {
41 		rc = -EOPNOTSUPP;
42 		goto out_put_idev;
43 	}
44 
45 	viommu_size = ops->get_viommu_size(idev->dev, cmd->type);
46 	if (!viommu_size) {
47 		rc = -EOPNOTSUPP;
48 		goto out_put_idev;
49 	}
50 
51 	/*
52 	 * It is a driver bug for providing a viommu_size smaller than the core
53 	 * vIOMMU structure size
54 	 */
55 	if (WARN_ON_ONCE(viommu_size < sizeof(*viommu))) {
56 		rc = -EOPNOTSUPP;
57 		goto out_put_idev;
58 	}
59 
60 	hwpt_paging = iommufd_get_hwpt_paging(ucmd, cmd->hwpt_id);
61 	if (IS_ERR(hwpt_paging)) {
62 		rc = PTR_ERR(hwpt_paging);
63 		goto out_put_idev;
64 	}
65 
66 	if (!hwpt_paging->nest_parent) {
67 		rc = -EINVAL;
68 		goto out_put_hwpt;
69 	}
70 
71 	viommu = (struct iommufd_viommu *)_iommufd_object_alloc_ucmd(
72 		ucmd, viommu_size, IOMMUFD_OBJ_VIOMMU);
73 	if (IS_ERR(viommu)) {
74 		rc = PTR_ERR(viommu);
75 		goto out_put_hwpt;
76 	}
77 
78 	xa_init(&viommu->vdevs);
79 	viommu->type = cmd->type;
80 	viommu->ictx = ucmd->ictx;
81 	viommu->hwpt = hwpt_paging;
82 	refcount_inc(&viommu->hwpt->common.obj.users);
83 	INIT_LIST_HEAD(&viommu->veventqs);
84 	init_rwsem(&viommu->veventqs_rwsem);
85 	/*
86 	 * It is the most likely case that a physical IOMMU is unpluggable. A
87 	 * pluggable IOMMU instance (if exists) is responsible for refcounting
88 	 * on its own.
89 	 */
90 	viommu->iommu_dev = __iommu_get_iommu_dev(idev->dev);
91 
92 	rc = ops->viommu_init(viommu, hwpt_paging->common.domain,
93 			      user_data.len ? &user_data : NULL);
94 	if (rc)
95 		goto out_put_hwpt;
96 
97 	/* It is a driver bug that viommu->ops isn't filled */
98 	if (WARN_ON_ONCE(!viommu->ops)) {
99 		rc = -EOPNOTSUPP;
100 		goto out_put_hwpt;
101 	}
102 
103 	cmd->out_viommu_id = viommu->obj.id;
104 	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
105 
106 out_put_hwpt:
107 	iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj);
108 out_put_idev:
109 	iommufd_put_object(ucmd->ictx, &idev->obj);
110 	return rc;
111 }
112 
113 void iommufd_vdevice_abort(struct iommufd_object *obj)
114 {
115 	struct iommufd_vdevice *vdev =
116 		container_of(obj, struct iommufd_vdevice, obj);
117 	struct iommufd_viommu *viommu = vdev->viommu;
118 	struct iommufd_device *idev = vdev->idev;
119 
120 	lockdep_assert_held(&idev->igroup->lock);
121 
122 	if (vdev->destroy)
123 		vdev->destroy(vdev);
124 	/* xa_cmpxchg is okay to fail if alloc failed xa_cmpxchg previously */
125 	xa_cmpxchg(&viommu->vdevs, vdev->virt_id, vdev, NULL, GFP_KERNEL);
126 	refcount_dec(&viommu->obj.users);
127 	idev->vdev = NULL;
128 }
129 
130 void iommufd_vdevice_destroy(struct iommufd_object *obj)
131 {
132 	struct iommufd_vdevice *vdev =
133 		container_of(obj, struct iommufd_vdevice, obj);
134 	struct iommufd_device *idev = vdev->idev;
135 	struct iommufd_ctx *ictx = idev->ictx;
136 
137 	mutex_lock(&idev->igroup->lock);
138 	iommufd_vdevice_abort(obj);
139 	mutex_unlock(&idev->igroup->lock);
140 	iommufd_put_object(ictx, &idev->obj);
141 }
142 
143 int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
144 {
145 	struct iommu_vdevice_alloc *cmd = ucmd->cmd;
146 	struct iommufd_vdevice *vdev, *curr;
147 	size_t vdev_size = sizeof(*vdev);
148 	struct iommufd_viommu *viommu;
149 	struct iommufd_device *idev;
150 	u64 virt_id = cmd->virt_id;
151 	int rc = 0;
152 
153 	/* virt_id indexes an xarray */
154 	if (virt_id > ULONG_MAX)
155 		return -EINVAL;
156 
157 	viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
158 	if (IS_ERR(viommu))
159 		return PTR_ERR(viommu);
160 
161 	idev = iommufd_get_device(ucmd, cmd->dev_id);
162 	if (IS_ERR(idev)) {
163 		rc = PTR_ERR(idev);
164 		goto out_put_viommu;
165 	}
166 
167 	if (viommu->iommu_dev != __iommu_get_iommu_dev(idev->dev)) {
168 		rc = -EINVAL;
169 		goto out_put_idev;
170 	}
171 
172 	mutex_lock(&idev->igroup->lock);
173 	if (idev->destroying) {
174 		rc = -ENOENT;
175 		goto out_unlock_igroup;
176 	}
177 
178 	if (idev->vdev) {
179 		rc = -EEXIST;
180 		goto out_unlock_igroup;
181 	}
182 
183 	if (viommu->ops && viommu->ops->vdevice_size) {
184 		/*
185 		 * It is a driver bug for:
186 		 * - ops->vdevice_size smaller than the core structure size
187 		 * - not implementing a pairing ops->vdevice_init op
188 		 */
189 		if (WARN_ON_ONCE(viommu->ops->vdevice_size < vdev_size ||
190 				 !viommu->ops->vdevice_init)) {
191 			rc = -EOPNOTSUPP;
192 			goto out_put_idev;
193 		}
194 		vdev_size = viommu->ops->vdevice_size;
195 	}
196 
197 	vdev = (struct iommufd_vdevice *)_iommufd_object_alloc(
198 		ucmd->ictx, vdev_size, IOMMUFD_OBJ_VDEVICE);
199 	if (IS_ERR(vdev)) {
200 		rc = PTR_ERR(vdev);
201 		goto out_unlock_igroup;
202 	}
203 
204 	vdev->virt_id = virt_id;
205 	vdev->viommu = viommu;
206 	refcount_inc(&viommu->obj.users);
207 	/*
208 	 * A wait_cnt reference is held on the idev so long as we have the
209 	 * pointer. iommufd_device_pre_destroy() will revoke it before the
210 	 * idev real destruction.
211 	 */
212 	vdev->idev = idev;
213 
214 	/*
215 	 * iommufd_device_destroy() delays until idev->vdev is NULL before
216 	 * freeing the idev, which only happens once the vdev is finished
217 	 * destruction.
218 	 */
219 	idev->vdev = vdev;
220 
221 	curr = xa_cmpxchg(&viommu->vdevs, virt_id, NULL, vdev, GFP_KERNEL);
222 	if (curr) {
223 		rc = xa_err(curr) ?: -EEXIST;
224 		goto out_abort;
225 	}
226 
227 	if (viommu->ops && viommu->ops->vdevice_init) {
228 		rc = viommu->ops->vdevice_init(vdev);
229 		if (rc)
230 			goto out_abort;
231 	}
232 
233 	cmd->out_vdevice_id = vdev->obj.id;
234 	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
235 	if (rc)
236 		goto out_abort;
237 	iommufd_object_finalize(ucmd->ictx, &vdev->obj);
238 	goto out_unlock_igroup;
239 
240 out_abort:
241 	iommufd_object_abort_and_destroy(ucmd->ictx, &vdev->obj);
242 out_unlock_igroup:
243 	mutex_unlock(&idev->igroup->lock);
244 out_put_idev:
245 	if (rc)
246 		iommufd_put_object(ucmd->ictx, &idev->obj);
247 out_put_viommu:
248 	iommufd_put_object(ucmd->ictx, &viommu->obj);
249 	return rc;
250 }
251 
252 static void iommufd_hw_queue_destroy_access(struct iommufd_ctx *ictx,
253 					    struct iommufd_access *access,
254 					    u64 base_iova, size_t length)
255 {
256 	u64 aligned_iova = PAGE_ALIGN_DOWN(base_iova);
257 	u64 offset = base_iova - aligned_iova;
258 
259 	iommufd_access_unpin_pages(access, aligned_iova,
260 				   PAGE_ALIGN(length + offset));
261 	iommufd_access_detach_internal(access);
262 	iommufd_access_destroy_internal(ictx, access);
263 }
264 
265 void iommufd_hw_queue_destroy(struct iommufd_object *obj)
266 {
267 	struct iommufd_hw_queue *hw_queue =
268 		container_of(obj, struct iommufd_hw_queue, obj);
269 
270 	if (hw_queue->destroy)
271 		hw_queue->destroy(hw_queue);
272 	if (hw_queue->access)
273 		iommufd_hw_queue_destroy_access(hw_queue->viommu->ictx,
274 						hw_queue->access,
275 						hw_queue->base_addr,
276 						hw_queue->length);
277 	if (hw_queue->viommu)
278 		refcount_dec(&hw_queue->viommu->obj.users);
279 }
280 
281 /*
282  * When the HW accesses the guest queue via physical addresses, the underlying
283  * physical pages of the guest queue must be contiguous. Also, for the security
284  * concern that IOMMUFD_CMD_IOAS_UNMAP could potentially remove the mappings of
285  * the guest queue from the nesting parent iopt while the HW is still accessing
286  * the guest queue memory physically, such a HW queue must require an access to
287  * pin the underlying pages and prevent that from happening.
288  */
289 static struct iommufd_access *
290 iommufd_hw_queue_alloc_phys(struct iommu_hw_queue_alloc *cmd,
291 			    struct iommufd_viommu *viommu, phys_addr_t *base_pa)
292 {
293 	u64 aligned_iova = PAGE_ALIGN_DOWN(cmd->nesting_parent_iova);
294 	u64 offset = cmd->nesting_parent_iova - aligned_iova;
295 	struct iommufd_access *access;
296 	struct page **pages;
297 	size_t max_npages;
298 	size_t length;
299 	size_t i;
300 	int rc;
301 
302 	/* max_npages = DIV_ROUND_UP(offset + cmd->length, PAGE_SIZE) */
303 	if (check_add_overflow(offset, cmd->length, &length))
304 		return ERR_PTR(-ERANGE);
305 	if (check_add_overflow(length, PAGE_SIZE - 1, &length))
306 		return ERR_PTR(-ERANGE);
307 	max_npages = length / PAGE_SIZE;
308 	/* length needs to be page aligned too */
309 	length = max_npages * PAGE_SIZE;
310 
311 	/*
312 	 * Use kvcalloc() to avoid memory fragmentation for a large page array.
313 	 * Set __GFP_NOWARN to avoid syzkaller blowups
314 	 */
315 	pages = kvcalloc(max_npages, sizeof(*pages), GFP_KERNEL | __GFP_NOWARN);
316 	if (!pages)
317 		return ERR_PTR(-ENOMEM);
318 
319 	access = iommufd_access_create_internal(viommu->ictx);
320 	if (IS_ERR(access)) {
321 		rc = PTR_ERR(access);
322 		goto out_free;
323 	}
324 
325 	rc = iommufd_access_attach_internal(access, viommu->hwpt->ioas);
326 	if (rc)
327 		goto out_destroy;
328 
329 	rc = iommufd_access_pin_pages(access, aligned_iova, length, pages, 0);
330 	if (rc)
331 		goto out_detach;
332 
333 	/* Validate if the underlying physical pages are contiguous */
334 	for (i = 1; i < max_npages; i++) {
335 		if (page_to_pfn(pages[i]) == page_to_pfn(pages[i - 1]) + 1)
336 			continue;
337 		rc = -EFAULT;
338 		goto out_unpin;
339 	}
340 
341 	*base_pa = (page_to_pfn(pages[0]) << PAGE_SHIFT) + offset;
342 	kvfree(pages);
343 	return access;
344 
345 out_unpin:
346 	iommufd_access_unpin_pages(access, aligned_iova, length);
347 out_detach:
348 	iommufd_access_detach_internal(access);
349 out_destroy:
350 	iommufd_access_destroy_internal(viommu->ictx, access);
351 out_free:
352 	kvfree(pages);
353 	return ERR_PTR(rc);
354 }
355 
356 int iommufd_hw_queue_alloc_ioctl(struct iommufd_ucmd *ucmd)
357 {
358 	struct iommu_hw_queue_alloc *cmd = ucmd->cmd;
359 	struct iommufd_hw_queue *hw_queue;
360 	struct iommufd_viommu *viommu;
361 	struct iommufd_access *access;
362 	size_t hw_queue_size;
363 	phys_addr_t base_pa;
364 	u64 last;
365 	int rc;
366 
367 	if (cmd->flags || cmd->type == IOMMU_HW_QUEUE_TYPE_DEFAULT)
368 		return -EOPNOTSUPP;
369 	if (!cmd->length)
370 		return -EINVAL;
371 	if (check_add_overflow(cmd->nesting_parent_iova, cmd->length - 1,
372 			       &last))
373 		return -EOVERFLOW;
374 
375 	viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
376 	if (IS_ERR(viommu))
377 		return PTR_ERR(viommu);
378 
379 	if (!viommu->ops || !viommu->ops->get_hw_queue_size ||
380 	    !viommu->ops->hw_queue_init_phys) {
381 		rc = -EOPNOTSUPP;
382 		goto out_put_viommu;
383 	}
384 
385 	hw_queue_size = viommu->ops->get_hw_queue_size(viommu, cmd->type);
386 	if (!hw_queue_size) {
387 		rc = -EOPNOTSUPP;
388 		goto out_put_viommu;
389 	}
390 
391 	/*
392 	 * It is a driver bug for providing a hw_queue_size smaller than the
393 	 * core HW queue structure size
394 	 */
395 	if (WARN_ON_ONCE(hw_queue_size < sizeof(*hw_queue))) {
396 		rc = -EOPNOTSUPP;
397 		goto out_put_viommu;
398 	}
399 
400 	hw_queue = (struct iommufd_hw_queue *)_iommufd_object_alloc_ucmd(
401 		ucmd, hw_queue_size, IOMMUFD_OBJ_HW_QUEUE);
402 	if (IS_ERR(hw_queue)) {
403 		rc = PTR_ERR(hw_queue);
404 		goto out_put_viommu;
405 	}
406 
407 	access = iommufd_hw_queue_alloc_phys(cmd, viommu, &base_pa);
408 	if (IS_ERR(access)) {
409 		rc = PTR_ERR(access);
410 		goto out_put_viommu;
411 	}
412 
413 	hw_queue->viommu = viommu;
414 	refcount_inc(&viommu->obj.users);
415 	hw_queue->access = access;
416 	hw_queue->type = cmd->type;
417 	hw_queue->length = cmd->length;
418 	hw_queue->base_addr = cmd->nesting_parent_iova;
419 
420 	rc = viommu->ops->hw_queue_init_phys(hw_queue, cmd->index, base_pa);
421 	if (rc)
422 		goto out_put_viommu;
423 
424 	cmd->out_hw_queue_id = hw_queue->obj.id;
425 	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
426 
427 out_put_viommu:
428 	iommufd_put_object(ucmd->ictx, &viommu->obj);
429 	return rc;
430 }
431