// SPDX-License-Identifier: GPL-2.0-only
#include <dirent.h>
#include <fcntl.h>
#include <libgen.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <uapi/linux/types.h>
#include <linux/limits.h>
#include <linux/mman.h>
#include <linux/types.h>
#include <linux/vfio.h>
#include <linux/iommufd.h>

#include "../../../kselftest.h"
#include <libvfio.h>

const char *default_iommu_mode = "iommufd";

/* Reminder: Keep in sync with FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(). */
static const struct iommu_mode iommu_modes[] = {
	{
		.name = "vfio_type1_iommu",
		.container_path = "/dev/vfio/vfio",
		.iommu_type = VFIO_TYPE1_IOMMU,
	},
	{
		.name = "vfio_type1v2_iommu",
		.container_path = "/dev/vfio/vfio",
		.iommu_type = VFIO_TYPE1v2_IOMMU,
	},
	{
		.name = "iommufd_compat_type1",
		.container_path = "/dev/iommu",
		.iommu_type = VFIO_TYPE1_IOMMU,
	},
	{
		.name = "iommufd_compat_type1v2",
		.container_path = "/dev/iommu",
		.iommu_type = VFIO_TYPE1v2_IOMMU,
	},
	{
		.name = "iommufd",
	},
};

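/*
 * Translate an IOMMU mode name into its descriptor. A NULL name selects
 * default_iommu_mode. Fails the test if the name is not recognized.
 */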
static const struct iommu_mode *lookup_iommu_mode(const char *iommu_mode)
{
	int i;

	if (!iommu_mode)
		iommu_mode = default_iommu_mode;

	for (i = 0; i < ARRAY_SIZE(iommu_modes); i++) {
		if (strcmp(iommu_mode, iommu_modes[i].name))
			continue;

		return &iommu_modes[i];
	}

	VFIO_FAIL("Unrecognized IOMMU mode: %s\n", iommu_mode);
}

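/*
 * Look up the IOVA backing a host virtual address by walking the list of DMA
 * regions currently mapped through this iommu. Returns 0 and fills @iova on a
 * hit, -ENOENT if @vaddr falls outside every mapped region.
 */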
int __iommu_hva2iova(struct iommu *iommu, void *vaddr, iova_t *iova)
{
	struct dma_region *region;

	list_for_each_entry(region, &iommu->dma_regions, link) {
		if (vaddr < region->vaddr)
			continue;

		if (vaddr >= region->vaddr + region->size)
			continue;

		if (iova)
			*iova = region->iova + (vaddr - region->vaddr);

		return 0;
	}

	return -ENOENT;
}

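/* Asserting wrapper: @vaddr must already be mapped into the iommu. */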
iova_t iommu_hva2iova(struct iommu *iommu, void *vaddr)
{
	iova_t iova;
	int ret;

	ret = __iommu_hva2iova(iommu, vaddr, &iova);
	VFIO_ASSERT_EQ(ret, 0, "%p is not mapped into the iommu\n", vaddr);

	return iova;
}

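/* Map a DMA region through a legacy VFIO type1 container (VFIO_IOMMU_MAP_DMA). */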
static int vfio_iommu_map(struct iommu *iommu, struct dma_region *region)
{
	struct vfio_iommu_type1_dma_map args = {
		.argsz = sizeof(args),
		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
		.vaddr = (u64)region->vaddr,
		.iova = region->iova,
		.size = region->size,
	};

	if (ioctl(iommu->container_fd, VFIO_IOMMU_MAP_DMA, &args))
		return -errno;

	return 0;
}

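/* Map a DMA region into the iommufd IOAS at a caller-fixed IOVA. */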
static int iommufd_map(struct iommu *iommu, struct dma_region *region)
{
	struct iommu_ioas_map args = {
		.size = sizeof(args),
		.flags = IOMMU_IOAS_MAP_READABLE |
			 IOMMU_IOAS_MAP_WRITEABLE |
			 IOMMU_IOAS_MAP_FIXED_IOVA,
		.user_va = (u64)region->vaddr,
		.iova = region->iova,
		.length = region->size,
		.ioas_id = iommu->ioas_id,
	};

	if (ioctl(iommu->iommufd, IOMMU_IOAS_MAP, &args))
		return -errno;

	return 0;
}

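/*
 * Map @region through whichever backend this iommu was initialized with and
 * track it on iommu->dma_regions so it can be found by __iommu_hva2iova() and
 * torn down later. The caller owns @region and its fields.
 *
 * Illustrative sketch (variable names are hypothetical, not part of this
 * library):
 *
 *	struct dma_region region = {
 *		.vaddr = buf,
 *		.iova = 0x100000,
 *		.size = size,
 *	};
 *
 *	VFIO_ASSERT_EQ(__iommu_map(iommu, &region), 0);
 */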
int __iommu_map(struct iommu *iommu, struct dma_region *region)
{
	int ret;

	if (iommu->iommufd)
		ret = iommufd_map(iommu, region);
	else
		ret = vfio_iommu_map(iommu, region);

	if (ret)
		return ret;

	list_add(&region->link, &iommu->dma_regions);

	return 0;
}

static int __vfio_iommu_unmap(int fd, u64 iova, u64 size, u32 flags, u64 *unmapped)
{
	struct vfio_iommu_type1_dma_unmap args = {
		.argsz = sizeof(args),
		.iova = iova,
		.size = size,
		.flags = flags,
	};

	if (ioctl(fd, VFIO_IOMMU_UNMAP_DMA, &args))
		return -errno;

	if (unmapped)
		*unmapped = args.size;

	return 0;
}

static int vfio_iommu_unmap(struct iommu *iommu, struct dma_region *region,
			    u64 *unmapped)
{
	return __vfio_iommu_unmap(iommu->container_fd, region->iova,
				  region->size, 0, unmapped);
}

static int __iommufd_unmap(int fd, u64 iova, u64 length, u32 ioas_id, u64 *unmapped)
{
	struct iommu_ioas_unmap args = {
		.size = sizeof(args),
		.iova = iova,
		.length = length,
		.ioas_id = ioas_id,
	};

	if (ioctl(fd, IOMMU_IOAS_UNMAP, &args))
		return -errno;

	if (unmapped)
		*unmapped = args.length;

	return 0;
}

static int iommufd_unmap(struct iommu *iommu, struct dma_region *region,
			 u64 *unmapped)
{
	return __iommufd_unmap(iommu->iommufd, region->iova, region->size,
			       iommu->ioas_id, unmapped);
}

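/*
 * Unmap @region from the backend and drop it from iommu->dma_regions. Returns
 * a negative errno from the underlying ioctl on failure; on success the number
 * of bytes unmapped is stored in @unmapped when it is non-NULL.
 */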
int __iommu_unmap(struct iommu *iommu, struct dma_region *region, u64 *unmapped)
{
	int ret;

	if (iommu->iommufd)
		ret = iommufd_unmap(iommu, region, unmapped);
	else
		ret = vfio_iommu_unmap(iommu, region, unmapped);

	if (ret)
		return ret;

	list_del_init(&region->link);

	return 0;
}

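/*
 * Unmap everything in one backend call (VFIO_DMA_UNMAP_FLAG_ALL, or an iommufd
 * unmap spanning the whole IOVA space) and empty the dma_regions list.
 */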
int __iommu_unmap_all(struct iommu *iommu, u64 *unmapped)
{
	int ret;
	struct dma_region *curr, *next;

	if (iommu->iommufd)
		ret = __iommufd_unmap(iommu->iommufd, 0, UINT64_MAX,
				      iommu->ioas_id, unmapped);
	else
		ret = __vfio_iommu_unmap(iommu->container_fd, 0, 0,
					 VFIO_DMA_UNMAP_FLAG_ALL, unmapped);

	if (ret)
		return ret;

	list_for_each_entry_safe(curr, next, &iommu->dma_regions, link)
		list_del_init(&curr->link);

	return 0;
}

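/*
 * Return the capability header at *cap_offset within @buf and advance
 * *cap_offset to the next entry in the chain. Returns NULL at the end of the
 * chain (offset 0); asserts that the header lies entirely within @bufsz.
 */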
static struct vfio_info_cap_header *next_cap_hdr(void *buf, u32 bufsz,
						 u32 *cap_offset)
{
	struct vfio_info_cap_header *hdr;

	if (!*cap_offset)
		return NULL;

	VFIO_ASSERT_LT(*cap_offset, bufsz);
	VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));

	hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
	*cap_offset = hdr->next;

	return hdr;
}

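/*
 * Find capability @cap_id in the VFIO_IOMMU_GET_INFO capability chain, or
 * return NULL. The depth counter bounds the walk so that a malformed chain
 * which loops back on itself is caught rather than iterated forever.
 */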
static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *info,
							     u16 cap_id)
{
	struct vfio_info_cap_header *hdr;
	u32 cap_offset = info->cap_offset;
	u32 max_depth;
	u32 depth = 0;

	if (!(info->flags & VFIO_IOMMU_INFO_CAPS))
		return NULL;

	if (cap_offset)
		VFIO_ASSERT_GE(cap_offset, sizeof(*info));

	max_depth = (info->argsz - sizeof(*info)) / sizeof(*hdr);

	while ((hdr = next_cap_hdr(info, info->argsz, &cap_offset))) {
		depth++;
		VFIO_ASSERT_LE(depth, max_depth, "Capability chain contains a cycle\n");

		if (hdr->id == cap_id)
			return hdr;
	}

	return NULL;
}

/* Return buffer including capability chain, if present. Free with free() */
static struct vfio_iommu_type1_info *vfio_iommu_get_info(int container_fd)
{
	struct vfio_iommu_type1_info *info;

	info = malloc(sizeof(*info));
	VFIO_ASSERT_NOT_NULL(info);

	*info = (struct vfio_iommu_type1_info) {
		.argsz = sizeof(*info),
	};

	ioctl_assert(container_fd, VFIO_IOMMU_GET_INFO, info);
	VFIO_ASSERT_GE(info->argsz, sizeof(*info));

	info = realloc(info, info->argsz);
	VFIO_ASSERT_NOT_NULL(info);

	ioctl_assert(container_fd, VFIO_IOMMU_GET_INFO, info);
	VFIO_ASSERT_GE(info->argsz, sizeof(*info));

	return info;
}

/*
 * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to
 * report iommufd's iommu_iova_range. Free with free().
 */
static struct iommu_iova_range *vfio_iommu_iova_ranges(struct iommu *iommu,
						       u32 *nranges)
{
	struct vfio_iommu_type1_info_cap_iova_range *cap_range;
	struct vfio_iommu_type1_info *info;
	struct vfio_info_cap_header *hdr;
	struct iommu_iova_range *ranges = NULL;

	info = vfio_iommu_get_info(iommu->container_fd);
	hdr = vfio_iommu_info_cap_hdr(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
	VFIO_ASSERT_NOT_NULL(hdr);

	cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
	VFIO_ASSERT_GT(cap_range->nr_iovas, 0);

	ranges = calloc(cap_range->nr_iovas, sizeof(*ranges));
	VFIO_ASSERT_NOT_NULL(ranges);

	for (u32 i = 0; i < cap_range->nr_iovas; i++) {
		ranges[i] = (struct iommu_iova_range){
			.start = cap_range->iova_ranges[i].start,
			.last = cap_range->iova_ranges[i].end,
		};
	}

	*nranges = cap_range->nr_iovas;

	free(info);
	return ranges;
}

/* Return iova ranges of the device's IOAS. Free with free() */
static struct iommu_iova_range *iommufd_iova_ranges(struct iommu *iommu,
						    u32 *nranges)
{
	struct iommu_iova_range *ranges;
	int ret;

	struct iommu_ioas_iova_ranges query = {
		.size = sizeof(query),
		.ioas_id = iommu->ioas_id,
	};

	ret = ioctl(iommu->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
	VFIO_ASSERT_EQ(ret, -1);
	VFIO_ASSERT_EQ(errno, EMSGSIZE);
	VFIO_ASSERT_GT(query.num_iovas, 0);

	ranges = calloc(query.num_iovas, sizeof(*ranges));
	VFIO_ASSERT_NOT_NULL(ranges);

	query.allowed_iovas = (uintptr_t)ranges;

	ioctl_assert(iommu->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
	*nranges = query.num_iovas;

	return ranges;
}

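/* qsort() comparator: order iommu_iova_range entries by ascending start. */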
static int iova_range_comp(const void *a, const void *b)
{
	const struct iommu_iova_range *ra = a, *rb = b;

	if (ra->start < rb->start)
		return -1;

	if (ra->start > rb->start)
		return 1;

	return 0;
}

/* Return sorted IOVA ranges of the device. Free with free(). */
struct iommu_iova_range *iommu_iova_ranges(struct iommu *iommu, u32 *nranges)
{
	struct iommu_iova_range *ranges;

	if (iommu->iommufd)
		ranges = iommufd_iova_ranges(iommu, nranges);
	else
		ranges = vfio_iommu_iova_ranges(iommu, nranges);

	if (!ranges)
		return NULL;

	VFIO_ASSERT_GT(*nranges, 0);

	/* Sort and check that ranges are sane and non-overlapping */
	qsort(ranges, *nranges, sizeof(*ranges), iova_range_comp);
	VFIO_ASSERT_LT(ranges[0].start, ranges[0].last);

	for (u32 i = 1; i < *nranges; i++) {
		VFIO_ASSERT_LT(ranges[i].start, ranges[i].last);
		VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start);
	}

	return ranges;
}

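/* Allocate a fresh IOAS on @iommufd and return its ID. */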
static u32 iommufd_ioas_alloc(int iommufd)
{
	struct iommu_ioas_alloc args = {
		.size = sizeof(args),
	};

	ioctl_assert(iommufd, IOMMU_IOAS_ALLOC, &args);
	return args.out_ioas_id;
}

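/*
 * Allocate and initialize an iommu for the given mode (NULL selects
 * default_iommu_mode). Legacy modes open the VFIO container at
 * mode->container_path; plain "iommufd" opens /dev/iommu and allocates an
 * IOAS. Pair with iommu_cleanup().
 *
 * Illustrative sketch of a caller (hypothetical, not part of this file):
 *
 *	struct iommu *iommu = iommu_init("iommufd");
 *	... map/unmap DMA regions ...
 *	iommu_cleanup(iommu);
 */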
struct iommu *iommu_init(const char *iommu_mode)
{
	const char *container_path;
	struct iommu *iommu;
	int version;

	iommu = calloc(1, sizeof(*iommu));
	VFIO_ASSERT_NOT_NULL(iommu);

	INIT_LIST_HEAD(&iommu->dma_regions);

	iommu->mode = lookup_iommu_mode(iommu_mode);

	container_path = iommu->mode->container_path;
	if (container_path) {
		iommu->container_fd = open(container_path, O_RDWR);
		VFIO_ASSERT_GE(iommu->container_fd, 0, "open(%s) failed\n", container_path);

		version = ioctl(iommu->container_fd, VFIO_GET_API_VERSION);
		VFIO_ASSERT_EQ(version, VFIO_API_VERSION, "Unsupported version: %d\n", version);
	} else {
		/*
		 * Require iommu->iommufd to be >0 so that a simple non-0 check
		 * can be used to check if iommufd is enabled. In practice
		 * open() will never return 0 unless stdin is closed.
		 */
		iommu->iommufd = open("/dev/iommu", O_RDWR);
		VFIO_ASSERT_GT(iommu->iommufd, 0);

		iommu->ioas_id = iommufd_ioas_alloc(iommu->iommufd);
	}

	return iommu;
}

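/* Close whichever backend fd iommu_init() opened and free the iommu. */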
void iommu_cleanup(struct iommu *iommu)
{
	if (iommu->iommufd)
		VFIO_ASSERT_EQ(close(iommu->iommufd), 0);
	else
		VFIO_ASSERT_EQ(close(iommu->container_fd), 0);

	free(iommu);
}