// SPDX-License-Identifier: GPL-2.0-only
#include <dirent.h>
#include <fcntl.h>
#include <libgen.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/limits.h>
#include <linux/mman.h>
#include <linux/types.h>
#include <linux/vfio.h>
#include <linux/iommufd.h>

#include "../../../kselftest.h"
#include <libvfio.h>

const char *default_iommu_mode = "iommufd";

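/*
 * The final "iommufd" entry below has no container_path; iommu_init() keys off
 * that to open /dev/iommu and allocate a dedicated IOAS instead of using a
 * legacy VFIO container.
 */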
/* Reminder: Keep in sync with FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(). */
static const struct iommu_mode iommu_modes[] = {
	{
		.name = "vfio_type1_iommu",
		.container_path = "/dev/vfio/vfio",
		.iommu_type = VFIO_TYPE1_IOMMU,
	},
	{
		.name = "vfio_type1v2_iommu",
		.container_path = "/dev/vfio/vfio",
		.iommu_type = VFIO_TYPE1v2_IOMMU,
	},
	{
		.name = "iommufd_compat_type1",
		.container_path = "/dev/iommu",
		.iommu_type = VFIO_TYPE1_IOMMU,
	},
	{
		.name = "iommufd_compat_type1v2",
		.container_path = "/dev/iommu",
		.iommu_type = VFIO_TYPE1v2_IOMMU,
	},
	{
		.name = "iommufd",
	},
};

static const struct iommu_mode *lookup_iommu_mode(const char *iommu_mode)
{
	int i;

	if (!iommu_mode)
		iommu_mode = default_iommu_mode;

	for (i = 0; i < ARRAY_SIZE(iommu_modes); i++) {
		if (strcmp(iommu_mode, iommu_modes[i].name))
			continue;

		return &iommu_modes[i];
	}

	VFIO_FAIL("Unrecognized IOMMU mode: %s\n", iommu_mode);
}

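/*
 * Translate a host virtual address into the IOVA it is mapped at by walking
 * the iommu's list of DMA regions. The __ variant returns -ENOENT if the
 * address is not covered by any mapping; iommu_hva2iova() asserts instead.
 */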
int __iommu_hva2iova(struct iommu *iommu, void *vaddr, iova_t *iova)
{
	struct dma_region *region;

	list_for_each_entry(region, &iommu->dma_regions, link) {
		if (vaddr < region->vaddr)
			continue;

		if (vaddr >= region->vaddr + region->size)
			continue;

		if (iova)
			*iova = region->iova + (vaddr - region->vaddr);

		return 0;
	}

	return -ENOENT;
}

iova_t iommu_hva2iova(struct iommu *iommu, void *vaddr)
{
	iova_t iova;
	int ret;

	ret = __iommu_hva2iova(iommu, vaddr, &iova);
	VFIO_ASSERT_EQ(ret, 0, "%p is not mapped into the iommu\n", vaddr);

	return iova;
}

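/*
 * Backend map helpers: legacy VFIO containers use VFIO_IOMMU_MAP_DMA, while
 * iommufd uses IOMMU_IOAS_MAP with a caller-chosen (fixed) IOVA. Both map the
 * region for read and write access.
 */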
static int vfio_iommu_map(struct iommu *iommu, struct dma_region *region)
{
	struct vfio_iommu_type1_dma_map args = {
		.argsz = sizeof(args),
		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
		.vaddr = (u64)region->vaddr,
		.iova = region->iova,
		.size = region->size,
	};

	if (ioctl(iommu->container_fd, VFIO_IOMMU_MAP_DMA, &args))
		return -errno;

	return 0;
}

static int iommufd_map(struct iommu *iommu, struct dma_region *region)
{
	struct iommu_ioas_map args = {
		.size = sizeof(args),
		.flags = IOMMU_IOAS_MAP_READABLE |
			 IOMMU_IOAS_MAP_WRITEABLE |
			 IOMMU_IOAS_MAP_FIXED_IOVA,
		.user_va = (u64)region->vaddr,
		.iova = region->iova,
		.length = region->size,
		.ioas_id = iommu->ioas_id,
	};

	if (ioctl(iommu->iommufd, IOMMU_IOAS_MAP, &args))
		return -errno;

	return 0;
}

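/*
 * Map a DMA region through whichever backend this iommu was initialized with
 * and, on success, track it on iommu->dma_regions so it can be found by
 * iommu_hva2iova() and torn down by the unmap helpers.
 */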
int __iommu_map(struct iommu *iommu, struct dma_region *region)
{
	int ret;

	if (iommu->iommufd)
		ret = iommufd_map(iommu, region);
	else
		ret = vfio_iommu_map(iommu, region);

	if (ret)
		return ret;

	list_add(&region->link, &iommu->dma_regions);

	return 0;
}

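/* Backend unmap helpers, mirroring the map helpers above. */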
static int __vfio_iommu_unmap(int fd, u64 iova, u64 size, u32 flags, u64 *unmapped)
{
	struct vfio_iommu_type1_dma_unmap args = {
		.argsz = sizeof(args),
		.iova = iova,
		.size = size,
		.flags = flags,
	};

	if (ioctl(fd, VFIO_IOMMU_UNMAP_DMA, &args))
		return -errno;

	if (unmapped)
		*unmapped = args.size;

	return 0;
}

static int vfio_iommu_unmap(struct iommu *iommu, struct dma_region *region,
			    u64 *unmapped)
{
	return __vfio_iommu_unmap(iommu->container_fd, region->iova,
				  region->size, 0, unmapped);
}

static int __iommufd_unmap(int fd, u64 iova, u64 length, u32 ioas_id, u64 *unmapped)
{
	struct iommu_ioas_unmap args = {
		.size = sizeof(args),
		.iova = iova,
		.length = length,
		.ioas_id = ioas_id,
	};

	if (ioctl(fd, IOMMU_IOAS_UNMAP, &args))
		return -errno;

	if (unmapped)
		*unmapped = args.length;

	return 0;
}

static int iommufd_unmap(struct iommu *iommu, struct dma_region *region,
			 u64 *unmapped)
{
	return __iommufd_unmap(iommu->iommufd, region->iova, region->size,
			       iommu->ioas_id, unmapped);
}

int __iommu_unmap(struct iommu *iommu, struct dma_region *region, u64 *unmapped)
{
	int ret;

	if (iommu->iommufd)
		ret = iommufd_unmap(iommu, region, unmapped);
	else
		ret = vfio_iommu_unmap(iommu, region, unmapped);

	if (ret)
		return ret;

	list_del_init(&region->link);

	return 0;
}

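/*
 * Unmap everything with a single ioctl: the iommufd path unmaps iova 0 with
 * length UINT64_MAX, while VFIO type1 uses VFIO_DMA_UNMAP_FLAG_ALL with iova
 * and size both 0. All tracked DMA regions are then dropped from the list.
 */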
int __iommu_unmap_all(struct iommu *iommu, u64 *unmapped)
{
	int ret;
	struct dma_region *curr, *next;

	if (iommu->iommufd)
		ret = __iommufd_unmap(iommu->iommufd, 0, UINT64_MAX,
				      iommu->ioas_id, unmapped);
	else
		ret = __vfio_iommu_unmap(iommu->container_fd, 0, 0,
					 VFIO_DMA_UNMAP_FLAG_ALL, unmapped);

	if (ret)
		return ret;

	list_for_each_entry_safe(curr, next, &iommu->dma_regions, link)
		list_del_init(&curr->link);

	return 0;
}

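/*
 * Step to the capability header at *cap_offset within the info buffer and
 * advance *cap_offset to the next entry in the chain. Returns NULL once the
 * offset reaches 0 (end of chain); out-of-bounds offsets trip the asserts.
 */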
static struct vfio_info_cap_header *next_cap_hdr(void *buf, u32 bufsz,
						 u32 *cap_offset)
{
	struct vfio_info_cap_header *hdr;

	if (!*cap_offset)
		return NULL;

	VFIO_ASSERT_LT(*cap_offset, bufsz);
	VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));

	hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
	*cap_offset = hdr->next;

	return hdr;
}

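/*
 * Search the capability chain of a VFIO_IOMMU_GET_INFO result for cap_id.
 * max_depth bounds the walk so that a malformed chain containing a cycle is
 * caught by the assert rather than looping forever.
 */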
static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *info,
							     u16 cap_id)
{
	struct vfio_info_cap_header *hdr;
	u32 cap_offset = info->cap_offset;
	u32 max_depth;
	u32 depth = 0;

	if (!(info->flags & VFIO_IOMMU_INFO_CAPS))
		return NULL;

	if (cap_offset)
		VFIO_ASSERT_GE(cap_offset, sizeof(*info));

	max_depth = (info->argsz - sizeof(*info)) / sizeof(*hdr);

	while ((hdr = next_cap_hdr(info, info->argsz, &cap_offset))) {
		depth++;
		VFIO_ASSERT_LE(depth, max_depth, "Capability chain contains a cycle\n");

		if (hdr->id == cap_id)
			return hdr;
	}

	return NULL;
}

/* Return buffer including capability chain, if present. Free with free() */
static struct vfio_iommu_type1_info *vfio_iommu_get_info(int container_fd)
{
	struct vfio_iommu_type1_info *info;

	info = malloc(sizeof(*info));
	VFIO_ASSERT_NOT_NULL(info);

	*info = (struct vfio_iommu_type1_info) {
		.argsz = sizeof(*info),
	};

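	/*
	 * Query twice: the first call just reports the required argsz, then
	 * the buffer is grown so the second call can also return the
	 * capability chain.
	 */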
	ioctl_assert(container_fd, VFIO_IOMMU_GET_INFO, info);
	VFIO_ASSERT_GE(info->argsz, sizeof(*info));

	info = realloc(info, info->argsz);
	VFIO_ASSERT_NOT_NULL(info);

	ioctl_assert(container_fd, VFIO_IOMMU_GET_INFO, info);
	VFIO_ASSERT_GE(info->argsz, sizeof(*info));

	return info;
}

/*
 * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to
 * report iommufd's iommu_iova_range. Free with free().
 */
static struct iommu_iova_range *vfio_iommu_iova_ranges(struct iommu *iommu,
						       u32 *nranges)
{
	struct vfio_iommu_type1_info_cap_iova_range *cap_range;
	struct vfio_iommu_type1_info *info;
	struct vfio_info_cap_header *hdr;
	struct iommu_iova_range *ranges = NULL;

	info = vfio_iommu_get_info(iommu->container_fd);
	hdr = vfio_iommu_info_cap_hdr(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
	VFIO_ASSERT_NOT_NULL(hdr);

	cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
	VFIO_ASSERT_GT(cap_range->nr_iovas, 0);

	ranges = calloc(cap_range->nr_iovas, sizeof(*ranges));
	VFIO_ASSERT_NOT_NULL(ranges);

	for (u32 i = 0; i < cap_range->nr_iovas; i++) {
		ranges[i] = (struct iommu_iova_range){
			.start = cap_range->iova_ranges[i].start,
			.last = cap_range->iova_ranges[i].end,
		};
	}

	*nranges = cap_range->nr_iovas;

	free(info);
	return ranges;
}

/* Return iova ranges of the device's IOAS. Free with free() */
static struct iommu_iova_range *iommufd_iova_ranges(struct iommu *iommu,
						    u32 *nranges)
{
	struct iommu_iova_range *ranges;
	int ret;

	struct iommu_ioas_iova_ranges query = {
		.size = sizeof(query),
		.ioas_id = iommu->ioas_id,
	};

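	/*
	 * With no output buffer supplied, this first call is expected to fail
	 * with EMSGSIZE while still filling in num_iovas, which sizes the
	 * allocation for the second call below.
	 */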
	ret = ioctl(iommu->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
	VFIO_ASSERT_EQ(ret, -1);
	VFIO_ASSERT_EQ(errno, EMSGSIZE);
	VFIO_ASSERT_GT(query.num_iovas, 0);

	ranges = calloc(query.num_iovas, sizeof(*ranges));
	VFIO_ASSERT_NOT_NULL(ranges);

	query.allowed_iovas = (uintptr_t)ranges;

	ioctl_assert(iommu->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
	*nranges = query.num_iovas;

	return ranges;
}

static int iova_range_comp(const void *a, const void *b)
{
	const struct iommu_iova_range *ra = a, *rb = b;

	if (ra->start < rb->start)
		return -1;

	if (ra->start > rb->start)
		return 1;

	return 0;
}

/* Return sorted IOVA ranges of the device. Free with free(). */
struct iommu_iova_range *iommu_iova_ranges(struct iommu *iommu, u32 *nranges)
{
	struct iommu_iova_range *ranges;

	if (iommu->iommufd)
		ranges = iommufd_iova_ranges(iommu, nranges);
	else
		ranges = vfio_iommu_iova_ranges(iommu, nranges);

	if (!ranges)
		return NULL;

	VFIO_ASSERT_GT(*nranges, 0);

	/* Sort and check that ranges are sane and non-overlapping */
	qsort(ranges, *nranges, sizeof(*ranges), iova_range_comp);
	VFIO_ASSERT_LT(ranges[0].start, ranges[0].last);

	for (u32 i = 1; i < *nranges; i++) {
		VFIO_ASSERT_LT(ranges[i].start, ranges[i].last);
		VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start);
	}

	return ranges;
}

static u32 iommufd_ioas_alloc(int iommufd)
{
	struct iommu_ioas_alloc args = {
		.size = sizeof(args),
	};

	ioctl_assert(iommufd, IOMMU_IOAS_ALLOC, &args);
	return args.out_ioas_id;
}

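/*
 * Set up an iommu for the given mode (NULL selects default_iommu_mode).
 * A minimal usage sketch, assuming the caller supplies the backing buffer and
 * picks an IOVA that lies inside iommu_iova_ranges():
 *
 *	struct iommu *iommu = iommu_init("iommufd");
 *	struct dma_region region = {
 *		.vaddr = buf,
 *		.iova = 0x100000,
 *		.size = 0x200000,
 *	};
 *
 *	VFIO_ASSERT_EQ(__iommu_map(iommu, &region), 0);
 *	...
 *	VFIO_ASSERT_EQ(__iommu_unmap(iommu, &region, NULL), 0);
 *	iommu_cleanup(iommu);
 */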
struct iommu *iommu_init(const char *iommu_mode)
{
	const char *container_path;
	struct iommu *iommu;
	int version;

	iommu = calloc(1, sizeof(*iommu));
	VFIO_ASSERT_NOT_NULL(iommu);

	INIT_LIST_HEAD(&iommu->dma_regions);

	iommu->mode = lookup_iommu_mode(iommu_mode);

	container_path = iommu->mode->container_path;
	if (container_path) {
		iommu->container_fd = open(container_path, O_RDWR);
		VFIO_ASSERT_GE(iommu->container_fd, 0, "open(%s) failed\n", container_path);

		version = ioctl(iommu->container_fd, VFIO_GET_API_VERSION);
		VFIO_ASSERT_EQ(version, VFIO_API_VERSION, "Unsupported version: %d\n", version);
	} else {
		/*
		 * Require iommu->iommufd to be >0 so that a simple non-0 check
		 * can be used to check if iommufd is enabled. In practice
		 * open() will never return 0 unless stdin is closed.
		 */
		iommu->iommufd = open("/dev/iommu", O_RDWR);
		VFIO_ASSERT_GT(iommu->iommufd, 0);

		iommu->ioas_id = iommufd_ioas_alloc(iommu->iommufd);
	}

	return iommu;
}

void iommu_cleanup(struct iommu *iommu)
{
	if (iommu->iommufd)
		VFIO_ASSERT_EQ(close(iommu->iommufd), 0);
	else
		VFIO_ASSERT_EQ(close(iommu->container_fd), 0);

	free(iommu);
}