// SPDX-License-Identifier: GPL-2.0-only
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <uapi/linux/types.h>
#include <linux/iommufd.h>
#include <linux/limits.h>
#include <linux/mman.h>
#include <linux/overflow.h>
#include <linux/types.h>
#include <linux/vfio.h>

#include "../../../kselftest.h"
#include <vfio_util.h>

#define PCI_SYSFS_PATH "/sys/bus/pci/devices"

#define ioctl_assert(_fd, _op, _arg) do {                                                      \
        void *__arg = (_arg);                                                                  \
        int __ret = ioctl((_fd), (_op), (__arg));                                              \
        VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
} while (0)

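/*
 * Return the capability header at *cap_offset within buf, advancing
 * *cap_offset to the next header in the chain (an offset of 0 terminates
 * the chain). Offsets are validated against bufsz before dereferencing.
 */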
static struct vfio_info_cap_header *next_cap_hdr(void *buf, u32 bufsz,
                                                 u32 *cap_offset)
{
        struct vfio_info_cap_header *hdr;

        if (!*cap_offset)
                return NULL;

        VFIO_ASSERT_LT(*cap_offset, bufsz);
        VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));

        hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
        *cap_offset = hdr->next;

        return hdr;
}

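/*
 * Search the capability chain of a VFIO_IOMMU_GET_INFO result for the
 * capability with the given ID. The depth counter bounds the walk so a
 * malformed (cyclic) chain cannot loop forever.
 */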
static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *info,
                                                            u16 cap_id)
{
        struct vfio_info_cap_header *hdr;
        u32 cap_offset = info->cap_offset;
        u32 max_depth;
        u32 depth = 0;

        if (!(info->flags & VFIO_IOMMU_INFO_CAPS))
                return NULL;

        if (cap_offset)
                VFIO_ASSERT_GE(cap_offset, sizeof(*info));

        max_depth = (info->argsz - sizeof(*info)) / sizeof(*hdr);

        while ((hdr = next_cap_hdr(info, info->argsz, &cap_offset))) {
                depth++;
                VFIO_ASSERT_LE(depth, max_depth, "Capability chain contains a cycle\n");

                if (hdr->id == cap_id)
                        return hdr;
        }

        return NULL;
}

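/*
 * VFIO's *_GET_INFO ioctls use a two-call protocol: the first call, issued
 * with argsz set to the base struct size, makes the kernel report the total
 * size required (including any capability chain); the buffer is then grown
 * to that size and the ioctl repeated to fetch the full payload.
 */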
/* Return buffer including capability chain, if present. Free with free() */
static struct vfio_iommu_type1_info *vfio_iommu_get_info(struct vfio_pci_device *device)
{
        struct vfio_iommu_type1_info *info;

        info = malloc(sizeof(*info));
        VFIO_ASSERT_NOT_NULL(info);

        *info = (struct vfio_iommu_type1_info) {
                .argsz = sizeof(*info),
        };

        ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info);
        VFIO_ASSERT_GE(info->argsz, sizeof(*info));

        info = realloc(info, info->argsz);
        VFIO_ASSERT_NOT_NULL(info);

        ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info);
        VFIO_ASSERT_GE(info->argsz, sizeof(*info));

        return info;
}

/*
 * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to
 * report iommufd's iommu_iova_range. Free with free().
 */
static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device,
                                                       u32 *nranges)
{
        struct vfio_iommu_type1_info_cap_iova_range *cap_range;
        struct vfio_iommu_type1_info *info;
        struct vfio_info_cap_header *hdr;
        struct iommu_iova_range *ranges = NULL;

        info = vfio_iommu_get_info(device);
        hdr = vfio_iommu_info_cap_hdr(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
        VFIO_ASSERT_NOT_NULL(hdr);

        cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
        VFIO_ASSERT_GT(cap_range->nr_iovas, 0);

        ranges = calloc(cap_range->nr_iovas, sizeof(*ranges));
        VFIO_ASSERT_NOT_NULL(ranges);

        for (u32 i = 0; i < cap_range->nr_iovas; i++) {
                ranges[i] = (struct iommu_iova_range){
                        .start = cap_range->iova_ranges[i].start,
                        .last = cap_range->iova_ranges[i].end,
                };
        }

        *nranges = cap_range->nr_iovas;

        free(info);
        return ranges;
}

/* Return iova ranges of the device's IOAS. Free with free() */
static struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device,
                                                    u32 *nranges)
{
        struct iommu_iova_range *ranges;
        int ret;

        struct iommu_ioas_iova_ranges query = {
                .size = sizeof(query),
                .ioas_id = device->ioas_id,
        };

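        /*
         * Probe with no output buffer to learn how many ranges exist: the
         * kernel fails the call with EMSGSIZE but still fills in num_iovas.
         */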
        ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
        VFIO_ASSERT_EQ(ret, -1);
        VFIO_ASSERT_EQ(errno, EMSGSIZE);
        VFIO_ASSERT_GT(query.num_iovas, 0);

        ranges = calloc(query.num_iovas, sizeof(*ranges));
        VFIO_ASSERT_NOT_NULL(ranges);

        query.allowed_iovas = (uintptr_t)ranges;

        ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
        *nranges = query.num_iovas;

        return ranges;
}

static int iova_range_comp(const void *a, const void *b)
{
        const struct iommu_iova_range *ra = a, *rb = b;

        if (ra->start < rb->start)
                return -1;

        if (ra->start > rb->start)
                return 1;

        return 0;
}

/* Return sorted IOVA ranges of the device. Free with free(). */
struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
                                              u32 *nranges)
{
        struct iommu_iova_range *ranges;

        if (device->iommufd)
                ranges = iommufd_iova_ranges(device, nranges);
        else
                ranges = vfio_iommu_iova_ranges(device, nranges);

        if (!ranges)
                return NULL;

        VFIO_ASSERT_GT(*nranges, 0);

        /* Sort and check that ranges are sane and non-overlapping */
        qsort(ranges, *nranges, sizeof(*ranges), iova_range_comp);
        VFIO_ASSERT_LT(ranges[0].start, ranges[0].last);

        for (u32 i = 1; i < *nranges; i++) {
                VFIO_ASSERT_LT(ranges[i].start, ranges[i].last);
                VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start);
        }

        return ranges;
}

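/*
 * Allocate IOVA space from the ranges discovered above. A minimal usage
 * sketch (illustrative only; "device" is assumed to come from
 * vfio_pci_device_init()):
 *
 *      struct iova_allocator *allocator = iova_allocator_init(device);
 *      iova_t iova = iova_allocator_alloc(allocator, getpagesize());
 *
 *      ...map buffers at iova and run DMA...
 *
 *      iova_allocator_cleanup(allocator);
 */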
struct iova_allocator *iova_allocator_init(struct vfio_pci_device *device)
{
        struct iova_allocator *allocator;
        struct iommu_iova_range *ranges;
        u32 nranges;

        ranges = vfio_pci_iova_ranges(device, &nranges);
        VFIO_ASSERT_NOT_NULL(ranges);

        allocator = malloc(sizeof(*allocator));
        VFIO_ASSERT_NOT_NULL(allocator);

        *allocator = (struct iova_allocator){
                .ranges = ranges,
                .nranges = nranges,
                .range_idx = 0,
                .range_offset = 0,
        };

        return allocator;
}

void iova_allocator_cleanup(struct iova_allocator *allocator)
{
        free(allocator->ranges);
        free(allocator);
}

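/*
 * Bump-allocate a size-aligned IOVA of the given power-of-2 size, walking
 * the sorted ranges in order. Allocations are never returned to the pool;
 * a caller that exhausts every range trips the out-of-space assert.
 */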
iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size)
{
        VFIO_ASSERT_GT(size, 0, "Invalid size arg, zero\n");
        VFIO_ASSERT_EQ(size & (size - 1), 0, "Invalid size arg, non-power-of-2\n");

        for (;;) {
                struct iommu_iova_range *range;
                iova_t iova, last;

                VFIO_ASSERT_LT(allocator->range_idx, allocator->nranges,
                               "IOVA allocator out of space\n");

                range = &allocator->ranges[allocator->range_idx];
                iova = range->start + allocator->range_offset;

                /* Check for sufficient space at the current offset */
                if (check_add_overflow(iova, size - 1, &last) ||
                    last > range->last)
                        goto next_range;

                /* Align iova to size */
                iova = last & ~(size - 1);

                /* Check for sufficient space at the aligned iova */
                if (check_add_overflow(iova, size - 1, &last) ||
                    last > range->last)
                        goto next_range;

                if (last == range->last) {
                        allocator->range_idx++;
                        allocator->range_offset = 0;
                } else {
                        allocator->range_offset = last - range->start + 1;
                }

                return iova;

next_range:
                allocator->range_idx++;
                allocator->range_offset = 0;
        }
}

iova_t __to_iova(struct vfio_pci_device *device, void *vaddr)
{
        struct vfio_dma_region *region;

        list_for_each_entry(region, &device->dma_regions, link) {
                if (vaddr < region->vaddr)
                        continue;

                if (vaddr >= region->vaddr + region->size)
                        continue;

                return region->iova + (vaddr - region->vaddr);
        }

        return INVALID_IOVA;
}

iova_t to_iova(struct vfio_pci_device *device, void *vaddr)
{
        iova_t iova;

        iova = __to_iova(device, vaddr);
        VFIO_ASSERT_NE(iova, INVALID_IOVA, "%p is not mapped into device.\n", vaddr);

        return iova;
}

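/*
 * Issue VFIO_DEVICE_SET_IRQS for vectors [vector, vector + count) of the
 * given IRQ index. With a non-zero count, the eventfds in fds are attached
 * as triggers (DATA_EVENTFD); count == 0 tears all triggers down
 * (DATA_NONE). The eventfd array is carried inline after the header, hence
 * the variable-length buffer.
 */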
static void vfio_pci_irq_set(struct vfio_pci_device *device,
                             u32 index, u32 vector, u32 count, int *fds)
{
        u8 buf[sizeof(struct vfio_irq_set) + sizeof(int) * count] = {};
        struct vfio_irq_set *irq = (void *)&buf;
        int *irq_fds = (void *)&irq->data;

        irq->argsz = sizeof(buf);
        irq->flags = VFIO_IRQ_SET_ACTION_TRIGGER;
        irq->index = index;
        irq->start = vector;
        irq->count = count;

        if (count) {
                irq->flags |= VFIO_IRQ_SET_DATA_EVENTFD;
                memcpy(irq_fds, fds, sizeof(int) * count);
        } else {
                irq->flags |= VFIO_IRQ_SET_DATA_NONE;
        }

        ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, irq);
}

void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector)
{
        struct vfio_irq_set irq = {
                .argsz = sizeof(irq),
                .flags = VFIO_IRQ_SET_ACTION_TRIGGER | VFIO_IRQ_SET_DATA_NONE,
                .index = index,
                .start = vector,
                .count = 1,
        };

        ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, &irq);
}

static void check_supported_irq_index(u32 index)
{
        /* The VFIO selftests only support MSI and MSI-X for now. */
        VFIO_ASSERT_TRUE(index == VFIO_PCI_MSI_IRQ_INDEX ||
                         index == VFIO_PCI_MSIX_IRQ_INDEX,
                         "Unsupported IRQ index: %u\n", index);
}

void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index, u32 vector,
                         int count)
{
        int i;

        check_supported_irq_index(index);

        for (i = vector; i < vector + count; i++) {
                VFIO_ASSERT_LT(device->msi_eventfds[i], 0);
                device->msi_eventfds[i] = eventfd(0, 0);
                VFIO_ASSERT_GE(device->msi_eventfds[i], 0);
        }

        vfio_pci_irq_set(device, index, vector, count, device->msi_eventfds + vector);
}

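/*
 * An illustrative flow for exercising an MSI vector end-to-end (sketch
 * only, error handling elided): enable the vector, trigger it, and wait
 * for the interrupt to signal the eventfd.
 *
 *      u64 value;
 *
 *      vfio_pci_irq_enable(device, VFIO_PCI_MSI_IRQ_INDEX, 0, 1);
 *      vfio_pci_irq_trigger(device, VFIO_PCI_MSI_IRQ_INDEX, 0);
 *      read(device->msi_eventfds[0], &value, sizeof(value));
 *      vfio_pci_irq_disable(device, VFIO_PCI_MSI_IRQ_INDEX);
 */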
void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index)
{
        int i;

        check_supported_irq_index(index);

        for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) {
                if (device->msi_eventfds[i] < 0)
                        continue;

                VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
                device->msi_eventfds[i] = -1;
        }

        vfio_pci_irq_set(device, index, 0, 0, NULL);
}

static void vfio_pci_irq_get(struct vfio_pci_device *device, u32 index,
                             struct vfio_irq_info *irq_info)
{
        irq_info->argsz = sizeof(*irq_info);
        irq_info->index = index;

        ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info);
}

static int vfio_iommu_dma_map(struct vfio_pci_device *device,
                              struct vfio_dma_region *region)
{
        struct vfio_iommu_type1_dma_map args = {
                .argsz = sizeof(args),
                .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
                .vaddr = (u64)region->vaddr,
                .iova = region->iova,
                .size = region->size,
        };

        if (ioctl(device->container_fd, VFIO_IOMMU_MAP_DMA, &args))
                return -errno;

        return 0;
}

static int iommufd_dma_map(struct vfio_pci_device *device,
                           struct vfio_dma_region *region)
{
        struct iommu_ioas_map args = {
                .size = sizeof(args),
                .flags = IOMMU_IOAS_MAP_READABLE |
                         IOMMU_IOAS_MAP_WRITEABLE |
                         IOMMU_IOAS_MAP_FIXED_IOVA,
                .user_va = (u64)region->vaddr,
                .iova = region->iova,
                .length = region->size,
                .ioas_id = device->ioas_id,
        };

        if (ioctl(device->iommufd, IOMMU_IOAS_MAP, &args))
                return -errno;

        return 0;
}

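/*
 * Map region->vaddr at region->iova for DMA through whichever backend the
 * device was initialized with (iommufd or a legacy type1 container), and
 * track the region so to_iova() can translate addresses within it.
 */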
int __vfio_pci_dma_map(struct vfio_pci_device *device,
                       struct vfio_dma_region *region)
{
        int ret;

        if (device->iommufd)
                ret = iommufd_dma_map(device, region);
        else
                ret = vfio_iommu_dma_map(device, region);

        if (ret)
                return ret;

        list_add(&region->link, &device->dma_regions);

        return 0;
}

static int vfio_iommu_dma_unmap(int fd, u64 iova, u64 size, u32 flags,
                                u64 *unmapped)
{
        struct vfio_iommu_type1_dma_unmap args = {
                .argsz = sizeof(args),
                .iova = iova,
                .size = size,
                .flags = flags,
        };

        if (ioctl(fd, VFIO_IOMMU_UNMAP_DMA, &args))
                return -errno;

        if (unmapped)
                *unmapped = args.size;

        return 0;
}

static int iommufd_dma_unmap(int fd, u64 iova, u64 length, u32 ioas_id,
                             u64 *unmapped)
{
        struct iommu_ioas_unmap args = {
                .size = sizeof(args),
                .iova = iova,
                .length = length,
                .ioas_id = ioas_id,
        };

        if (ioctl(fd, IOMMU_IOAS_UNMAP, &args))
                return -errno;

        if (unmapped)
                *unmapped = args.length;

        return 0;
}

int __vfio_pci_dma_unmap(struct vfio_pci_device *device,
                         struct vfio_dma_region *region, u64 *unmapped)
{
        int ret;

        if (device->iommufd)
                ret = iommufd_dma_unmap(device->iommufd, region->iova,
                                        region->size, device->ioas_id,
                                        unmapped);
        else
                ret = vfio_iommu_dma_unmap(device->container_fd, region->iova,
                                           region->size, 0, unmapped);

        if (ret)
                return ret;

        list_del_init(&region->link);

        return 0;
}

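/*
 * Unmap everything in a single call. iommufd expresses "all" as the full
 * IOVA span [0, UINT64_MAX]; vfio_iommu_type1 instead takes the dedicated
 * VFIO_DMA_UNMAP_FLAG_ALL flag with iova and size both 0.
 */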
int __vfio_pci_dma_unmap_all(struct vfio_pci_device *device, u64 *unmapped)
{
        int ret;
        struct vfio_dma_region *curr, *next;

        if (device->iommufd)
                ret = iommufd_dma_unmap(device->iommufd, 0, UINT64_MAX,
                                        device->ioas_id, unmapped);
        else
                ret = vfio_iommu_dma_unmap(device->container_fd, 0, 0,
                                           VFIO_DMA_UNMAP_FLAG_ALL, unmapped);

        if (ret)
                return ret;

        list_for_each_entry_safe(curr, next, &device->dma_regions, link)
                list_del_init(&curr->link);

        return 0;
}

static void vfio_pci_region_get(struct vfio_pci_device *device, int index,
                                struct vfio_region_info *info)
{
        memset(info, 0, sizeof(*info));

        info->argsz = sizeof(*info);
        info->index = index;

        ioctl_assert(device->fd, VFIO_DEVICE_GET_REGION_INFO, info);
}

static void vfio_pci_bar_map(struct vfio_pci_device *device, int index)
{
        struct vfio_pci_bar *bar = &device->bars[index];
        int prot = 0;

        VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
        VFIO_ASSERT_NULL(bar->vaddr);
        VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP);

        if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ)
                prot |= PROT_READ;
        if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE)
                prot |= PROT_WRITE;

        bar->vaddr = mmap(NULL, bar->info.size, prot, MAP_FILE | MAP_SHARED,
                          device->fd, bar->info.offset);
        VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED);
}

static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index)
{
        struct vfio_pci_bar *bar = &device->bars[index];

        VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
        VFIO_ASSERT_NOT_NULL(bar->vaddr);

        VFIO_ASSERT_EQ(munmap(bar->vaddr, bar->info.size), 0);
        bar->vaddr = NULL;
}

static void vfio_pci_bar_unmap_all(struct vfio_pci_device *device)
{
        int i;

        for (i = 0; i < PCI_STD_NUM_BARS; i++) {
                if (device->bars[i].vaddr)
                        vfio_pci_bar_unmap(device, i);
        }
}

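/*
 * Read or write size bytes at offset config within PCI config space via the
 * device's VFIO config region. For example, a caller could read the 16-bit
 * vendor ID like so (sketch; PCI_VENDOR_ID assumes <linux/pci_regs.h>):
 *
 *      u16 vendor;
 *
 *      vfio_pci_config_access(device, false, PCI_VENDOR_ID, sizeof(vendor),
 *                             &vendor);
 */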
void vfio_pci_config_access(struct vfio_pci_device *device, bool write,
                            size_t config, size_t size, void *data)
{
        struct vfio_region_info *config_space = &device->config_space;
        int ret;

        if (write)
                ret = pwrite(device->fd, data, size, config_space->offset + config);
        else
                ret = pread(device->fd, data, size, config_space->offset + config);

        VFIO_ASSERT_EQ(ret, size, "Failed to %s PCI config space: 0x%lx\n",
                       write ? "write to" : "read from", config);
}

void vfio_pci_device_reset(struct vfio_pci_device *device)
{
        ioctl_assert(device->fd, VFIO_DEVICE_RESET, NULL);
}

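/*
 * Derive the device's IOMMU group number from the basename of its
 * /sys/bus/pci/devices/<bdf>/iommu_group symlink.
 */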
static unsigned int vfio_pci_get_group_from_dev(const char *bdf)
{
        char dev_iommu_group_path[PATH_MAX] = {0};
        char sysfs_path[PATH_MAX] = {0};
        unsigned int group;
        int ret;

        snprintf(sysfs_path, PATH_MAX, "%s/%s/iommu_group", PCI_SYSFS_PATH, bdf);

        ret = readlink(sysfs_path, dev_iommu_group_path, sizeof(dev_iommu_group_path));
        VFIO_ASSERT_NE(ret, -1, "Failed to get the IOMMU group for device: %s\n", bdf);

        ret = sscanf(basename(dev_iommu_group_path), "%u", &group);
        VFIO_ASSERT_EQ(ret, 1, "Failed to get the IOMMU group for device: %s\n", bdf);

        return group;
}

static void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf)
{
        struct vfio_group_status group_status = {
                .argsz = sizeof(group_status),
        };
        char group_path[32];
        int group;

        group = vfio_pci_get_group_from_dev(bdf);
        snprintf(group_path, sizeof(group_path), "/dev/vfio/%d", group);

        device->group_fd = open(group_path, O_RDWR);
        VFIO_ASSERT_GE(device->group_fd, 0, "open(%s) failed\n", group_path);

        ioctl_assert(device->group_fd, VFIO_GROUP_GET_STATUS, &group_status);
        VFIO_ASSERT_TRUE(group_status.flags & VFIO_GROUP_FLAGS_VIABLE);

        ioctl_assert(device->group_fd, VFIO_GROUP_SET_CONTAINER, &device->container_fd);
}

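/*
 * Legacy container setup follows the canonical VFIO type1 sequence: open
 * the container, verify the API version, attach the device's group, check
 * that the requested IOMMU type is supported, enable it with
 * VFIO_SET_IOMMU, and finally obtain the device fd from the group.
 */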
static void vfio_pci_container_setup(struct vfio_pci_device *device, const char *bdf)
{
        unsigned long iommu_type = device->iommu_mode->iommu_type;
        const char *path = device->iommu_mode->container_path;
        int version;
        int ret;

        device->container_fd = open(path, O_RDWR);
        VFIO_ASSERT_GE(device->container_fd, 0, "open(%s) failed\n", path);

        version = ioctl(device->container_fd, VFIO_GET_API_VERSION);
        VFIO_ASSERT_EQ(version, VFIO_API_VERSION, "Unsupported version: %d\n", version);

        vfio_pci_group_setup(device, bdf);

        ret = ioctl(device->container_fd, VFIO_CHECK_EXTENSION, iommu_type);
        VFIO_ASSERT_GT(ret, 0, "VFIO IOMMU type %lu not supported\n", iommu_type);

        ioctl_assert(device->container_fd, VFIO_SET_IOMMU, (void *)iommu_type);

        device->fd = ioctl(device->group_fd, VFIO_GROUP_GET_DEVICE_FD, bdf);
        VFIO_ASSERT_GE(device->fd, 0);
}

static void vfio_pci_device_setup(struct vfio_pci_device *device)
{
        int i;

        device->info.argsz = sizeof(device->info);
        ioctl_assert(device->fd, VFIO_DEVICE_GET_INFO, &device->info);

        vfio_pci_region_get(device, VFIO_PCI_CONFIG_REGION_INDEX, &device->config_space);

        /* Sanity check VFIO does not advertise mmap for config space */
        VFIO_ASSERT_TRUE(!(device->config_space.flags & VFIO_REGION_INFO_FLAG_MMAP),
                         "PCI config space should not support mmap()\n");

        for (i = 0; i < PCI_STD_NUM_BARS; i++) {
                struct vfio_pci_bar *bar = device->bars + i;

                vfio_pci_region_get(device, i, &bar->info);
                if (bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP)
                        vfio_pci_bar_map(device, i);
        }

        vfio_pci_irq_get(device, VFIO_PCI_MSI_IRQ_INDEX, &device->msi_info);
        vfio_pci_irq_get(device, VFIO_PCI_MSIX_IRQ_INDEX, &device->msix_info);

        for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++)
                device->msi_eventfds[i] = -1;
}

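/*
 * Return the path to the device's vfio character device (e.g. a name of
 * the form /dev/vfio/devices/vfio0), discovered via the device's vfio-dev
 * sysfs directory. The returned string is heap-allocated; free with free().
 */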
const char *vfio_pci_get_cdev_path(const char *bdf)
{
        char dir_path[PATH_MAX];
        struct dirent *entry;
        char *cdev_path;
        DIR *dir;

        cdev_path = calloc(PATH_MAX, 1);
        VFIO_ASSERT_NOT_NULL(cdev_path);

        snprintf(dir_path, sizeof(dir_path), "/sys/bus/pci/devices/%s/vfio-dev/", bdf);

        dir = opendir(dir_path);
        VFIO_ASSERT_NOT_NULL(dir, "Failed to open directory %s\n", dir_path);

        while ((entry = readdir(dir)) != NULL) {
                /* Find the file that starts with "vfio" */
                if (strncmp("vfio", entry->d_name, 4))
                        continue;

                snprintf(cdev_path, PATH_MAX, "/dev/vfio/devices/%s", entry->d_name);
                break;
        }

        VFIO_ASSERT_NE(cdev_path[0], 0, "Failed to find vfio cdev file.\n");
        VFIO_ASSERT_EQ(closedir(dir), 0);

        return cdev_path;
}

/* Reminder: Keep in sync with FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(). */
static const struct vfio_iommu_mode iommu_modes[] = {
        {
                .name = "vfio_type1_iommu",
                .container_path = "/dev/vfio/vfio",
                .iommu_type = VFIO_TYPE1_IOMMU,
        },
        {
                .name = "vfio_type1v2_iommu",
                .container_path = "/dev/vfio/vfio",
                .iommu_type = VFIO_TYPE1v2_IOMMU,
        },
        {
                .name = "iommufd_compat_type1",
                .container_path = "/dev/iommu",
                .iommu_type = VFIO_TYPE1_IOMMU,
        },
        {
                .name = "iommufd_compat_type1v2",
                .container_path = "/dev/iommu",
                .iommu_type = VFIO_TYPE1v2_IOMMU,
        },
        {
                .name = "iommufd",
        },
};

const char *default_iommu_mode = "iommufd";

static const struct vfio_iommu_mode *lookup_iommu_mode(const char *iommu_mode)
{
        int i;

        if (!iommu_mode)
                iommu_mode = default_iommu_mode;

        for (i = 0; i < ARRAY_SIZE(iommu_modes); i++) {
                if (strcmp(iommu_mode, iommu_modes[i].name))
                        continue;

                return &iommu_modes[i];
        }

        VFIO_FAIL("Unrecognized IOMMU mode: %s\n", iommu_mode);
}

static void vfio_device_bind_iommufd(int device_fd, int iommufd)
{
        struct vfio_device_bind_iommufd args = {
                .argsz = sizeof(args),
                .iommufd = iommufd,
        };

        ioctl_assert(device_fd, VFIO_DEVICE_BIND_IOMMUFD, &args);
}

static u32 iommufd_ioas_alloc(int iommufd)
{
        struct iommu_ioas_alloc args = {
                .size = sizeof(args),
        };

        ioctl_assert(iommufd, IOMMU_IOAS_ALLOC, &args);
        return args.out_ioas_id;
}

static void vfio_device_attach_iommufd_pt(int device_fd, u32 pt_id)
{
        struct vfio_device_attach_iommufd_pt args = {
                .argsz = sizeof(args),
                .pt_id = pt_id,
        };

        ioctl_assert(device_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &args);
}

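/*
 * cdev/iommufd setup: open the device's character device and /dev/iommu,
 * bind the device to the iommufd context, allocate an IOAS, and attach the
 * device to it so that DMA mappings can be established.
 */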
static void vfio_pci_iommufd_setup(struct vfio_pci_device *device, const char *bdf)
{
        const char *cdev_path = vfio_pci_get_cdev_path(bdf);

        device->fd = open(cdev_path, O_RDWR);
        VFIO_ASSERT_GE(device->fd, 0);
        free((void *)cdev_path);

        /*
         * Require device->iommufd to be >0 so that a simple non-0 check can be
         * used to check if iommufd is enabled. In practice open() will never
         * return 0 unless stdin is closed.
         */
        device->iommufd = open("/dev/iommu", O_RDWR);
        VFIO_ASSERT_GT(device->iommufd, 0);

        vfio_device_bind_iommufd(device->fd, device->iommufd);
        device->ioas_id = iommufd_ioas_alloc(device->iommufd);
        vfio_device_attach_iommufd_pt(device->fd, device->ioas_id);
}

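/*
 * Initialize a VFIO PCI device in the requested IOMMU mode (see
 * iommu_modes[] above; NULL selects default_iommu_mode). A minimal caller
 * sketch, with an illustrative BDF:
 *
 *      struct vfio_pci_device *device;
 *
 *      device = vfio_pci_device_init("0000:00:04.0", NULL);
 *
 *      ...exercise the device...
 *
 *      vfio_pci_device_cleanup(device);
 */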
struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_mode)
{
        struct vfio_pci_device *device;

        device = calloc(1, sizeof(*device));
        VFIO_ASSERT_NOT_NULL(device);

        INIT_LIST_HEAD(&device->dma_regions);

        device->iommu_mode = lookup_iommu_mode(iommu_mode);

        if (device->iommu_mode->container_path)
                vfio_pci_container_setup(device, bdf);
        else
                vfio_pci_iommufd_setup(device, bdf);

        vfio_pci_device_setup(device);
        vfio_pci_driver_probe(device);

        return device;
}

void vfio_pci_device_cleanup(struct vfio_pci_device *device)
{
        int i;

        if (device->driver.initialized)
                vfio_pci_driver_remove(device);

        vfio_pci_bar_unmap_all(device);

        VFIO_ASSERT_EQ(close(device->fd), 0);

        for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) {
                if (device->msi_eventfds[i] < 0)
                        continue;

                VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
        }

        if (device->iommufd) {
                VFIO_ASSERT_EQ(close(device->iommufd), 0);
        } else {
                VFIO_ASSERT_EQ(close(device->group_fd), 0);
                VFIO_ASSERT_EQ(close(device->container_fd), 0);
        }

        free(device);
}

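/*
 * Check whether str is a complete segment:bus:device.function address,
 * e.g. "0000:00:04.0". The %n conversion records how many characters were
 * consumed, so trailing garbage is rejected.
 */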
static bool is_bdf(const char *str)
{
        unsigned int s, b, d, f;
        int length, count;

        count = sscanf(str, "%4x:%2x:%2x.%2x%n", &s, &b, &d, &f, &length);
        return count == 4 && length == strlen(str);
}

const char *vfio_selftests_get_bdf(int *argc, char *argv[])
{
        char *bdf;

        if (*argc > 1 && is_bdf(argv[*argc - 1]))
                return argv[--(*argc)];

        bdf = getenv("VFIO_SELFTESTS_BDF");
        if (bdf) {
                VFIO_ASSERT_TRUE(is_bdf(bdf), "Invalid BDF: %s\n", bdf);
                return bdf;
        }

        fprintf(stderr, "Unable to determine which device to use, skipping test.\n");
        fprintf(stderr, "\n");
        fprintf(stderr, "To pass the device address via environment variable:\n");
        fprintf(stderr, "\n");
        fprintf(stderr, "  export VFIO_SELFTESTS_BDF=segment:bus:device.function\n");
        fprintf(stderr, "  %s [options]\n", argv[0]);
        fprintf(stderr, "\n");
        fprintf(stderr, "To pass the device address via argv:\n");
        fprintf(stderr, "\n");
        fprintf(stderr, "  %s [options] segment:bus:device.function\n", argv[0]);
        fprintf(stderr, "\n");
        exit(KSFT_SKIP);
}