// SPDX-License-Identifier: GPL-2.0-only
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <uapi/linux/types.h>
#include <linux/iommufd.h>
#include <linux/limits.h>
#include <linux/mman.h>
#include <linux/overflow.h>
#include <linux/types.h>
#include <linux/vfio.h>

#include "../../../kselftest.h"
#include <vfio_util.h>

#define PCI_SYSFS_PATH "/sys/bus/pci/devices"

#define ioctl_assert(_fd, _op, _arg) do {					\
	void *__arg = (_arg);							\
	int __ret = ioctl((_fd), (_op), (__arg));				\
	VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
} while (0)

static struct vfio_info_cap_header *next_cap_hdr(void *buf, u32 bufsz,
						 u32 *cap_offset)
{
	struct vfio_info_cap_header *hdr;

	if (!*cap_offset)
		return NULL;

	VFIO_ASSERT_LT(*cap_offset, bufsz);
	VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));

	hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
	*cap_offset = hdr->next;

	return hdr;
}

static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *info,
							    u16 cap_id)
{
	struct vfio_info_cap_header *hdr;
	u32 cap_offset = info->cap_offset;
	u32 max_depth;
	u32 depth = 0;

	if (!(info->flags & VFIO_IOMMU_INFO_CAPS))
		return NULL;

	if (cap_offset)
		VFIO_ASSERT_GE(cap_offset, sizeof(*info));

	max_depth = (info->argsz - sizeof(*info)) / sizeof(*hdr);

	while ((hdr = next_cap_hdr(info, info->argsz, &cap_offset))) {
		depth++;
		VFIO_ASSERT_LE(depth, max_depth, "Capability chain contains a cycle\n");

		if (hdr->id == cap_id)
			return hdr;
	}

	return NULL;
}

/* Return buffer including capability chain, if present. Free with free(). */
static struct vfio_iommu_type1_info *vfio_iommu_get_info(struct vfio_pci_device *device)
{
	struct vfio_iommu_type1_info *info;

	info = malloc(sizeof(*info));
	VFIO_ASSERT_NOT_NULL(info);

	*info = (struct vfio_iommu_type1_info) {
		.argsz = sizeof(*info),
	};

	ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info);
	VFIO_ASSERT_GE(info->argsz, sizeof(*info));

	info = realloc(info, info->argsz);
	VFIO_ASSERT_NOT_NULL(info);

	ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info);
	VFIO_ASSERT_GE(info->argsz, sizeof(*info));

	return info;
}
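
/*
 * Note: the two VFIO_IOMMU_GET_INFO calls above follow VFIO's standard argsz
 * protocol: the first call, made with argsz covering only the fixed struct,
 * prompts the kernel to write back the total size required (including any
 * capability chain); the buffer is then grown to that size and the second
 * call fills in the capabilities.
 */
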
/*
 * Return the IOVA ranges of the device's container, normalizing
 * vfio_iommu_type1's capability format to iommufd's struct iommu_iova_range.
 * Free with free().
 */
static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device,
						       u32 *nranges)
{
	struct vfio_iommu_type1_info_cap_iova_range *cap_range;
	struct vfio_iommu_type1_info *info;
	struct vfio_info_cap_header *hdr;
	struct iommu_iova_range *ranges = NULL;

	info = vfio_iommu_get_info(device);
	hdr = vfio_iommu_info_cap_hdr(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
	VFIO_ASSERT_NOT_NULL(hdr);

	cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
	VFIO_ASSERT_GT(cap_range->nr_iovas, 0);

	ranges = calloc(cap_range->nr_iovas, sizeof(*ranges));
	VFIO_ASSERT_NOT_NULL(ranges);

	for (u32 i = 0; i < cap_range->nr_iovas; i++) {
		ranges[i] = (struct iommu_iova_range){
			.start = cap_range->iova_ranges[i].start,
			.last = cap_range->iova_ranges[i].end,
		};
	}

	*nranges = cap_range->nr_iovas;

	free(info);
	return ranges;
}

/* Return the IOVA ranges of the device's IOAS. Free with free(). */
static struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device,
						    u32 *nranges)
{
	struct iommu_iova_range *ranges;
	int ret;

	struct iommu_ioas_iova_ranges query = {
		.size = sizeof(query),
		.ioas_id = device->ioas_id,
	};

	ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
	VFIO_ASSERT_EQ(ret, -1);
	VFIO_ASSERT_EQ(errno, EMSGSIZE);
	VFIO_ASSERT_GT(query.num_iovas, 0);

	ranges = calloc(query.num_iovas, sizeof(*ranges));
	VFIO_ASSERT_NOT_NULL(ranges);

	query.allowed_iovas = (uintptr_t)ranges;

	ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
	*nranges = query.num_iovas;

	return ranges;
}
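
/*
 * Note: the first IOMMU_IOAS_IOVA_RANGES call above is issued with
 * num_iovas == 0 on purpose. The kernel fails it with EMSGSIZE and writes the
 * required count back into num_iovas, which sizes allowed_iovas for the
 * second, asserted call.
 */
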
static int iova_range_comp(const void *a, const void *b)
{
	const struct iommu_iova_range *ra = a, *rb = b;

	if (ra->start < rb->start)
		return -1;

	if (ra->start > rb->start)
		return 1;

	return 0;
}

/* Return the sorted IOVA ranges of the device. Free with free(). */
struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
					      u32 *nranges)
{
	struct iommu_iova_range *ranges;

	if (device->iommufd)
		ranges = iommufd_iova_ranges(device, nranges);
	else
		ranges = vfio_iommu_iova_ranges(device, nranges);

	if (!ranges)
		return NULL;

	VFIO_ASSERT_GT(*nranges, 0);

	/* Sort and check that ranges are sane and non-overlapping */
	qsort(ranges, *nranges, sizeof(*ranges), iova_range_comp);
	VFIO_ASSERT_LT(ranges[0].start, ranges[0].last);

	for (u32 i = 1; i < *nranges; i++) {
		VFIO_ASSERT_LT(ranges[i].start, ranges[i].last);
		VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start);
	}

	return ranges;
}

struct iova_allocator *iova_allocator_init(struct vfio_pci_device *device)
{
	struct iova_allocator *allocator;
	struct iommu_iova_range *ranges;
	u32 nranges;

	ranges = vfio_pci_iova_ranges(device, &nranges);
	VFIO_ASSERT_NOT_NULL(ranges);

	allocator = malloc(sizeof(*allocator));
	VFIO_ASSERT_NOT_NULL(allocator);

	*allocator = (struct iova_allocator){
		.ranges = ranges,
		.nranges = nranges,
		.range_idx = 0,
		.range_offset = 0,
	};

	return allocator;
}

void iova_allocator_cleanup(struct iova_allocator *allocator)
{
	free(allocator->ranges);
	free(allocator);
}

iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size)
{
	VFIO_ASSERT_GT(size, 0, "Invalid size arg, zero\n");
	VFIO_ASSERT_EQ(size & (size - 1), 0, "Invalid size arg, non-power-of-2\n");

	for (;;) {
		struct iommu_iova_range *range;
		iova_t iova, last;

		VFIO_ASSERT_LT(allocator->range_idx, allocator->nranges,
			       "IOVA allocator out of space\n");

		range = &allocator->ranges[allocator->range_idx];
		iova = range->start + allocator->range_offset;

		/* Check for sufficient space at the current offset */
		if (check_add_overflow(iova, size - 1, &last) ||
		    last > range->last)
			goto next_range;

		/*
		 * Align iova up to size. Since last == iova + size - 1, this
		 * is equivalent to (iova + size - 1) & ~(size - 1).
		 */
		iova = last & ~(size - 1);

		/* Check for sufficient space at the aligned iova */
		if (check_add_overflow(iova, size - 1, &last) ||
		    last > range->last)
			goto next_range;

		if (last == range->last) {
			allocator->range_idx++;
			allocator->range_offset = 0;
		} else {
			allocator->range_offset = last - range->start + 1;
		}

		return iova;

next_range:
		allocator->range_idx++;
		allocator->range_offset = 0;
	}
}
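
/*
 * Example usage of the IOVA allocator (an illustrative sketch, not used by
 * the library itself; assumes @device came from vfio_pci_device_init() and
 * that 4096 is an acceptable power-of-2 size):
 *
 *	struct iova_allocator *allocator = iova_allocator_init(device);
 *	iova_t iova = iova_allocator_alloc(allocator, 4096);
 *
 *	... map the IOVA and run DMA ...
 *
 *	iova_allocator_cleanup(allocator);
 */
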
iova_t __to_iova(struct vfio_pci_device *device, void *vaddr)
{
	struct vfio_dma_region *region;

	list_for_each_entry(region, &device->dma_regions, link) {
		if (vaddr < region->vaddr)
			continue;

		if (vaddr >= region->vaddr + region->size)
			continue;

		return region->iova + (vaddr - region->vaddr);
	}

	return INVALID_IOVA;
}

iova_t to_iova(struct vfio_pci_device *device, void *vaddr)
{
	iova_t iova;

	iova = __to_iova(device, vaddr);
	VFIO_ASSERT_NE(iova, INVALID_IOVA, "%p is not mapped into device.\n", vaddr);

	return iova;
}

static void vfio_pci_irq_set(struct vfio_pci_device *device,
			     u32 index, u32 vector, u32 count, int *fds)
{
	u8 buf[sizeof(struct vfio_irq_set) + sizeof(int) * count] = {};
	struct vfio_irq_set *irq = (void *)&buf;
	int *irq_fds = (void *)&irq->data;

	irq->argsz = sizeof(buf);
	irq->flags = VFIO_IRQ_SET_ACTION_TRIGGER;
	irq->index = index;
	irq->start = vector;
	irq->count = count;

	if (count) {
		irq->flags |= VFIO_IRQ_SET_DATA_EVENTFD;
		memcpy(irq_fds, fds, sizeof(int) * count);
	} else {
		irq->flags |= VFIO_IRQ_SET_DATA_NONE;
	}

	ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, irq);
}

void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector)
{
	struct vfio_irq_set irq = {
		.argsz = sizeof(irq),
		.flags = VFIO_IRQ_SET_ACTION_TRIGGER | VFIO_IRQ_SET_DATA_NONE,
		.index = index,
		.start = vector,
		.count = 1,
	};

	ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, &irq);
}

static void check_supported_irq_index(u32 index)
{
	/* The VFIO selftests only support MSI and MSI-X for now. */
	VFIO_ASSERT_TRUE(index == VFIO_PCI_MSI_IRQ_INDEX ||
			 index == VFIO_PCI_MSIX_IRQ_INDEX,
			 "Unsupported IRQ index: %u\n", index);
}

void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index, u32 vector,
			 int count)
{
	int i;

	check_supported_irq_index(index);

	for (i = vector; i < vector + count; i++) {
		VFIO_ASSERT_LT(device->msi_eventfds[i], 0);
		device->msi_eventfds[i] = eventfd(0, 0);
		VFIO_ASSERT_GE(device->msi_eventfds[i], 0);
	}

	vfio_pci_irq_set(device, index, vector, count, device->msi_eventfds + vector);
}

void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index)
{
	int i;

	check_supported_irq_index(index);

	for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) {
		if (device->msi_eventfds[i] < 0)
			continue;

		VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
		device->msi_eventfds[i] = -1;
	}

	vfio_pci_irq_set(device, index, 0, 0, NULL);
}

static void vfio_pci_irq_get(struct vfio_pci_device *device, u32 index,
			     struct vfio_irq_info *irq_info)
{
	irq_info->argsz = sizeof(*irq_info);
	irq_info->index = index;

	ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info);
}

static int vfio_iommu_dma_map(struct vfio_pci_device *device,
			      struct vfio_dma_region *region)
{
	struct vfio_iommu_type1_dma_map args = {
		.argsz = sizeof(args),
		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
		.vaddr = (u64)region->vaddr,
		.iova = region->iova,
		.size = region->size,
	};

	if (ioctl(device->container_fd, VFIO_IOMMU_MAP_DMA, &args))
		return -errno;

	return 0;
}

static int iommufd_dma_map(struct vfio_pci_device *device,
			   struct vfio_dma_region *region)
{
	struct iommu_ioas_map args = {
		.size = sizeof(args),
		.flags = IOMMU_IOAS_MAP_READABLE |
			 IOMMU_IOAS_MAP_WRITEABLE |
			 IOMMU_IOAS_MAP_FIXED_IOVA,
		.user_va = (u64)region->vaddr,
		.iova = region->iova,
		.length = region->size,
		.ioas_id = device->ioas_id,
	};

	if (ioctl(device->iommufd, IOMMU_IOAS_MAP, &args))
		return -errno;

	return 0;
}

int __vfio_pci_dma_map(struct vfio_pci_device *device,
		       struct vfio_dma_region *region)
{
	int ret;

	if (device->iommufd)
		ret = iommufd_dma_map(device, region);
	else
		ret = vfio_iommu_dma_map(device, region);

	if (ret)
		return ret;

	list_add(&region->link, &device->dma_regions);

	return 0;
}
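
/*
 * Example: mapping a buffer for DMA (an illustrative sketch, not used by the
 * library itself; assumes buf and size describe valid process memory, and
 * note that the region is linked into device->dma_regions, so it must stay
 * alive until it is unmapped):
 *
 *	struct vfio_dma_region region = {
 *		.vaddr = buf,
 *		.iova = iova_allocator_alloc(allocator, size),
 *		.size = size,
 *	};
 *
 *	VFIO_ASSERT_EQ(__vfio_pci_dma_map(device, &region), 0);
 */
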
static int vfio_iommu_dma_unmap(int fd, u64 iova, u64 size, u32 flags,
				u64 *unmapped)
{
	struct vfio_iommu_type1_dma_unmap args = {
		.argsz = sizeof(args),
		.iova = iova,
		.size = size,
		.flags = flags,
	};

	if (ioctl(fd, VFIO_IOMMU_UNMAP_DMA, &args))
		return -errno;

	if (unmapped)
		*unmapped = args.size;

	return 0;
}

static int iommufd_dma_unmap(int fd, u64 iova, u64 length, u32 ioas_id,
			     u64 *unmapped)
{
	struct iommu_ioas_unmap args = {
		.size = sizeof(args),
		.iova = iova,
		.length = length,
		.ioas_id = ioas_id,
	};

	if (ioctl(fd, IOMMU_IOAS_UNMAP, &args))
		return -errno;

	if (unmapped)
		*unmapped = args.length;

	return 0;
}

int __vfio_pci_dma_unmap(struct vfio_pci_device *device,
			 struct vfio_dma_region *region, u64 *unmapped)
{
	int ret;

	if (device->iommufd)
		ret = iommufd_dma_unmap(device->iommufd, region->iova,
					region->size, device->ioas_id,
					unmapped);
	else
		ret = vfio_iommu_dma_unmap(device->container_fd, region->iova,
					   region->size, 0, unmapped);

	if (ret)
		return ret;

	list_del_init(&region->link);

	return 0;
}

int __vfio_pci_dma_unmap_all(struct vfio_pci_device *device, u64 *unmapped)
{
	int ret;
	struct vfio_dma_region *curr, *next;

	if (device->iommufd)
		ret = iommufd_dma_unmap(device->iommufd, 0, UINT64_MAX,
					device->ioas_id, unmapped);
	else
		ret = vfio_iommu_dma_unmap(device->container_fd, 0, 0,
					   VFIO_DMA_UNMAP_FLAG_ALL, unmapped);

	if (ret)
		return ret;

	list_for_each_entry_safe(curr, next, &device->dma_regions, link)
		list_del_init(&curr->link);

	return 0;
}

static void vfio_pci_region_get(struct vfio_pci_device *device, int index,
				struct vfio_region_info *info)
{
	memset(info, 0, sizeof(*info));

	info->argsz = sizeof(*info);
	info->index = index;

	ioctl_assert(device->fd, VFIO_DEVICE_GET_REGION_INFO, info);
}

static void vfio_pci_bar_map(struct vfio_pci_device *device, int index)
{
	struct vfio_pci_bar *bar = &device->bars[index];
	int prot = 0;

	VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
	VFIO_ASSERT_NULL(bar->vaddr);
	VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP);

	if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ)
		prot |= PROT_READ;
	if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE)
		prot |= PROT_WRITE;

	bar->vaddr = mmap(NULL, bar->info.size, prot, MAP_FILE | MAP_SHARED,
			  device->fd, bar->info.offset);
	VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED);
}

static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index)
{
	struct vfio_pci_bar *bar = &device->bars[index];

	VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
	VFIO_ASSERT_NOT_NULL(bar->vaddr);

	VFIO_ASSERT_EQ(munmap(bar->vaddr, bar->info.size), 0);
	bar->vaddr = NULL;
}

static void vfio_pci_bar_unmap_all(struct vfio_pci_device *device)
{
	int i;

	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
		if (device->bars[i].vaddr)
			vfio_pci_bar_unmap(device, i);
	}
}
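
/*
 * Example: 32-bit MMIO read through a mapped BAR (an illustrative sketch;
 * assumes BAR 0 exists, supports mmap(), and is at least 4 bytes long):
 *
 *	volatile u32 *mmio = device->bars[0].vaddr;
 *	u32 val = mmio[0];
 */
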
void vfio_pci_config_access(struct vfio_pci_device *device, bool write,
			    size_t config, size_t size, void *data)
{
	struct vfio_region_info *config_space = &device->config_space;
	int ret;

	if (write)
		ret = pwrite(device->fd, data, size, config_space->offset + config);
	else
		ret = pread(device->fd, data, size, config_space->offset + config);

	VFIO_ASSERT_EQ(ret, size, "Failed to %s PCI config space: 0x%lx\n",
		       write ? "write to" : "read from", config);
}

void vfio_pci_device_reset(struct vfio_pci_device *device)
{
	ioctl_assert(device->fd, VFIO_DEVICE_RESET, NULL);
}

static unsigned int vfio_pci_get_group_from_dev(const char *bdf)
{
	char dev_iommu_group_path[PATH_MAX] = {0};
	char sysfs_path[PATH_MAX] = {0};
	unsigned int group;
	int ret;

	snprintf(sysfs_path, PATH_MAX, "%s/%s/iommu_group", PCI_SYSFS_PATH, bdf);

	/* Leave room for the NUL terminator; readlink() does not add one. */
	ret = readlink(sysfs_path, dev_iommu_group_path,
		       sizeof(dev_iommu_group_path) - 1);
	VFIO_ASSERT_NE(ret, -1, "Failed to get the IOMMU group for device: %s\n", bdf);

	ret = sscanf(basename(dev_iommu_group_path), "%u", &group);
	VFIO_ASSERT_EQ(ret, 1, "Failed to get the IOMMU group for device: %s\n", bdf);

	return group;
}

static void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf)
{
	struct vfio_group_status group_status = {
		.argsz = sizeof(group_status),
	};
	char group_path[32];
	int group;

	group = vfio_pci_get_group_from_dev(bdf);
	snprintf(group_path, sizeof(group_path), "/dev/vfio/%d", group);

	device->group_fd = open(group_path, O_RDWR);
	VFIO_ASSERT_GE(device->group_fd, 0, "open(%s) failed\n", group_path);

	ioctl_assert(device->group_fd, VFIO_GROUP_GET_STATUS, &group_status);
	VFIO_ASSERT_TRUE(group_status.flags & VFIO_GROUP_FLAGS_VIABLE);

	ioctl_assert(device->group_fd, VFIO_GROUP_SET_CONTAINER, &device->container_fd);
}

static void vfio_pci_container_setup(struct vfio_pci_device *device, const char *bdf)
{
	unsigned long iommu_type = device->iommu_mode->iommu_type;
	const char *path = device->iommu_mode->container_path;
	int version;
	int ret;

	device->container_fd = open(path, O_RDWR);
	VFIO_ASSERT_GE(device->container_fd, 0, "open(%s) failed\n", path);

	version = ioctl(device->container_fd, VFIO_GET_API_VERSION);
	VFIO_ASSERT_EQ(version, VFIO_API_VERSION, "Unsupported version: %d\n", version);

	vfio_pci_group_setup(device, bdf);

	ret = ioctl(device->container_fd, VFIO_CHECK_EXTENSION, iommu_type);
	VFIO_ASSERT_GT(ret, 0, "VFIO IOMMU type %lu not supported\n", iommu_type);

	ioctl_assert(device->container_fd, VFIO_SET_IOMMU, (void *)iommu_type);

	device->fd = ioctl(device->group_fd, VFIO_GROUP_GET_DEVICE_FD, bdf);
	VFIO_ASSERT_GE(device->fd, 0);
}

static void vfio_pci_device_setup(struct vfio_pci_device *device)
{
	int i;

	device->info.argsz = sizeof(device->info);
	ioctl_assert(device->fd, VFIO_DEVICE_GET_INFO, &device->info);

	vfio_pci_region_get(device, VFIO_PCI_CONFIG_REGION_INDEX, &device->config_space);

	/* Sanity check that VFIO does not advertise mmap for config space */
	VFIO_ASSERT_TRUE(!(device->config_space.flags & VFIO_REGION_INFO_FLAG_MMAP),
			 "PCI config space should not support mmap()\n");

	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
		struct vfio_pci_bar *bar = device->bars + i;

		vfio_pci_region_get(device, i, &bar->info);
		if (bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP)
			vfio_pci_bar_map(device, i);
	}

	vfio_pci_irq_get(device, VFIO_PCI_MSI_IRQ_INDEX, &device->msi_info);
	vfio_pci_irq_get(device, VFIO_PCI_MSIX_IRQ_INDEX, &device->msix_info);

	for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++)
		device->msi_eventfds[i] = -1;
}
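
/*
 * Return the path to the device's VFIO character device, found by scanning
 * the device's vfio-dev/ sysfs directory for its vfio-devN entry. The
 * returned string is heap-allocated; free with free().
 */
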
const char *vfio_pci_get_cdev_path(const char *bdf)
{
	char dir_path[PATH_MAX];
	struct dirent *entry;
	char *cdev_path;
	DIR *dir;

	cdev_path = calloc(PATH_MAX, 1);
	VFIO_ASSERT_NOT_NULL(cdev_path);

	snprintf(dir_path, sizeof(dir_path), "/sys/bus/pci/devices/%s/vfio-dev/", bdf);

	dir = opendir(dir_path);
	VFIO_ASSERT_NOT_NULL(dir, "Failed to open directory %s\n", dir_path);

	while ((entry = readdir(dir)) != NULL) {
		/* Find the directory entry whose name starts with "vfio" */
		if (strncmp("vfio", entry->d_name, 4))
			continue;

		snprintf(cdev_path, PATH_MAX, "/dev/vfio/devices/%s", entry->d_name);
		break;
	}

	VFIO_ASSERT_NE(cdev_path[0], 0, "Failed to find vfio cdev file.\n");
	VFIO_ASSERT_EQ(closedir(dir), 0);

	return cdev_path;
}

/* Reminder: Keep in sync with FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(). */
static const struct vfio_iommu_mode iommu_modes[] = {
	{
		.name = "vfio_type1_iommu",
		.container_path = "/dev/vfio/vfio",
		.iommu_type = VFIO_TYPE1_IOMMU,
	},
	{
		.name = "vfio_type1v2_iommu",
		.container_path = "/dev/vfio/vfio",
		.iommu_type = VFIO_TYPE1v2_IOMMU,
	},
	{
		.name = "iommufd_compat_type1",
		.container_path = "/dev/iommu",
		.iommu_type = VFIO_TYPE1_IOMMU,
	},
	{
		.name = "iommufd_compat_type1v2",
		.container_path = "/dev/iommu",
		.iommu_type = VFIO_TYPE1v2_IOMMU,
	},
	{
		.name = "iommufd",
	},
};

const char *default_iommu_mode = "iommufd";

static const struct vfio_iommu_mode *lookup_iommu_mode(const char *iommu_mode)
{
	int i;

	if (!iommu_mode)
		iommu_mode = default_iommu_mode;

	for (i = 0; i < ARRAY_SIZE(iommu_modes); i++) {
		if (strcmp(iommu_mode, iommu_modes[i].name))
			continue;

		return &iommu_modes[i];
	}

	VFIO_FAIL("Unrecognized IOMMU mode: %s\n", iommu_mode);
}

static void vfio_device_bind_iommufd(int device_fd, int iommufd)
{
	struct vfio_device_bind_iommufd args = {
		.argsz = sizeof(args),
		.iommufd = iommufd,
	};

	ioctl_assert(device_fd, VFIO_DEVICE_BIND_IOMMUFD, &args);
}

static u32 iommufd_ioas_alloc(int iommufd)
{
	struct iommu_ioas_alloc args = {
		.size = sizeof(args),
	};

	ioctl_assert(iommufd, IOMMU_IOAS_ALLOC, &args);
	return args.out_ioas_id;
}

static void vfio_device_attach_iommufd_pt(int device_fd, u32 pt_id)
{
	struct vfio_device_attach_iommufd_pt args = {
		.argsz = sizeof(args),
		.pt_id = pt_id,
	};

	ioctl_assert(device_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &args);
}
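
/*
 * Set up the device via the VFIO cdev + iommufd path: open the device's
 * character device, bind it to a new iommufd context, allocate an IOAS, and
 * attach the device to that IOAS so DMA mappings can be established against
 * it.
 */
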
static void vfio_pci_iommufd_setup(struct vfio_pci_device *device, const char *bdf)
{
	const char *cdev_path = vfio_pci_get_cdev_path(bdf);

	device->fd = open(cdev_path, O_RDWR);
	VFIO_ASSERT_GE(device->fd, 0);
	free((void *)cdev_path);

	/*
	 * Require device->iommufd to be >0 so that a simple non-0 check can be
	 * used to check if iommufd is enabled. In practice open() will never
	 * return 0 unless stdin is closed.
	 */
	device->iommufd = open("/dev/iommu", O_RDWR);
	VFIO_ASSERT_GT(device->iommufd, 0);

	vfio_device_bind_iommufd(device->fd, device->iommufd);
	device->ioas_id = iommufd_ioas_alloc(device->iommufd);
	vfio_device_attach_iommufd_pt(device->fd, device->ioas_id);
}

struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_mode)
{
	struct vfio_pci_device *device;

	device = calloc(1, sizeof(*device));
	VFIO_ASSERT_NOT_NULL(device);

	INIT_LIST_HEAD(&device->dma_regions);

	device->iommu_mode = lookup_iommu_mode(iommu_mode);

	if (device->iommu_mode->container_path)
		vfio_pci_container_setup(device, bdf);
	else
		vfio_pci_iommufd_setup(device, bdf);

	vfio_pci_device_setup(device);
	vfio_pci_driver_probe(device);

	return device;
}

void vfio_pci_device_cleanup(struct vfio_pci_device *device)
{
	int i;

	if (device->driver.initialized)
		vfio_pci_driver_remove(device);

	vfio_pci_bar_unmap_all(device);

	VFIO_ASSERT_EQ(close(device->fd), 0);

	for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) {
		if (device->msi_eventfds[i] < 0)
			continue;

		VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
	}

	if (device->iommufd) {
		VFIO_ASSERT_EQ(close(device->iommufd), 0);
	} else {
		VFIO_ASSERT_EQ(close(device->group_fd), 0);
		VFIO_ASSERT_EQ(close(device->container_fd), 0);
	}

	free(device);
}

static bool is_bdf(const char *str)
{
	unsigned int s, b, d, f;
	int length, count;

	count = sscanf(str, "%4x:%2x:%2x.%2x%n", &s, &b, &d, &f, &length);
	return count == 4 && length == strlen(str);
}

const char *vfio_selftests_get_bdf(int *argc, char *argv[])
{
	char *bdf;

	if (*argc > 1 && is_bdf(argv[*argc - 1]))
		return argv[--(*argc)];

	bdf = getenv("VFIO_SELFTESTS_BDF");
	if (bdf) {
		VFIO_ASSERT_TRUE(is_bdf(bdf), "Invalid BDF: %s\n", bdf);
		return bdf;
	}

	fprintf(stderr, "Unable to determine which device to use, skipping test.\n");
	fprintf(stderr, "\n");
	fprintf(stderr, "To pass the device address via environment variable:\n");
	fprintf(stderr, "\n");
	fprintf(stderr, "  export VFIO_SELFTESTS_BDF=segment:bus:device.function\n");
	fprintf(stderr, "  %s [options]\n", argv[0]);
	fprintf(stderr, "\n");
	fprintf(stderr, "To pass the device address via argv:\n");
	fprintf(stderr, "\n");
	fprintf(stderr, "  %s [options] segment:bus:device.function\n", argv[0]);
	fprintf(stderr, "\n");
	exit(KSFT_SKIP);
}
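
/*
 * Example end-to-end lifecycle (an illustrative sketch of how a test might
 * use this library; not part of the library itself):
 *
 *	const char *bdf = vfio_selftests_get_bdf(&argc, argv);
 *	struct vfio_pci_device *device = vfio_pci_device_init(bdf, NULL);
 *
 *	... exercise the device ...
 *
 *	vfio_pci_device_cleanup(device);
 */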