// SPDX-License-Identifier: GPL-2.0-only
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <uapi/linux/types.h>
#include <linux/iommufd.h>
#include <linux/limits.h>
#include <linux/mman.h>
#include <linux/overflow.h>
#include <linux/types.h>
#include <linux/vfio.h>

#include "../../../kselftest.h"
#include <vfio_util.h>

#define PCI_SYSFS_PATH "/sys/bus/pci/devices"

#define ioctl_assert(_fd, _op, _arg) do {					\
	void *__arg = (_arg);							\
	int __ret = ioctl((_fd), (_op), (__arg));				\
	VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
} while (0)

/*
 * Advance one step along a vfio_info_cap_header chain. Offsets are relative
 * to the start of buf, and an offset of 0 terminates the chain.
 */
static struct vfio_info_cap_header *next_cap_hdr(void *buf, u32 bufsz,
						 u32 *cap_offset)
{
	struct vfio_info_cap_header *hdr;

	if (!*cap_offset)
		return NULL;

	VFIO_ASSERT_LT(*cap_offset, bufsz);
	VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));

	hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
	*cap_offset = hdr->next;

	return hdr;
}
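/*
 * Layout of a VFIO info buffer carrying a capability chain (sketch): each
 * capability begins with a vfio_info_cap_header whose `next` field holds the
 * offset of the following capability from the start of the buffer, with 0
 * terminating the chain:
 *
 *	+-------------------------------+ offset 0
 *	| fixed info struct             |
 *	|   .cap_offset ----------------+---+
 *	+-------------------------------+   |
 *	| vfio_info_cap_header          |<--+
 *	|   .id, .next -----------------+---+
 *	| capability payload            |   |
 *	+-------------------------------+   |
 *	| vfio_info_cap_header          |<--+
 *	| ...                           |
 *	+-------------------------------+
 */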
static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *info,
							    u16 cap_id)
{
	struct vfio_info_cap_header *hdr;
	u32 cap_offset = info->cap_offset;
	u32 max_depth;
	u32 depth = 0;

	if (!(info->flags & VFIO_IOMMU_INFO_CAPS))
		return NULL;

	if (cap_offset)
		VFIO_ASSERT_GE(cap_offset, sizeof(*info));

	/* Each capability consumes at least one header's worth of space. */
	max_depth = (info->argsz - sizeof(*info)) / sizeof(*hdr);

	while ((hdr = next_cap_hdr(info, info->argsz, &cap_offset))) {
		depth++;
		VFIO_ASSERT_LE(depth, max_depth, "Capability chain contains a cycle\n");

		if (hdr->id == cap_id)
			return hdr;
	}

	return NULL;
}

/* Return buffer including capability chain, if present. Free with free(). */
static struct vfio_iommu_type1_info *vfio_iommu_get_info(struct vfio_pci_device *device)
{
	struct vfio_iommu_type1_info *info;

	info = malloc(sizeof(*info));
	VFIO_ASSERT_NOT_NULL(info);

	*info = (struct vfio_iommu_type1_info) {
		.argsz = sizeof(*info),
	};

	/*
	 * The first call reports the required size back in argsz; grow the
	 * buffer and repeat the call so the capability chain fits.
	 */
	ioctl_assert(device->iommu->container_fd, VFIO_IOMMU_GET_INFO, info);
	VFIO_ASSERT_GE(info->argsz, sizeof(*info));

	info = realloc(info, info->argsz);
	VFIO_ASSERT_NOT_NULL(info);

	ioctl_assert(device->iommu->container_fd, VFIO_IOMMU_GET_INFO, info);
	VFIO_ASSERT_GE(info->argsz, sizeof(*info));

	return info;
}

/*
 * Return the IOVA ranges for the device's container, normalizing
 * vfio_iommu_type1's reporting into iommufd's struct iommu_iova_range.
 * Free with free().
 */
static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device,
						       u32 *nranges)
{
	struct vfio_iommu_type1_info_cap_iova_range *cap_range;
	struct vfio_iommu_type1_info *info;
	struct vfio_info_cap_header *hdr;
	struct iommu_iova_range *ranges = NULL;

	info = vfio_iommu_get_info(device);
	hdr = vfio_iommu_info_cap_hdr(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
	VFIO_ASSERT_NOT_NULL(hdr);

	cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
	VFIO_ASSERT_GT(cap_range->nr_iovas, 0);

	ranges = calloc(cap_range->nr_iovas, sizeof(*ranges));
	VFIO_ASSERT_NOT_NULL(ranges);

	for (u32 i = 0; i < cap_range->nr_iovas; i++) {
		ranges[i] = (struct iommu_iova_range){
			.start = cap_range->iova_ranges[i].start,
			.last = cap_range->iova_ranges[i].end,
		};
	}

	*nranges = cap_range->nr_iovas;

	free(info);
	return ranges;
}

/* Return the IOVA ranges of the device's IOAS. Free with free(). */
static struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device,
						    u32 *nranges)
{
	struct iommu_iova_range *ranges;
	int ret;

	struct iommu_ioas_iova_ranges query = {
		.size = sizeof(query),
		.ioas_id = device->iommu->ioas_id,
	};

	/*
	 * The initial query passes num_iovas == 0, so the kernel fails it
	 * with EMSGSIZE after filling in the number of available ranges.
	 */
	ret = ioctl(device->iommu->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
	VFIO_ASSERT_EQ(ret, -1);
	VFIO_ASSERT_EQ(errno, EMSGSIZE);
	VFIO_ASSERT_GT(query.num_iovas, 0);

	ranges = calloc(query.num_iovas, sizeof(*ranges));
	VFIO_ASSERT_NOT_NULL(ranges);

	query.allowed_iovas = (uintptr_t)ranges;

	ioctl_assert(device->iommu->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
	*nranges = query.num_iovas;

	return ranges;
}

static int iova_range_comp(const void *a, const void *b)
{
	const struct iommu_iova_range *ra = a, *rb = b;

	if (ra->start < rb->start)
		return -1;

	if (ra->start > rb->start)
		return 1;

	return 0;
}
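/*
 * Note: both query paths above return inclusive bounds. iommufd's
 * iommu_iova_range carries {start, last} natively; vfio_iommu_type1's
 * vfio_iova_range carries {start, end} with end as the last valid IOVA,
 * which vfio_iommu_iova_ranges() copies into `last`.
 */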
/* Return the sorted IOVA ranges of the device. Free with free(). */
struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
					      u32 *nranges)
{
	struct iommu_iova_range *ranges;

	if (device->iommu->iommufd)
		ranges = iommufd_iova_ranges(device, nranges);
	else
		ranges = vfio_iommu_iova_ranges(device, nranges);

	if (!ranges)
		return NULL;

	VFIO_ASSERT_GT(*nranges, 0);

	/* Sort and check that ranges are sane and non-overlapping */
	qsort(ranges, *nranges, sizeof(*ranges), iova_range_comp);
	VFIO_ASSERT_LT(ranges[0].start, ranges[0].last);

	for (u32 i = 1; i < *nranges; i++) {
		VFIO_ASSERT_LT(ranges[i].start, ranges[i].last);
		VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start);
	}

	return ranges;
}

struct iova_allocator *iova_allocator_init(struct vfio_pci_device *device)
{
	struct iova_allocator *allocator;
	struct iommu_iova_range *ranges;
	u32 nranges;

	ranges = vfio_pci_iova_ranges(device, &nranges);
	VFIO_ASSERT_NOT_NULL(ranges);

	allocator = malloc(sizeof(*allocator));
	VFIO_ASSERT_NOT_NULL(allocator);

	*allocator = (struct iova_allocator){
		.ranges = ranges,
		.nranges = nranges,
		.range_idx = 0,
		.range_offset = 0,
	};

	return allocator;
}

void iova_allocator_cleanup(struct iova_allocator *allocator)
{
	free(allocator->ranges);
	free(allocator);
}

iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size)
{
	VFIO_ASSERT_GT(size, 0, "Size must be non-zero\n");
	VFIO_ASSERT_EQ(size & (size - 1), 0, "Size must be a power of 2\n");

	for (;;) {
		struct iommu_iova_range *range;
		iova_t iova, last;

		VFIO_ASSERT_LT(allocator->range_idx, allocator->nranges,
			       "IOVA allocator out of space\n");

		range = &allocator->ranges[allocator->range_idx];
		iova = range->start + allocator->range_offset;

		/* Check for sufficient space at the current offset */
		if (check_add_overflow(iova, size - 1, &last) ||
		    last > range->last)
			goto next_range;

		/*
		 * Align iova up to size: since size is a power of 2 and
		 * last == iova + size - 1, masking last rounds iova up to the
		 * next size-aligned boundary (or leaves it in place if it is
		 * already aligned).
		 */
		iova = last & ~(size - 1);

		/* Check for sufficient space at the aligned iova */
		if (check_add_overflow(iova, size - 1, &last) ||
		    last > range->last)
			goto next_range;

		if (last == range->last) {
			allocator->range_idx++;
			allocator->range_offset = 0;
		} else {
			allocator->range_offset = last - range->start + 1;
		}

		return iova;

next_range:
		allocator->range_idx++;
		allocator->range_offset = 0;
	}
}
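/*
 * Illustrative use of the allocator above (a sketch, not called by the
 * library itself):
 *
 *	struct iova_allocator *allocator = iova_allocator_init(device);
 *	iova_t iova = iova_allocator_alloc(allocator, getpagesize());
 *	...
 *	iova_allocator_cleanup(allocator);
 *
 * Allocations are size-aligned and never recycled; the allocator simply
 * walks forward through the sorted ranges until it runs out of space.
 */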
iova_t __to_iova(struct vfio_pci_device *device, void *vaddr)
{
	struct dma_region *region;

	list_for_each_entry(region, &device->iommu->dma_regions, link) {
		if (vaddr < region->vaddr)
			continue;

		if (vaddr >= region->vaddr + region->size)
			continue;

		return region->iova + (vaddr - region->vaddr);
	}

	return INVALID_IOVA;
}

iova_t to_iova(struct vfio_pci_device *device, void *vaddr)
{
	iova_t iova;

	iova = __to_iova(device, vaddr);
	VFIO_ASSERT_NE(iova, INVALID_IOVA, "%p is not mapped into the device.\n", vaddr);

	return iova;
}

static void vfio_pci_irq_set(struct vfio_pci_device *device,
			     u32 index, u32 vector, u32 count, int *fds)
{
	u8 buf[sizeof(struct vfio_irq_set) + sizeof(int) * count] = {};
	struct vfio_irq_set *irq = (void *)&buf;
	int *irq_fds = (void *)&irq->data;

	irq->argsz = sizeof(buf);
	irq->flags = VFIO_IRQ_SET_ACTION_TRIGGER;
	irq->index = index;
	irq->start = vector;
	irq->count = count;

	/* count == 0 disables the IRQ line; otherwise attach eventfds. */
	if (count) {
		irq->flags |= VFIO_IRQ_SET_DATA_EVENTFD;
		memcpy(irq_fds, fds, sizeof(int) * count);
	} else {
		irq->flags |= VFIO_IRQ_SET_DATA_NONE;
	}

	ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, irq);
}

void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector)
{
	struct vfio_irq_set irq = {
		.argsz = sizeof(irq),
		.flags = VFIO_IRQ_SET_ACTION_TRIGGER | VFIO_IRQ_SET_DATA_NONE,
		.index = index,
		.start = vector,
		.count = 1,
	};

	ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, &irq);
}

static void check_supported_irq_index(u32 index)
{
	/* The VFIO selftests only support MSI and MSI-X for now. */
	VFIO_ASSERT_TRUE(index == VFIO_PCI_MSI_IRQ_INDEX ||
			 index == VFIO_PCI_MSIX_IRQ_INDEX,
			 "Unsupported IRQ index: %u\n", index);
}

void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index, u32 vector,
			 int count)
{
	int i;

	check_supported_irq_index(index);

	for (i = vector; i < vector + count; i++) {
		VFIO_ASSERT_LT(device->msi_eventfds[i], 0);
		device->msi_eventfds[i] = eventfd(0, 0);
		VFIO_ASSERT_GE(device->msi_eventfds[i], 0);
	}

	vfio_pci_irq_set(device, index, vector, count, device->msi_eventfds + vector);
}

void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index)
{
	int i;

	check_supported_irq_index(index);

	for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) {
		if (device->msi_eventfds[i] < 0)
			continue;

		VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
		device->msi_eventfds[i] = -1;
	}

	vfio_pci_irq_set(device, index, 0, 0, NULL);
}

static void vfio_pci_irq_get(struct vfio_pci_device *device, u32 index,
			     struct vfio_irq_info *irq_info)
{
	irq_info->argsz = sizeof(*irq_info);
	irq_info->index = index;

	ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info);
}

static int vfio_iommu_dma_map(struct vfio_pci_device *device,
			      struct dma_region *region)
{
	struct vfio_iommu_type1_dma_map args = {
		.argsz = sizeof(args),
		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
		.vaddr = (u64)region->vaddr,
		.iova = region->iova,
		.size = region->size,
	};

	if (ioctl(device->iommu->container_fd, VFIO_IOMMU_MAP_DMA, &args))
		return -errno;

	return 0;
}

static int iommufd_dma_map(struct vfio_pci_device *device,
			   struct dma_region *region)
{
	struct iommu_ioas_map args = {
		.size = sizeof(args),
		.flags = IOMMU_IOAS_MAP_READABLE |
			 IOMMU_IOAS_MAP_WRITEABLE |
			 IOMMU_IOAS_MAP_FIXED_IOVA,
		.user_va = (u64)region->vaddr,
		.iova = region->iova,
		.length = region->size,
		.ioas_id = device->iommu->ioas_id,
	};

	if (ioctl(device->iommu->iommufd, IOMMU_IOAS_MAP, &args))
		return -errno;

	return 0;
}

int __vfio_pci_dma_map(struct vfio_pci_device *device,
		       struct dma_region *region)
{
	int ret;

	if (device->iommu->iommufd)
		ret = iommufd_dma_map(device, region);
	else
		ret = vfio_iommu_dma_map(device, region);

	if (ret)
		return ret;

	list_add(&region->link, &device->iommu->dma_regions);

	return 0;
}
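/*
 * Illustrative DMA mapping flow (sketch; buf must point to a valid,
 * page-aligned mapping of at least `size` bytes):
 *
 *	struct dma_region region = {
 *		.vaddr = buf,
 *		.iova = iova_allocator_alloc(allocator, size),
 *		.size = size,
 *	};
 *
 *	VFIO_ASSERT_EQ(__vfio_pci_dma_map(device, &region), 0);
 *	... program the device with to_iova(device, buf) ...
 *	VFIO_ASSERT_EQ(__vfio_pci_dma_unmap(device, &region, NULL), 0);
 */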
static int vfio_iommu_dma_unmap(int fd, u64 iova, u64 size, u32 flags,
				u64 *unmapped)
{
	struct vfio_iommu_type1_dma_unmap args = {
		.argsz = sizeof(args),
		.iova = iova,
		.size = size,
		.flags = flags,
	};

	if (ioctl(fd, VFIO_IOMMU_UNMAP_DMA, &args))
		return -errno;

	if (unmapped)
		*unmapped = args.size;

	return 0;
}

static int iommufd_dma_unmap(int fd, u64 iova, u64 length, u32 ioas_id,
			     u64 *unmapped)
{
	struct iommu_ioas_unmap args = {
		.size = sizeof(args),
		.iova = iova,
		.length = length,
		.ioas_id = ioas_id,
	};

	if (ioctl(fd, IOMMU_IOAS_UNMAP, &args))
		return -errno;

	if (unmapped)
		*unmapped = args.length;

	return 0;
}

int __vfio_pci_dma_unmap(struct vfio_pci_device *device,
			 struct dma_region *region, u64 *unmapped)
{
	int ret;

	if (device->iommu->iommufd)
		ret = iommufd_dma_unmap(device->iommu->iommufd, region->iova,
					region->size, device->iommu->ioas_id,
					unmapped);
	else
		ret = vfio_iommu_dma_unmap(device->iommu->container_fd,
					   region->iova, region->size, 0,
					   unmapped);

	if (ret)
		return ret;

	list_del_init(&region->link);

	return 0;
}

int __vfio_pci_dma_unmap_all(struct vfio_pci_device *device, u64 *unmapped)
{
	int ret;
	struct dma_region *curr, *next;

	if (device->iommu->iommufd)
		ret = iommufd_dma_unmap(device->iommu->iommufd, 0, UINT64_MAX,
					device->iommu->ioas_id, unmapped);
	else
		ret = vfio_iommu_dma_unmap(device->iommu->container_fd, 0, 0,
					   VFIO_DMA_UNMAP_FLAG_ALL, unmapped);

	if (ret)
		return ret;

	list_for_each_entry_safe(curr, next, &device->iommu->dma_regions, link)
		list_del_init(&curr->link);

	return 0;
}

static void vfio_pci_region_get(struct vfio_pci_device *device, int index,
				struct vfio_region_info *info)
{
	memset(info, 0, sizeof(*info));

	info->argsz = sizeof(*info);
	info->index = index;

	ioctl_assert(device->fd, VFIO_DEVICE_GET_REGION_INFO, info);
}

static void vfio_pci_bar_map(struct vfio_pci_device *device, int index)
{
	struct vfio_pci_bar *bar = &device->bars[index];
	int prot = 0;

	VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
	VFIO_ASSERT_NULL(bar->vaddr);
	VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP);

	if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ)
		prot |= PROT_READ;
	if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE)
		prot |= PROT_WRITE;

	bar->vaddr = mmap(NULL, bar->info.size, prot, MAP_FILE | MAP_SHARED,
			  device->fd, bar->info.offset);
	VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED);
}

static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index)
{
	struct vfio_pci_bar *bar = &device->bars[index];

	VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
	VFIO_ASSERT_NOT_NULL(bar->vaddr);

	VFIO_ASSERT_EQ(munmap(bar->vaddr, bar->info.size), 0);
	bar->vaddr = NULL;
}

static void vfio_pci_bar_unmap_all(struct vfio_pci_device *device)
{
	int i;

	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
		if (device->bars[i].vaddr)
			vfio_pci_bar_unmap(device, i);
	}
}
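/*
 * Illustrative MMIO access through a mapped BAR (sketch): once
 * vfio_pci_bar_map() has run, a 32-bit register at offset `off` can be read
 * with a volatile load through the mapping:
 *
 *	u32 val = *(volatile u32 *)(device->bars[0].vaddr + off);
 */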
void vfio_pci_config_access(struct vfio_pci_device *device, bool write,
			    size_t config, size_t size, void *data)
{
	struct vfio_region_info *config_space = &device->config_space;
	int ret;

	if (write)
		ret = pwrite(device->fd, data, size, config_space->offset + config);
	else
		ret = pread(device->fd, data, size, config_space->offset + config);

	VFIO_ASSERT_EQ(ret, size, "Failed to %s PCI config space: 0x%lx\n",
		       write ? "write to" : "read from", config);
}

void vfio_pci_device_reset(struct vfio_pci_device *device)
{
	ioctl_assert(device->fd, VFIO_DEVICE_RESET, NULL);
}

static unsigned int vfio_pci_get_group_from_dev(const char *bdf)
{
	char dev_iommu_group_path[PATH_MAX] = {0};
	char sysfs_path[PATH_MAX] = {0};
	unsigned int group;
	int ret;

	snprintf(sysfs_path, PATH_MAX, "%s/%s/iommu_group", PCI_SYSFS_PATH, bdf);

	/*
	 * readlink() does not NUL-terminate; leave room for the terminator in
	 * the zero-initialized buffer.
	 */
	ret = readlink(sysfs_path, dev_iommu_group_path, sizeof(dev_iommu_group_path) - 1);
	VFIO_ASSERT_NE(ret, -1, "Failed to get the IOMMU group for device: %s\n", bdf);

	ret = sscanf(basename(dev_iommu_group_path), "%u", &group);
	VFIO_ASSERT_EQ(ret, 1, "Failed to get the IOMMU group for device: %s\n", bdf);

	return group;
}

static void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf)
{
	struct vfio_group_status group_status = {
		.argsz = sizeof(group_status),
	};
	char group_path[32];
	int group;

	group = vfio_pci_get_group_from_dev(bdf);
	snprintf(group_path, sizeof(group_path), "/dev/vfio/%d", group);

	device->group_fd = open(group_path, O_RDWR);
	VFIO_ASSERT_GE(device->group_fd, 0, "open(%s) failed\n", group_path);

	ioctl_assert(device->group_fd, VFIO_GROUP_GET_STATUS, &group_status);
	VFIO_ASSERT_TRUE(group_status.flags & VFIO_GROUP_FLAGS_VIABLE);

	ioctl_assert(device->group_fd, VFIO_GROUP_SET_CONTAINER, &device->iommu->container_fd);
}
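/*
 * Legacy container flow: vfio_pci_group_setup() above opens the device's
 * group and attaches it to the container; vfio_pci_container_setup() below
 * then sets the IOMMU type on the container and fetches the device fd from
 * the group.
 */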
static void vfio_pci_container_setup(struct vfio_pci_device *device, const char *bdf)
{
	struct iommu *iommu = device->iommu;
	unsigned long iommu_type = iommu->mode->iommu_type;
	int ret;

	vfio_pci_group_setup(device, bdf);

	ret = ioctl(iommu->container_fd, VFIO_CHECK_EXTENSION, iommu_type);
	VFIO_ASSERT_GT(ret, 0, "VFIO IOMMU type %lu not supported\n", iommu_type);

	/*
	 * Allow multiple threads to race to set the IOMMU type on the
	 * container. The first will succeed and the rest should fail
	 * because the IOMMU type is already set.
	 */
	(void)ioctl(iommu->container_fd, VFIO_SET_IOMMU, (void *)iommu_type);

	device->fd = ioctl(device->group_fd, VFIO_GROUP_GET_DEVICE_FD, bdf);
	VFIO_ASSERT_GE(device->fd, 0);
}

static void vfio_pci_device_setup(struct vfio_pci_device *device)
{
	int i;

	device->info.argsz = sizeof(device->info);
	ioctl_assert(device->fd, VFIO_DEVICE_GET_INFO, &device->info);

	vfio_pci_region_get(device, VFIO_PCI_CONFIG_REGION_INDEX, &device->config_space);

	/* Sanity check that VFIO does not advertise mmap for config space */
	VFIO_ASSERT_TRUE(!(device->config_space.flags & VFIO_REGION_INFO_FLAG_MMAP),
			 "PCI config space should not support mmap()\n");

	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
		struct vfio_pci_bar *bar = device->bars + i;

		vfio_pci_region_get(device, i, &bar->info);
		if (bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP)
			vfio_pci_bar_map(device, i);
	}

	vfio_pci_irq_get(device, VFIO_PCI_MSI_IRQ_INDEX, &device->msi_info);
	vfio_pci_irq_get(device, VFIO_PCI_MSIX_IRQ_INDEX, &device->msix_info);

	for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++)
		device->msi_eventfds[i] = -1;
}

/* Return the path to the device's VFIO character device. Free with free(). */
const char *vfio_pci_get_cdev_path(const char *bdf)
{
	char dir_path[PATH_MAX];
	struct dirent *entry;
	char *cdev_path;
	DIR *dir;

	cdev_path = calloc(PATH_MAX, 1);
	VFIO_ASSERT_NOT_NULL(cdev_path);

	snprintf(dir_path, sizeof(dir_path), "/sys/bus/pci/devices/%s/vfio-dev/", bdf);

	dir = opendir(dir_path);
	VFIO_ASSERT_NOT_NULL(dir, "Failed to open directory %s\n", dir_path);

	while ((entry = readdir(dir)) != NULL) {
		/* Find the entry that starts with "vfio" */
		if (strncmp("vfio", entry->d_name, 4))
			continue;

		snprintf(cdev_path, PATH_MAX, "/dev/vfio/devices/%s", entry->d_name);
		break;
	}

	VFIO_ASSERT_NE(cdev_path[0], 0, "Failed to find vfio cdev file.\n");
	VFIO_ASSERT_EQ(closedir(dir), 0);

	return cdev_path;
}
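/*
 * The cdev path discovered above is typically of the form
 * /dev/vfio/devices/vfioN, and only exists when the kernel exposes the VFIO
 * device cdev interface (CONFIG_VFIO_DEVICE_CDEV).
 */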
/* Reminder: Keep in sync with FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(). */
static const struct iommu_mode iommu_modes[] = {
	{
		.name = "vfio_type1_iommu",
		.container_path = "/dev/vfio/vfio",
		.iommu_type = VFIO_TYPE1_IOMMU,
	},
	{
		.name = "vfio_type1v2_iommu",
		.container_path = "/dev/vfio/vfio",
		.iommu_type = VFIO_TYPE1v2_IOMMU,
	},
	{
		.name = "iommufd_compat_type1",
		.container_path = "/dev/iommu",
		.iommu_type = VFIO_TYPE1_IOMMU,
	},
	{
		.name = "iommufd_compat_type1v2",
		.container_path = "/dev/iommu",
		.iommu_type = VFIO_TYPE1v2_IOMMU,
	},
	{
		.name = "iommufd",
	},
};

const char *default_iommu_mode = "iommufd";

static const struct iommu_mode *lookup_iommu_mode(const char *iommu_mode)
{
	int i;

	if (!iommu_mode)
		iommu_mode = default_iommu_mode;

	for (i = 0; i < ARRAY_SIZE(iommu_modes); i++) {
		if (strcmp(iommu_mode, iommu_modes[i].name))
			continue;

		return &iommu_modes[i];
	}

	VFIO_FAIL("Unrecognized IOMMU mode: %s\n", iommu_mode);
}

static void vfio_device_bind_iommufd(int device_fd, int iommufd)
{
	struct vfio_device_bind_iommufd args = {
		.argsz = sizeof(args),
		.iommufd = iommufd,
	};

	ioctl_assert(device_fd, VFIO_DEVICE_BIND_IOMMUFD, &args);
}

static u32 iommufd_ioas_alloc(int iommufd)
{
	struct iommu_ioas_alloc args = {
		.size = sizeof(args),
	};

	ioctl_assert(iommufd, IOMMU_IOAS_ALLOC, &args);
	return args.out_ioas_id;
}

static void vfio_device_attach_iommufd_pt(int device_fd, u32 pt_id)
{
	struct vfio_device_attach_iommufd_pt args = {
		.argsz = sizeof(args),
		.pt_id = pt_id,
	};

	ioctl_assert(device_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &args);
}

static void vfio_pci_iommufd_setup(struct vfio_pci_device *device, const char *bdf)
{
	const char *cdev_path = vfio_pci_get_cdev_path(bdf);

	device->fd = open(cdev_path, O_RDWR);
	VFIO_ASSERT_GE(device->fd, 0);
	free((void *)cdev_path);

	/* Bind the device to the iommufd, then attach it to the IOAS. */
	vfio_device_bind_iommufd(device->fd, device->iommu->iommufd);
	vfio_device_attach_iommufd_pt(device->fd, device->iommu->ioas_id);
}
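/*
 * Illustrative mode selection (sketch): iommu_init(NULL) falls back to
 * default_iommu_mode ("iommufd"); any name from iommu_modes[] above can be
 * passed explicitly, e.g.:
 *
 *	struct iommu *iommu = iommu_init("iommufd_compat_type1");
 */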
struct iommu *iommu_init(const char *iommu_mode)
{
	const char *container_path;
	struct iommu *iommu;
	int version;

	iommu = calloc(1, sizeof(*iommu));
	VFIO_ASSERT_NOT_NULL(iommu);

	INIT_LIST_HEAD(&iommu->dma_regions);

	iommu->mode = lookup_iommu_mode(iommu_mode);

	container_path = iommu->mode->container_path;
	if (container_path) {
		iommu->container_fd = open(container_path, O_RDWR);
		VFIO_ASSERT_GE(iommu->container_fd, 0, "open(%s) failed\n", container_path);

		version = ioctl(iommu->container_fd, VFIO_GET_API_VERSION);
		VFIO_ASSERT_EQ(version, VFIO_API_VERSION, "Unsupported version: %d\n", version);
	} else {
		/*
		 * Require iommu->iommufd to be > 0 so that a simple non-zero
		 * check can be used to check if iommufd is enabled. In
		 * practice open() will never return 0 unless stdin is closed.
		 */
		iommu->iommufd = open("/dev/iommu", O_RDWR);
		VFIO_ASSERT_GT(iommu->iommufd, 0);

		iommu->ioas_id = iommufd_ioas_alloc(iommu->iommufd);
	}

	return iommu;
}

struct vfio_pci_device *vfio_pci_device_init(const char *bdf, struct iommu *iommu)
{
	struct vfio_pci_device *device;

	device = calloc(1, sizeof(*device));
	VFIO_ASSERT_NOT_NULL(device);

	VFIO_ASSERT_NOT_NULL(iommu);
	device->iommu = iommu;
	device->bdf = bdf;

	if (iommu->mode->container_path)
		vfio_pci_container_setup(device, bdf);
	else
		vfio_pci_iommufd_setup(device, bdf);

	vfio_pci_device_setup(device);
	vfio_pci_driver_probe(device);

	return device;
}

void vfio_pci_device_cleanup(struct vfio_pci_device *device)
{
	int i;

	if (device->driver.initialized)
		vfio_pci_driver_remove(device);

	vfio_pci_bar_unmap_all(device);

	VFIO_ASSERT_EQ(close(device->fd), 0);

	for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) {
		if (device->msi_eventfds[i] < 0)
			continue;

		VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
	}

	if (device->group_fd)
		VFIO_ASSERT_EQ(close(device->group_fd), 0);

	free(device);
}

void iommu_cleanup(struct iommu *iommu)
{
	if (iommu->iommufd)
		VFIO_ASSERT_EQ(close(iommu->iommufd), 0);
	else
		VFIO_ASSERT_EQ(close(iommu->container_fd), 0);

	free(iommu);
}

/* Returns true iff str is a BDF of the form segment:bus:device.function. */
static bool is_bdf(const char *str)
{
	unsigned int s, b, d, f;
	int length, count;

	/* %n records how many characters were consumed without matching input. */
	count = sscanf(str, "%4x:%2x:%2x.%2x%n", &s, &b, &d, &f, &length);
	return count == 4 && length == strlen(str);
}

/*
 * BDFs are passed as the trailing arguments of argv; scan backwards from the
 * end until a non-BDF argument is found and shrink *argc accordingly.
 */
static char **get_bdfs_cmdline(int *argc, char *argv[], int *nr_bdfs)
{
	int i;

	for (i = *argc - 1; i > 0 && is_bdf(argv[i]); i--)
		continue;

	i++;
	*nr_bdfs = *argc - i;
	*argc -= *nr_bdfs;

	return *nr_bdfs ? &argv[i] : NULL;
}

static char *get_bdf_env(void)
{
	char *bdf;

	bdf = getenv("VFIO_SELFTESTS_BDF");
	if (!bdf)
		return NULL;

	VFIO_ASSERT_TRUE(is_bdf(bdf), "Invalid BDF: %s\n", bdf);
	return bdf;
}

char **vfio_selftests_get_bdfs(int *argc, char *argv[], int *nr_bdfs)
{
	/* static so that &env_bdf remains valid after this function returns. */
	static char *env_bdf;
	char **bdfs;

	bdfs = get_bdfs_cmdline(argc, argv, nr_bdfs);
	if (bdfs)
		return bdfs;

	env_bdf = get_bdf_env();
	if (env_bdf) {
		*nr_bdfs = 1;
		return &env_bdf;
	}

	fprintf(stderr, "Unable to determine which device(s) to use, skipping test.\n");
	fprintf(stderr, "\n");
	fprintf(stderr, "To pass the device address via environment variable:\n");
	fprintf(stderr, "\n");
	fprintf(stderr, "  export VFIO_SELFTESTS_BDF=\"segment:bus:device.function\"\n");
	fprintf(stderr, "  %s [options]\n", argv[0]);
	fprintf(stderr, "\n");
	fprintf(stderr, "To pass the device address(es) via argv:\n");
	fprintf(stderr, "\n");
	fprintf(stderr, "  %s [options] segment:bus:device.function ...\n", argv[0]);
	fprintf(stderr, "\n");
	exit(KSFT_SKIP);
}

const char *vfio_selftests_get_bdf(int *argc, char *argv[])
{
	int nr_bdfs;

	return vfio_selftests_get_bdfs(argc, argv, &nr_bdfs)[0];
}
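/*
 * Typical end-to-end lifecycle of the helpers in this file (sketch):
 *
 *	int main(int argc, char *argv[])
 *	{
 *		const char *bdf = vfio_selftests_get_bdf(&argc, argv);
 *		struct iommu *iommu = iommu_init(default_iommu_mode);
 *		struct vfio_pci_device *device = vfio_pci_device_init(bdf, iommu);
 *
 *		... exercise the device ...
 *
 *		vfio_pci_device_cleanup(device);
 *		iommu_cleanup(iommu);
 *		return 0;
 *	}
 */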