1 // SPDX-License-Identifier: GPL-2.0-only 2 #include <dirent.h> 3 #include <fcntl.h> 4 #include <libgen.h> 5 #include <stdint.h> 6 #include <stdlib.h> 7 #include <string.h> 8 #include <unistd.h> 9 10 #include <sys/eventfd.h> 11 #include <sys/ioctl.h> 12 #include <sys/mman.h> 13 14 #include <linux/iommufd.h> 15 #include <linux/limits.h> 16 #include <linux/mman.h> 17 #include <linux/overflow.h> 18 #include <linux/types.h> 19 #include <linux/vfio.h> 20 21 #include "kselftest.h" 22 #include <libvfio.h> 23 24 #define PCI_SYSFS_PATH "/sys/bus/pci/devices" 25 26 static void vfio_pci_irq_set(struct vfio_pci_device *device, 27 u32 index, u32 vector, u32 count, int *fds) 28 { 29 u8 buf[sizeof(struct vfio_irq_set) + sizeof(int) * count] = {}; 30 struct vfio_irq_set *irq = (void *)&buf; 31 int *irq_fds = (void *)&irq->data; 32 33 irq->argsz = sizeof(buf); 34 irq->flags = VFIO_IRQ_SET_ACTION_TRIGGER; 35 irq->index = index; 36 irq->start = vector; 37 irq->count = count; 38 39 if (count) { 40 irq->flags |= VFIO_IRQ_SET_DATA_EVENTFD; 41 memcpy(irq_fds, fds, sizeof(int) * count); 42 } else { 43 irq->flags |= VFIO_IRQ_SET_DATA_NONE; 44 } 45 46 ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, irq); 47 } 48 49 void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector) 50 { 51 struct vfio_irq_set irq = { 52 .argsz = sizeof(irq), 53 .flags = VFIO_IRQ_SET_ACTION_TRIGGER | VFIO_IRQ_SET_DATA_NONE, 54 .index = index, 55 .start = vector, 56 .count = 1, 57 }; 58 59 ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, &irq); 60 } 61 62 static void check_supported_irq_index(u32 index) 63 { 64 /* VFIO selftests only supports MSI and MSI-x for now. */ 65 VFIO_ASSERT_TRUE(index == VFIO_PCI_MSI_IRQ_INDEX || 66 index == VFIO_PCI_MSIX_IRQ_INDEX, 67 "Unsupported IRQ index: %u\n", index); 68 } 69 70 void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index, u32 vector, 71 int count) 72 { 73 int i; 74 75 check_supported_irq_index(index); 76 77 for (i = vector; i < vector + count; i++) { 78 VFIO_ASSERT_LT(device->msi_eventfds[i], 0); 79 device->msi_eventfds[i] = eventfd(0, 0); 80 VFIO_ASSERT_GE(device->msi_eventfds[i], 0); 81 } 82 83 vfio_pci_irq_set(device, index, vector, count, device->msi_eventfds + vector); 84 } 85 86 void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index) 87 { 88 int i; 89 90 check_supported_irq_index(index); 91 92 for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) { 93 if (device->msi_eventfds[i] < 0) 94 continue; 95 96 VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0); 97 device->msi_eventfds[i] = -1; 98 } 99 100 vfio_pci_irq_set(device, index, 0, 0, NULL); 101 } 102 103 static void vfio_pci_irq_get(struct vfio_pci_device *device, u32 index, 104 struct vfio_irq_info *irq_info) 105 { 106 irq_info->argsz = sizeof(*irq_info); 107 irq_info->index = index; 108 109 ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info); 110 } 111 112 static void vfio_pci_region_get(struct vfio_pci_device *device, int index, 113 struct vfio_region_info *info) 114 { 115 memset(info, 0, sizeof(*info)); 116 117 info->argsz = sizeof(*info); 118 info->index = index; 119 120 ioctl_assert(device->fd, VFIO_DEVICE_GET_REGION_INFO, info); 121 } 122 123 static void vfio_pci_bar_map(struct vfio_pci_device *device, int index) 124 { 125 struct vfio_pci_bar *bar = &device->bars[index]; 126 int prot = 0; 127 128 VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS); 129 VFIO_ASSERT_NULL(bar->vaddr); 130 VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP); 131 132 if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ) 133 prot |= PROT_READ; 134 if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE) 135 prot |= PROT_WRITE; 136 137 bar->vaddr = mmap(NULL, bar->info.size, prot, MAP_FILE | MAP_SHARED, 138 device->fd, bar->info.offset); 139 VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED); 140 } 141 142 static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index) 143 { 144 struct vfio_pci_bar *bar = &device->bars[index]; 145 146 VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS); 147 VFIO_ASSERT_NOT_NULL(bar->vaddr); 148 149 VFIO_ASSERT_EQ(munmap(bar->vaddr, bar->info.size), 0); 150 bar->vaddr = NULL; 151 } 152 153 static void vfio_pci_bar_unmap_all(struct vfio_pci_device *device) 154 { 155 int i; 156 157 for (i = 0; i < PCI_STD_NUM_BARS; i++) { 158 if (device->bars[i].vaddr) 159 vfio_pci_bar_unmap(device, i); 160 } 161 } 162 163 void vfio_pci_config_access(struct vfio_pci_device *device, bool write, 164 size_t config, size_t size, void *data) 165 { 166 struct vfio_region_info *config_space = &device->config_space; 167 int ret; 168 169 if (write) 170 ret = pwrite(device->fd, data, size, config_space->offset + config); 171 else 172 ret = pread(device->fd, data, size, config_space->offset + config); 173 174 VFIO_ASSERT_EQ(ret, size, "Failed to %s PCI config space: 0x%lx\n", 175 write ? "write to" : "read from", config); 176 } 177 178 void vfio_pci_device_reset(struct vfio_pci_device *device) 179 { 180 ioctl_assert(device->fd, VFIO_DEVICE_RESET, NULL); 181 } 182 183 static unsigned int vfio_pci_get_group_from_dev(const char *bdf) 184 { 185 char dev_iommu_group_path[PATH_MAX] = {0}; 186 char sysfs_path[PATH_MAX] = {0}; 187 unsigned int group; 188 int ret; 189 190 snprintf(sysfs_path, PATH_MAX, "%s/%s/iommu_group", PCI_SYSFS_PATH, bdf); 191 192 ret = readlink(sysfs_path, dev_iommu_group_path, sizeof(dev_iommu_group_path)); 193 VFIO_ASSERT_NE(ret, -1, "Failed to get the IOMMU group for device: %s\n", bdf); 194 195 ret = sscanf(basename(dev_iommu_group_path), "%u", &group); 196 VFIO_ASSERT_EQ(ret, 1, "Failed to get the IOMMU group for device: %s\n", bdf); 197 198 return group; 199 } 200 201 static void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf) 202 { 203 struct vfio_group_status group_status = { 204 .argsz = sizeof(group_status), 205 }; 206 char group_path[32]; 207 int group; 208 209 group = vfio_pci_get_group_from_dev(bdf); 210 snprintf(group_path, sizeof(group_path), "/dev/vfio/%d", group); 211 212 device->group_fd = open(group_path, O_RDWR); 213 VFIO_ASSERT_GE(device->group_fd, 0, "open(%s) failed\n", group_path); 214 215 ioctl_assert(device->group_fd, VFIO_GROUP_GET_STATUS, &group_status); 216 VFIO_ASSERT_TRUE(group_status.flags & VFIO_GROUP_FLAGS_VIABLE); 217 218 ioctl_assert(device->group_fd, VFIO_GROUP_SET_CONTAINER, &device->iommu->container_fd); 219 } 220 221 static void vfio_pci_container_setup(struct vfio_pci_device *device, const char *bdf) 222 { 223 struct iommu *iommu = device->iommu; 224 unsigned long iommu_type = iommu->mode->iommu_type; 225 int ret; 226 227 vfio_pci_group_setup(device, bdf); 228 229 ret = ioctl(iommu->container_fd, VFIO_CHECK_EXTENSION, iommu_type); 230 VFIO_ASSERT_GT(ret, 0, "VFIO IOMMU type %lu not supported\n", iommu_type); 231 232 /* 233 * Allow multiple threads to race to set the IOMMU type on the 234 * container. The first will succeed and the rest should fail 235 * because the IOMMU type is already set. 236 */ 237 (void)ioctl(iommu->container_fd, VFIO_SET_IOMMU, (void *)iommu_type); 238 239 device->fd = ioctl(device->group_fd, VFIO_GROUP_GET_DEVICE_FD, bdf); 240 VFIO_ASSERT_GE(device->fd, 0); 241 } 242 243 static void vfio_pci_device_setup(struct vfio_pci_device *device) 244 { 245 int i; 246 247 device->info.argsz = sizeof(device->info); 248 ioctl_assert(device->fd, VFIO_DEVICE_GET_INFO, &device->info); 249 250 vfio_pci_region_get(device, VFIO_PCI_CONFIG_REGION_INDEX, &device->config_space); 251 252 /* Sanity check VFIO does not advertise mmap for config space */ 253 VFIO_ASSERT_TRUE(!(device->config_space.flags & VFIO_REGION_INFO_FLAG_MMAP), 254 "PCI config space should not support mmap()\n"); 255 256 for (i = 0; i < PCI_STD_NUM_BARS; i++) { 257 struct vfio_pci_bar *bar = device->bars + i; 258 259 vfio_pci_region_get(device, i, &bar->info); 260 if (bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP) 261 vfio_pci_bar_map(device, i); 262 } 263 264 vfio_pci_irq_get(device, VFIO_PCI_MSI_IRQ_INDEX, &device->msi_info); 265 vfio_pci_irq_get(device, VFIO_PCI_MSIX_IRQ_INDEX, &device->msix_info); 266 267 for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) 268 device->msi_eventfds[i] = -1; 269 } 270 271 const char *vfio_pci_get_cdev_path(const char *bdf) 272 { 273 char dir_path[PATH_MAX]; 274 struct dirent *entry; 275 char *cdev_path; 276 DIR *dir; 277 278 cdev_path = calloc(PATH_MAX, 1); 279 VFIO_ASSERT_NOT_NULL(cdev_path); 280 281 snprintf(dir_path, sizeof(dir_path), "/sys/bus/pci/devices/%s/vfio-dev/", bdf); 282 283 dir = opendir(dir_path); 284 VFIO_ASSERT_NOT_NULL(dir, "Failed to open directory %s\n", dir_path); 285 286 while ((entry = readdir(dir)) != NULL) { 287 /* Find the file that starts with "vfio" */ 288 if (strncmp("vfio", entry->d_name, 4)) 289 continue; 290 291 snprintf(cdev_path, PATH_MAX, "/dev/vfio/devices/%s", entry->d_name); 292 break; 293 } 294 295 VFIO_ASSERT_NE(cdev_path[0], 0, "Failed to find vfio cdev file.\n"); 296 VFIO_ASSERT_EQ(closedir(dir), 0); 297 298 return cdev_path; 299 } 300 301 static void vfio_device_bind_iommufd(int device_fd, int iommufd) 302 { 303 struct vfio_device_bind_iommufd args = { 304 .argsz = sizeof(args), 305 .iommufd = iommufd, 306 }; 307 308 ioctl_assert(device_fd, VFIO_DEVICE_BIND_IOMMUFD, &args); 309 } 310 311 static void vfio_device_attach_iommufd_pt(int device_fd, u32 pt_id) 312 { 313 struct vfio_device_attach_iommufd_pt args = { 314 .argsz = sizeof(args), 315 .pt_id = pt_id, 316 }; 317 318 ioctl_assert(device_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &args); 319 } 320 321 static void vfio_pci_iommufd_setup(struct vfio_pci_device *device, const char *bdf) 322 { 323 const char *cdev_path = vfio_pci_get_cdev_path(bdf); 324 325 device->fd = open(cdev_path, O_RDWR); 326 VFIO_ASSERT_GE(device->fd, 0); 327 free((void *)cdev_path); 328 329 vfio_device_bind_iommufd(device->fd, device->iommu->iommufd); 330 vfio_device_attach_iommufd_pt(device->fd, device->iommu->ioas_id); 331 } 332 333 struct vfio_pci_device *vfio_pci_device_init(const char *bdf, struct iommu *iommu) 334 { 335 struct vfio_pci_device *device; 336 337 device = calloc(1, sizeof(*device)); 338 VFIO_ASSERT_NOT_NULL(device); 339 340 VFIO_ASSERT_NOT_NULL(iommu); 341 device->iommu = iommu; 342 device->bdf = bdf; 343 344 if (iommu->mode->container_path) 345 vfio_pci_container_setup(device, bdf); 346 else 347 vfio_pci_iommufd_setup(device, bdf); 348 349 vfio_pci_device_setup(device); 350 vfio_pci_driver_probe(device); 351 352 return device; 353 } 354 355 void vfio_pci_device_cleanup(struct vfio_pci_device *device) 356 { 357 int i; 358 359 if (device->driver.initialized) 360 vfio_pci_driver_remove(device); 361 362 vfio_pci_bar_unmap_all(device); 363 364 VFIO_ASSERT_EQ(close(device->fd), 0); 365 366 for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) { 367 if (device->msi_eventfds[i] < 0) 368 continue; 369 370 VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0); 371 } 372 373 if (device->group_fd) 374 VFIO_ASSERT_EQ(close(device->group_fd), 0); 375 376 free(device); 377 } 378