1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <dirent.h>
3 #include <fcntl.h>
4 #include <libgen.h>
5 #include <stdint.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <unistd.h>
9
10 #include <sys/eventfd.h>
11 #include <sys/ioctl.h>
12 #include <sys/mman.h>
13
14 #include <linux/align.h>
15 #include <linux/iommufd.h>
16 #include <linux/kernel.h>
17 #include <linux/limits.h>
18 #include <linux/log2.h>
19 #include <linux/mman.h>
20 #include <linux/overflow.h>
21 #include <linux/sizes.h>
22 #include <linux/types.h>
23 #include <linux/vfio.h>
24
25 #include "kselftest.h"
26 #include <libvfio.h>
27
28 #define PCI_SYSFS_PATH "/sys/bus/pci/devices"
29
/*
 * Issue VFIO_DEVICE_SET_IRQS with ACTION_TRIGGER for @count vectors of IRQ
 * @index, starting at @vector.
 *
 * With a non-zero @count the request carries one eventfd per vector, copied
 * from @fds (which must hold at least @count entries). With @count == 0 the
 * request carries no data and @fds is never read; vfio_pci_irq_disable()
 * relies on that form.
 */
static void vfio_pci_irq_set(struct vfio_pci_device *device,
			     u32 index, u32 vector, u32 count, int *fds)
{
	const size_t fds_size = sizeof(int) * count;
	/* Header immediately followed by the eventfd array, as one buffer. */
	u8 buf[sizeof(struct vfio_irq_set) + fds_size];
	struct vfio_irq_set *irq = (void *)buf;

	memset(buf, 0, sizeof(buf));

	irq->argsz = sizeof(buf);
	irq->flags = VFIO_IRQ_SET_ACTION_TRIGGER |
		     (count ? VFIO_IRQ_SET_DATA_EVENTFD : VFIO_IRQ_SET_DATA_NONE);
	irq->index = index;
	irq->start = vector;
	irq->count = count;

	if (count)
		memcpy(irq->data, fds, fds_size);

	ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, irq);
}
52
/*
 * Ask VFIO to trigger a single interrupt: vector @vector of IRQ @index.
 *
 * The request uses ACTION_TRIGGER with DATA_NONE and a count of one, so no
 * payload follows the vfio_irq_set header.
 */
void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector)
{
	const u32 flags = VFIO_IRQ_SET_ACTION_TRIGGER | VFIO_IRQ_SET_DATA_NONE;
	struct vfio_irq_set irq = { 0 };

	irq.argsz = sizeof(irq);
	irq.flags = flags;
	irq.index = index;
	irq.start = vector;
	irq.count = 1;

	ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, &irq);
}
65
/*
 * Assert that @index names an IRQ type this library can drive.
 *
 * The VFIO selftests only support MSI and MSI-X for now; any other index
 * fails the assertion.
 */
static void check_supported_irq_index(u32 index)
{
	VFIO_ASSERT_TRUE(index == VFIO_PCI_MSI_IRQ_INDEX ||
			 index == VFIO_PCI_MSIX_IRQ_INDEX,
			 "Unsupported IRQ index: %u\n", index);
}
73
/*
 * Enable @count vectors of IRQ @index, starting at @vector.
 *
 * A fresh eventfd is created for every vector in [@vector, @vector + @count)
 * and stored in device->msi_eventfds[]; the whole range is then handed to
 * VFIO in a single request. Every slot in the range must currently be unused
 * (hold a negative value).
 *
 * Only MSI and MSI-X indexes are accepted, @count must not be negative and
 * the range must fit inside device->msi_eventfds[].
 */
void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index, u32 vector,
			 int count)
{
	const u32 nr_eventfds = sizeof(device->msi_eventfds) /
				sizeof(device->msi_eventfds[0]);
	u32 i;

	check_supported_irq_index(index);

	/*
	 * Validate the range before touching the table. A negative @count
	 * would be converted to a huge unsigned value in the loop bound, and
	 * an oversized range would write past the end of msi_eventfds[]. The
	 * subtraction form cannot wrap once @vector is known to be in range.
	 */
	VFIO_ASSERT_GE(count, 0, "Invalid IRQ count: %d\n", count);
	VFIO_ASSERT_GE(nr_eventfds, vector,
		       "IRQ vector %u out of range (max %u)\n", vector, nr_eventfds);
	VFIO_ASSERT_GE(nr_eventfds - vector, (u32)count,
		       "IRQ range [%u, %u + %d) exceeds %u eventfds\n",
		       vector, vector, count, nr_eventfds);

	for (i = vector; i < vector + count; i++) {
		/* The slot must be free; enabling twice is a caller bug. */
		VFIO_ASSERT_LT(device->msi_eventfds[i], 0);
		device->msi_eventfds[i] = eventfd(0, 0);
		VFIO_ASSERT_GE(device->msi_eventfds[i], 0);
	}

	vfio_pci_irq_set(device, index, vector, count, device->msi_eventfds + vector);
}
89
/*
 * Disable IRQ @index on @device.
 *
 * Every eventfd currently held in device->msi_eventfds[] is closed and its
 * slot reset to -1, after which an empty (count == 0, no data) trigger request
 * is sent for @index.
 *
 * Only MSI and MSI-X indexes are accepted.
 */
void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index)
{
	int i;

	check_supported_irq_index(index);

	for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) {
		/* Negative slots were never enabled; nothing to close. */
		if (device->msi_eventfds[i] >= 0) {
			VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
			device->msi_eventfds[i] = -1;
		}
	}

	vfio_pci_irq_set(device, index, 0, 0, NULL);
}
106
/*
 * Query VFIO for information about IRQ @index and store it in @irq_info.
 *
 * Only the argsz and index fields are written before the ioctl; the other
 * fields of @irq_info are passed through as the caller left them.
 */
static void vfio_pci_irq_get(struct vfio_pci_device *device, u32 index,
			     struct vfio_irq_info *irq_info)
{
	irq_info->index = index;
	irq_info->argsz = sizeof(*irq_info);

	ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info);
}
115
vfio_pci_region_get(struct vfio_pci_device * device,int index,struct vfio_region_info * info)116 static void vfio_pci_region_get(struct vfio_pci_device *device, int index,
117 struct vfio_region_info *info)
118 {
119 memset(info, 0, sizeof(*info));
120
121 info->argsz = sizeof(*info);
122 info->index = index;
123
124 ioctl_assert(device->fd, VFIO_DEVICE_GET_REGION_INFO, info);
125 }
126
vfio_pci_bar_map(struct vfio_pci_device * device,int index)127 static void vfio_pci_bar_map(struct vfio_pci_device *device, int index)
128 {
129 struct vfio_pci_bar *bar = &device->bars[index];
130 size_t align, size;
131 int prot = 0;
132 void *vaddr;
133
134 VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
135 VFIO_ASSERT_NULL(bar->vaddr);
136 VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP);
137 VFIO_ASSERT_TRUE(is_power_of_2(bar->info.size));
138
139 if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ)
140 prot |= PROT_READ;
141 if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE)
142 prot |= PROT_WRITE;
143
144 size = bar->info.size;
145
146 /*
147 * Align BAR mmaps to improve page fault granularity during potential
148 * subsequent IOMMU mapping of these BAR vaddr. 1G for x86 is the
149 * largest hugepage size across any architecture, so no benefit from
150 * larger alignment. BARs smaller than 1G will be aligned by their
151 * power-of-two size, guaranteeing sufficient alignment for smaller
152 * hugepages, if present.
153 */
154 align = min_t(size_t, size, SZ_1G);
155
156 vaddr = mmap_reserve(size, align, 0);
157 bar->vaddr = mmap(vaddr, size, prot, MAP_SHARED | MAP_FIXED,
158 device->fd, bar->info.offset);
159 VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED);
160
161 madvise(bar->vaddr, size, MADV_HUGEPAGE);
162 }
163
vfio_pci_bar_unmap(struct vfio_pci_device * device,int index)164 static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index)
165 {
166 struct vfio_pci_bar *bar = &device->bars[index];
167
168 VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
169 VFIO_ASSERT_NOT_NULL(bar->vaddr);
170
171 VFIO_ASSERT_EQ(munmap(bar->vaddr, bar->info.size), 0);
172 bar->vaddr = NULL;
173 }
174
vfio_pci_bar_unmap_all(struct vfio_pci_device * device)175 static void vfio_pci_bar_unmap_all(struct vfio_pci_device *device)
176 {
177 int i;
178
179 for (i = 0; i < PCI_STD_NUM_BARS; i++) {
180 if (device->bars[i].vaddr)
181 vfio_pci_bar_unmap(device, i);
182 }
183 }
184
vfio_pci_config_access(struct vfio_pci_device * device,bool write,size_t config,size_t size,void * data)185 void vfio_pci_config_access(struct vfio_pci_device *device, bool write,
186 size_t config, size_t size, void *data)
187 {
188 struct vfio_region_info *config_space = &device->config_space;
189 int ret;
190
191 if (write)
192 ret = pwrite(device->fd, data, size, config_space->offset + config);
193 else
194 ret = pread(device->fd, data, size, config_space->offset + config);
195
196 VFIO_ASSERT_EQ(ret, size, "Failed to %s PCI config space: 0x%lx\n",
197 write ? "write to" : "read from", config);
198 }
199
vfio_pci_device_reset(struct vfio_pci_device * device)200 void vfio_pci_device_reset(struct vfio_pci_device *device)
201 {
202 ioctl_assert(device->fd, VFIO_DEVICE_RESET, NULL);
203 }
204
vfio_pci_get_group_from_dev(const char * bdf)205 static unsigned int vfio_pci_get_group_from_dev(const char *bdf)
206 {
207 char dev_iommu_group_path[PATH_MAX] = {0};
208 char sysfs_path[PATH_MAX] = {0};
209 unsigned int group;
210 int ret;
211
212 snprintf(sysfs_path, PATH_MAX, "%s/%s/iommu_group", PCI_SYSFS_PATH, bdf);
213
214 ret = readlink(sysfs_path, dev_iommu_group_path, sizeof(dev_iommu_group_path));
215 VFIO_ASSERT_NE(ret, -1, "Failed to get the IOMMU group for device: %s\n", bdf);
216
217 ret = sscanf(basename(dev_iommu_group_path), "%u", &group);
218 VFIO_ASSERT_EQ(ret, 1, "Failed to get the IOMMU group for device: %s\n", bdf);
219
220 return group;
221 }
222
vfio_pci_group_setup(struct vfio_pci_device * device,const char * bdf)223 static void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf)
224 {
225 struct vfio_group_status group_status = {
226 .argsz = sizeof(group_status),
227 };
228 char group_path[32];
229 int group;
230
231 group = vfio_pci_get_group_from_dev(bdf);
232 snprintf(group_path, sizeof(group_path), "/dev/vfio/%d", group);
233
234 device->group_fd = open(group_path, O_RDWR);
235 VFIO_ASSERT_GE(device->group_fd, 0, "open(%s) failed\n", group_path);
236
237 ioctl_assert(device->group_fd, VFIO_GROUP_GET_STATUS, &group_status);
238 VFIO_ASSERT_TRUE(group_status.flags & VFIO_GROUP_FLAGS_VIABLE);
239
240 ioctl_assert(device->group_fd, VFIO_GROUP_SET_CONTAINER, &device->iommu->container_fd);
241 }
242
vfio_pci_container_setup(struct vfio_pci_device * device,const char * bdf)243 static void vfio_pci_container_setup(struct vfio_pci_device *device, const char *bdf)
244 {
245 struct iommu *iommu = device->iommu;
246 unsigned long iommu_type = iommu->mode->iommu_type;
247 int ret;
248
249 vfio_pci_group_setup(device, bdf);
250
251 ret = ioctl(iommu->container_fd, VFIO_CHECK_EXTENSION, iommu_type);
252 VFIO_ASSERT_GT(ret, 0, "VFIO IOMMU type %lu not supported\n", iommu_type);
253
254 /*
255 * Allow multiple threads to race to set the IOMMU type on the
256 * container. The first will succeed and the rest should fail
257 * because the IOMMU type is already set.
258 */
259 (void)ioctl(iommu->container_fd, VFIO_SET_IOMMU, (void *)iommu_type);
260
261 device->fd = ioctl(device->group_fd, VFIO_GROUP_GET_DEVICE_FD, bdf);
262 VFIO_ASSERT_GE(device->fd, 0);
263 }
264
vfio_pci_device_setup(struct vfio_pci_device * device)265 static void vfio_pci_device_setup(struct vfio_pci_device *device)
266 {
267 int i;
268
269 device->info.argsz = sizeof(device->info);
270 ioctl_assert(device->fd, VFIO_DEVICE_GET_INFO, &device->info);
271
272 vfio_pci_region_get(device, VFIO_PCI_CONFIG_REGION_INDEX, &device->config_space);
273
274 /* Sanity check VFIO does not advertise mmap for config space */
275 VFIO_ASSERT_TRUE(!(device->config_space.flags & VFIO_REGION_INFO_FLAG_MMAP),
276 "PCI config space should not support mmap()\n");
277
278 for (i = 0; i < PCI_STD_NUM_BARS; i++) {
279 struct vfio_pci_bar *bar = device->bars + i;
280
281 vfio_pci_region_get(device, i, &bar->info);
282 if (bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP)
283 vfio_pci_bar_map(device, i);
284 }
285
286 vfio_pci_irq_get(device, VFIO_PCI_MSI_IRQ_INDEX, &device->msi_info);
287 vfio_pci_irq_get(device, VFIO_PCI_MSIX_IRQ_INDEX, &device->msix_info);
288
289 for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++)
290 device->msi_eventfds[i] = -1;
291 }
292
vfio_pci_get_cdev_path(const char * bdf)293 const char *vfio_pci_get_cdev_path(const char *bdf)
294 {
295 char dir_path[PATH_MAX];
296 struct dirent *entry;
297 char *cdev_path;
298 DIR *dir;
299
300 cdev_path = calloc(PATH_MAX, 1);
301 VFIO_ASSERT_NOT_NULL(cdev_path);
302
303 snprintf(dir_path, sizeof(dir_path), "/sys/bus/pci/devices/%s/vfio-dev/", bdf);
304
305 dir = opendir(dir_path);
306 VFIO_ASSERT_NOT_NULL(dir, "Failed to open directory %s\n", dir_path);
307
308 while ((entry = readdir(dir)) != NULL) {
309 /* Find the file that starts with "vfio" */
310 if (strncmp("vfio", entry->d_name, 4))
311 continue;
312
313 snprintf(cdev_path, PATH_MAX, "/dev/vfio/devices/%s", entry->d_name);
314 break;
315 }
316
317 VFIO_ASSERT_NE(cdev_path[0], 0, "Failed to find vfio cdev file.\n");
318 VFIO_ASSERT_EQ(closedir(dir), 0);
319
320 return cdev_path;
321 }
322
vfio_device_bind_iommufd(int device_fd,int iommufd)323 static void vfio_device_bind_iommufd(int device_fd, int iommufd)
324 {
325 struct vfio_device_bind_iommufd args = {
326 .argsz = sizeof(args),
327 .iommufd = iommufd,
328 };
329
330 ioctl_assert(device_fd, VFIO_DEVICE_BIND_IOMMUFD, &args);
331 }
332
/*
 * Attach the VFIO device behind @device_fd to the iommufd page table object
 * identified by @pt_id.
 */
static void vfio_device_attach_iommufd_pt(int device_fd, u32 pt_id)
{
	struct vfio_device_attach_iommufd_pt args = { 0 };

	args.argsz = sizeof(args);
	args.pt_id = pt_id;

	ioctl_assert(device_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &args);
}
342
vfio_pci_iommufd_setup(struct vfio_pci_device * device,const char * bdf)343 static void vfio_pci_iommufd_setup(struct vfio_pci_device *device, const char *bdf)
344 {
345 const char *cdev_path = vfio_pci_get_cdev_path(bdf);
346
347 device->fd = open(cdev_path, O_RDWR);
348 VFIO_ASSERT_GE(device->fd, 0);
349 free((void *)cdev_path);
350
351 vfio_device_bind_iommufd(device->fd, device->iommu->iommufd);
352 vfio_device_attach_iommufd_pt(device->fd, device->iommu->ioas_id);
353 }
354
vfio_pci_device_init(const char * bdf,struct iommu * iommu)355 struct vfio_pci_device *vfio_pci_device_init(const char *bdf, struct iommu *iommu)
356 {
357 struct vfio_pci_device *device;
358
359 device = calloc(1, sizeof(*device));
360 VFIO_ASSERT_NOT_NULL(device);
361
362 VFIO_ASSERT_NOT_NULL(iommu);
363 device->iommu = iommu;
364 device->bdf = bdf;
365
366 if (iommu->mode->container_path)
367 vfio_pci_container_setup(device, bdf);
368 else
369 vfio_pci_iommufd_setup(device, bdf);
370
371 vfio_pci_device_setup(device);
372 vfio_pci_driver_probe(device);
373
374 return device;
375 }
376
vfio_pci_device_cleanup(struct vfio_pci_device * device)377 void vfio_pci_device_cleanup(struct vfio_pci_device *device)
378 {
379 int i;
380
381 if (device->driver.initialized)
382 vfio_pci_driver_remove(device);
383
384 vfio_pci_bar_unmap_all(device);
385
386 VFIO_ASSERT_EQ(close(device->fd), 0);
387
388 for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) {
389 if (device->msi_eventfds[i] < 0)
390 continue;
391
392 VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
393 }
394
395 if (device->group_fd)
396 VFIO_ASSERT_EQ(close(device->group_fd), 0);
397
398 free(device);
399 }
400