xref: /linux/tools/testing/selftests/vfio/lib/vfio_pci_device.c (revision 20face8c75ffb3dd7e8d6743498b47403e57ebda)
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <dirent.h>
3 #include <fcntl.h>
4 #include <libgen.h>
5 #include <stdint.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <unistd.h>
9 
10 #include <sys/eventfd.h>
11 #include <sys/ioctl.h>
12 #include <sys/mman.h>
13 
14 #include <linux/align.h>
15 #include <linux/iommufd.h>
16 #include <linux/kernel.h>
17 #include <linux/limits.h>
18 #include <linux/log2.h>
19 #include <linux/mman.h>
20 #include <linux/overflow.h>
21 #include <linux/sizes.h>
22 #include <linux/types.h>
23 #include <linux/vfio.h>
24 
25 #include <uuid/uuid.h>
26 
27 #include "kselftest.h"
28 #include <libvfio.h>
29 
30 static void vfio_pci_irq_set(struct vfio_pci_device *device,
31 			     u32 index, u32 vector, u32 count, int *fds)
32 {
33 	u8 buf[sizeof(struct vfio_irq_set) + sizeof(int) * count];
34 	struct vfio_irq_set *irq = (void *)&buf;
35 	int *irq_fds = (void *)&irq->data;
36 
37 	memset(buf, 0, sizeof(buf));
38 
39 	irq->argsz = sizeof(buf);
40 	irq->flags = VFIO_IRQ_SET_ACTION_TRIGGER;
41 	irq->index = index;
42 	irq->start = vector;
43 	irq->count = count;
44 
45 	if (count) {
46 		irq->flags |= VFIO_IRQ_SET_DATA_EVENTFD;
47 		memcpy(irq_fds, fds, sizeof(int) * count);
48 	} else {
49 		irq->flags |= VFIO_IRQ_SET_DATA_NONE;
50 	}
51 
52 	ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, irq);
53 }
54 
55 void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector)
56 {
57 	struct vfio_irq_set irq = {
58 		.argsz = sizeof(irq),
59 		.flags = VFIO_IRQ_SET_ACTION_TRIGGER | VFIO_IRQ_SET_DATA_NONE,
60 		.index = index,
61 		.start = vector,
62 		.count = 1,
63 	};
64 
65 	ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, &irq);
66 }
67 
68 static void check_supported_irq_index(u32 index)
69 {
70 	/* VFIO selftests only supports MSI and MSI-x for now. */
71 	VFIO_ASSERT_TRUE(index == VFIO_PCI_MSI_IRQ_INDEX ||
72 			 index == VFIO_PCI_MSIX_IRQ_INDEX,
73 			 "Unsupported IRQ index: %u\n", index);
74 }
75 
76 void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index, u32 vector,
77 			 int count)
78 {
79 	int i;
80 
81 	check_supported_irq_index(index);
82 
83 	for (i = vector; i < vector + count; i++) {
84 		VFIO_ASSERT_LT(device->msi_eventfds[i], 0);
85 		device->msi_eventfds[i] = eventfd(0, 0);
86 		VFIO_ASSERT_GE(device->msi_eventfds[i], 0);
87 	}
88 
89 	vfio_pci_irq_set(device, index, vector, count, device->msi_eventfds + vector);
90 }
91 
92 void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index)
93 {
94 	int i;
95 
96 	check_supported_irq_index(index);
97 
98 	for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) {
99 		if (device->msi_eventfds[i] < 0)
100 			continue;
101 
102 		VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
103 		device->msi_eventfds[i] = -1;
104 	}
105 
106 	vfio_pci_irq_set(device, index, 0, 0, NULL);
107 }
108 
109 static void vfio_pci_irq_get(struct vfio_pci_device *device, u32 index,
110 			     struct vfio_irq_info *irq_info)
111 {
112 	irq_info->argsz = sizeof(*irq_info);
113 	irq_info->index = index;
114 
115 	ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info);
116 }
117 
118 static int vfio_device_feature_ioctl(int fd, u32 flags, void *data,
119 				     size_t data_size)
120 {
121 	u8 buffer[sizeof(struct vfio_device_feature) + data_size] = {};
122 	struct vfio_device_feature *feature = (void *)buffer;
123 
124 	memcpy(feature->data, data, data_size);
125 
126 	feature->argsz = sizeof(buffer);
127 	feature->flags = flags;
128 
129 	return ioctl(fd, VFIO_DEVICE_FEATURE, feature);
130 }
131 
132 static void vfio_device_feature_set(int fd, u16 feature, void *data, size_t data_size)
133 {
134 	u32 flags = VFIO_DEVICE_FEATURE_SET | feature;
135 	int ret;
136 
137 	ret = vfio_device_feature_ioctl(fd, flags, data, data_size);
138 	VFIO_ASSERT_EQ(ret, 0, "Failed to set feature %u\n", feature);
139 }
140 
141 void vfio_device_set_vf_token(int fd, const char *vf_token)
142 {
143 	uuid_t token_uuid = {0};
144 
145 	VFIO_ASSERT_NOT_NULL(vf_token, "vf_token is NULL");
146 	VFIO_ASSERT_EQ(uuid_parse(vf_token, token_uuid), 0);
147 
148 	vfio_device_feature_set(fd, VFIO_DEVICE_FEATURE_PCI_VF_TOKEN,
149 				token_uuid, sizeof(uuid_t));
150 }
151 
152 static void vfio_pci_region_get(struct vfio_pci_device *device, int index,
153 				struct vfio_region_info *info)
154 {
155 	memset(info, 0, sizeof(*info));
156 
157 	info->argsz = sizeof(*info);
158 	info->index = index;
159 
160 	ioctl_assert(device->fd, VFIO_DEVICE_GET_REGION_INFO, info);
161 }
162 
163 static void vfio_pci_bar_map(struct vfio_pci_device *device, int index)
164 {
165 	struct vfio_pci_bar *bar = &device->bars[index];
166 	size_t align, size;
167 	int prot = 0;
168 	void *vaddr;
169 
170 	VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
171 	VFIO_ASSERT_NULL(bar->vaddr);
172 	VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP);
173 	VFIO_ASSERT_TRUE(is_power_of_2(bar->info.size));
174 
175 	if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ)
176 		prot |= PROT_READ;
177 	if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE)
178 		prot |= PROT_WRITE;
179 
180 	size = bar->info.size;
181 
182 	/*
183 	 * Align BAR mmaps to improve page fault granularity during potential
184 	 * subsequent IOMMU mapping of these BAR vaddr. 1G for x86 is the
185 	 * largest hugepage size across any architecture, so no benefit from
186 	 * larger alignment. BARs smaller than 1G will be aligned by their
187 	 * power-of-two size, guaranteeing sufficient alignment for smaller
188 	 * hugepages, if present.
189 	 */
190 	align = min_t(size_t, size, SZ_1G);
191 
192 	vaddr = mmap_reserve(size, align, 0);
193 	bar->vaddr = mmap(vaddr, size, prot, MAP_SHARED | MAP_FIXED,
194 			  device->fd, bar->info.offset);
195 	VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED);
196 
197 	madvise(bar->vaddr, size, MADV_HUGEPAGE);
198 }
199 
200 static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index)
201 {
202 	struct vfio_pci_bar *bar = &device->bars[index];
203 
204 	VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
205 	VFIO_ASSERT_NOT_NULL(bar->vaddr);
206 
207 	VFIO_ASSERT_EQ(munmap(bar->vaddr, bar->info.size), 0);
208 	bar->vaddr = NULL;
209 }
210 
211 static void vfio_pci_bar_unmap_all(struct vfio_pci_device *device)
212 {
213 	int i;
214 
215 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
216 		if (device->bars[i].vaddr)
217 			vfio_pci_bar_unmap(device, i);
218 	}
219 }
220 
221 void vfio_pci_config_access(struct vfio_pci_device *device, bool write,
222 			    size_t config, size_t size, void *data)
223 {
224 	struct vfio_region_info *config_space = &device->config_space;
225 	int ret;
226 
227 	if (write)
228 		ret = pwrite(device->fd, data, size, config_space->offset + config);
229 	else
230 		ret = pread(device->fd, data, size, config_space->offset + config);
231 
232 	VFIO_ASSERT_EQ(ret, size, "Failed to %s PCI config space: 0x%lx\n",
233 		       write ? "write to" : "read from", config);
234 }
235 
236 void vfio_pci_device_reset(struct vfio_pci_device *device)
237 {
238 	ioctl_assert(device->fd, VFIO_DEVICE_RESET, NULL);
239 }
240 
241 void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf)
242 {
243 	struct vfio_group_status group_status = {
244 		.argsz = sizeof(group_status),
245 	};
246 	char group_path[32];
247 	int group;
248 
249 	group = sysfs_iommu_group_get(bdf);
250 	snprintf_assert(group_path, sizeof(group_path), "/dev/vfio/%d", group);
251 
252 	device->group_fd = open(group_path, O_RDWR);
253 	VFIO_ASSERT_GE(device->group_fd, 0, "open(%s) failed\n", group_path);
254 
255 	ioctl_assert(device->group_fd, VFIO_GROUP_GET_STATUS, &group_status);
256 	VFIO_ASSERT_TRUE(group_status.flags & VFIO_GROUP_FLAGS_VIABLE);
257 
258 	ioctl_assert(device->group_fd, VFIO_GROUP_SET_CONTAINER, &device->iommu->container_fd);
259 }
260 
261 void __vfio_pci_group_get_device_fd(struct vfio_pci_device *device,
262 				    const char *bdf, const char *vf_token)
263 {
264 	char arg[64];
265 
266 	/*
267 	 * If a vf_token exists, argument to VFIO_GROUP_GET_DEVICE_FD
268 	 * will be in the form of the following example:
269 	 * "0000:04:10.0 vf_token=bd8d9d2b-5a5f-4f5a-a211-f591514ba1f3"
270 	 */
271 	if (vf_token)
272 		snprintf_assert(arg, ARRAY_SIZE(arg), "%s vf_token=%s", bdf, vf_token);
273 	else
274 		snprintf_assert(arg, ARRAY_SIZE(arg), "%s", bdf);
275 
276 	device->fd = ioctl(device->group_fd, VFIO_GROUP_GET_DEVICE_FD, arg);
277 }
278 
279 static void vfio_pci_group_get_device_fd(struct vfio_pci_device *device,
280 					 const char *bdf, const char *vf_token)
281 {
282 	__vfio_pci_group_get_device_fd(device, bdf, vf_token);
283 	VFIO_ASSERT_GE(device->fd, 0);
284 }
285 
286 void vfio_container_set_iommu(struct vfio_pci_device *device)
287 {
288 	struct iommu *iommu = device->iommu;
289 	unsigned long iommu_type = iommu->mode->iommu_type;
290 	int ret;
291 
292 	ret = ioctl(iommu->container_fd, VFIO_CHECK_EXTENSION, iommu_type);
293 	VFIO_ASSERT_GT(ret, 0, "VFIO IOMMU type %lu not supported\n", iommu_type);
294 
295 	/*
296 	 * Allow multiple threads to race to set the IOMMU type on the
297 	 * container. The first will succeed and the rest should fail
298 	 * because the IOMMU type is already set.
299 	 */
300 	(void)ioctl(iommu->container_fd, VFIO_SET_IOMMU, (void *)iommu_type);
301 }
302 
/*
 * Bring up @device through the VFIO group/container interface: attach the
 * group to the container, set the container's IOMMU type, then obtain the
 * device fd from the group.
 */
static void vfio_pci_container_setup(struct vfio_pci_device *device,
				     const char *bdf, const char *vf_token)
{
	/* Group attach first, IOMMU type second, device fd last. */
	vfio_pci_group_setup(device, bdf);
	vfio_container_set_iommu(device);
	vfio_pci_group_get_device_fd(device, bdf, vf_token);
}
310 
311 static void vfio_pci_device_setup(struct vfio_pci_device *device)
312 {
313 	int i;
314 
315 	device->info.argsz = sizeof(device->info);
316 	ioctl_assert(device->fd, VFIO_DEVICE_GET_INFO, &device->info);
317 
318 	vfio_pci_region_get(device, VFIO_PCI_CONFIG_REGION_INDEX, &device->config_space);
319 
320 	/* Sanity check VFIO does not advertise mmap for config space */
321 	VFIO_ASSERT_TRUE(!(device->config_space.flags & VFIO_REGION_INFO_FLAG_MMAP),
322 			 "PCI config space should not support mmap()\n");
323 
324 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
325 		struct vfio_pci_bar *bar = device->bars + i;
326 
327 		vfio_pci_region_get(device, i, &bar->info);
328 		if (bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP)
329 			vfio_pci_bar_map(device, i);
330 	}
331 
332 	vfio_pci_irq_get(device, VFIO_PCI_MSI_IRQ_INDEX, &device->msi_info);
333 	vfio_pci_irq_get(device, VFIO_PCI_MSIX_IRQ_INDEX, &device->msix_info);
334 
335 	for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++)
336 		device->msi_eventfds[i] = -1;
337 }
338 
339 const char *vfio_pci_get_cdev_path(const char *bdf)
340 {
341 	char dir_path[PATH_MAX];
342 	struct dirent *entry;
343 	char *cdev_path;
344 	DIR *dir;
345 
346 	cdev_path = calloc(PATH_MAX, 1);
347 	VFIO_ASSERT_NOT_NULL(cdev_path);
348 
349 	snprintf_assert(dir_path, sizeof(dir_path), "/sys/bus/pci/devices/%s/vfio-dev/", bdf);
350 
351 	dir = opendir(dir_path);
352 	VFIO_ASSERT_NOT_NULL(dir, "Failed to open directory %s\n", dir_path);
353 
354 	while ((entry = readdir(dir)) != NULL) {
355 		/* Find the file that starts with "vfio" */
356 		if (strncmp("vfio", entry->d_name, 4))
357 			continue;
358 
359 		snprintf_assert(cdev_path, PATH_MAX, "/dev/vfio/devices/%s", entry->d_name);
360 		break;
361 	}
362 
363 	VFIO_ASSERT_NE(cdev_path[0], 0, "Failed to find vfio cdev file.\n");
364 	VFIO_ASSERT_EQ(closedir(dir), 0);
365 
366 	return cdev_path;
367 }
368 
369 int __vfio_device_bind_iommufd(int device_fd, int iommufd, const char *vf_token)
370 {
371 	struct vfio_device_bind_iommufd args = {
372 		.argsz = sizeof(args),
373 		.iommufd = iommufd,
374 	};
375 	uuid_t token_uuid;
376 
377 	if (vf_token) {
378 		VFIO_ASSERT_EQ(uuid_parse(vf_token, token_uuid), 0);
379 		args.flags |= VFIO_DEVICE_BIND_FLAG_TOKEN;
380 		args.token_uuid_ptr = (u64)token_uuid;
381 	}
382 
383 	if (ioctl(device_fd, VFIO_DEVICE_BIND_IOMMUFD, &args))
384 		return -errno;
385 
386 	return 0;
387 }
388 
/*
 * Asserting wrapper around __vfio_device_bind_iommufd(): binding @device_fd
 * to @iommufd must succeed.
 */
static void vfio_device_bind_iommufd(int device_fd, int iommufd,
				     const char *vf_token)
{
	int ret;

	ret = __vfio_device_bind_iommufd(device_fd, iommufd, vf_token);
	VFIO_ASSERT_EQ(ret, 0, "Failed VFIO_DEVICE_BIND_IOMMUFD ioctl\n");
}
396 
397 static void vfio_device_attach_iommufd_pt(int device_fd, u32 pt_id)
398 {
399 	struct vfio_device_attach_iommufd_pt args = {
400 		.argsz = sizeof(args),
401 		.pt_id = pt_id,
402 	};
403 
404 	ioctl_assert(device_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &args);
405 }
406 
407 void vfio_pci_cdev_open(struct vfio_pci_device *device, const char *bdf)
408 {
409 	const char *cdev_path = vfio_pci_get_cdev_path(bdf);
410 
411 	device->fd = open(cdev_path, O_RDWR);
412 	VFIO_ASSERT_GE(device->fd, 0);
413 	free((void *)cdev_path);
414 }
415 
416 static void vfio_pci_iommufd_setup(struct vfio_pci_device *device,
417 				   const char *bdf, const char *vf_token)
418 {
419 	vfio_pci_cdev_open(device, bdf);
420 	vfio_device_bind_iommufd(device->fd, device->iommu->iommufd, vf_token);
421 	vfio_device_attach_iommufd_pt(device->fd, device->iommu->ioas_id);
422 }
423 
424 struct vfio_pci_device *vfio_pci_device_init(const char *bdf, struct iommu *iommu)
425 {
426 	struct vfio_pci_device *device;
427 
428 	device = calloc(1, sizeof(*device));
429 	VFIO_ASSERT_NOT_NULL(device);
430 
431 	VFIO_ASSERT_NOT_NULL(iommu);
432 	device->iommu = iommu;
433 	device->bdf = bdf;
434 
435 	if (iommu->mode->container_path)
436 		vfio_pci_container_setup(device, bdf, NULL);
437 	else
438 		vfio_pci_iommufd_setup(device, bdf, NULL);
439 
440 	vfio_pci_device_setup(device);
441 	vfio_pci_driver_probe(device);
442 
443 	return device;
444 }
445 
446 void vfio_pci_device_cleanup(struct vfio_pci_device *device)
447 {
448 	int i;
449 
450 	if (device->driver.initialized)
451 		vfio_pci_driver_remove(device);
452 
453 	vfio_pci_bar_unmap_all(device);
454 
455 	VFIO_ASSERT_EQ(close(device->fd), 0);
456 
457 	for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) {
458 		if (device->msi_eventfds[i] < 0)
459 			continue;
460 
461 		VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
462 	}
463 
464 	if (device->group_fd)
465 		VFIO_ASSERT_EQ(close(device->group_fd), 0);
466 
467 	free(device);
468 }
469