xref: /linux/drivers/vfio/pci/vfio_pci_rdwr.c (revision 349bd28a86f2bc33b8c61f6cc7886f45d5c7cda7)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO PCI I/O Port & MMIO access
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/fs.h>
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/vfio.h>
#include <linux/vgaarb.h>
#include <linux/io-64-nonatomic-lo-hi.h>

#include "vfio_pci_priv.h"

#ifdef __LITTLE_ENDIAN
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#else
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#endif
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8

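/*
 * VFIO_IOWRITE() generates vfio_pci_core_iowrite{8,16,32,64}().  When
 * @test_mem is set (i.e. the target is MMIO rather than I/O port space),
 * the access is performed under @vdev->memory_lock and fails with -EIO
 * if PCI memory decode is disabled, so userspace cannot trigger accesses
 * to a BAR whose memory enable bit has been cleared.
 */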
#define VFIO_IOWRITE(size) \
int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev,	\
			bool test_mem, u##size val, void __iomem *io)	\
{									\
	if (test_mem) {							\
		down_read(&vdev->memory_lock);				\
		if (!__vfio_pci_memory_enabled(vdev)) {			\
			up_read(&vdev->memory_lock);			\
			return -EIO;					\
		}							\
	}								\
									\
	vfio_iowrite##size(val, io);					\
									\
	if (test_mem)							\
		up_read(&vdev->memory_lock);				\
									\
	return 0;							\
}									\
EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size);

VFIO_IOWRITE(8)
VFIO_IOWRITE(16)
VFIO_IOWRITE(32)
VFIO_IOWRITE(64)

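/*
 * VFIO_IOREAD() generates the read-side counterparts,
 * vfio_pci_core_ioread{8,16,32,64}(), with the same memory-enable check
 * applied before touching the mapping.
 */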
#define VFIO_IOREAD(size) \
int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev,	\
			bool test_mem, u##size *val, void __iomem *io)	\
{									\
	if (test_mem) {							\
		down_read(&vdev->memory_lock);				\
		if (!__vfio_pci_memory_enabled(vdev)) {			\
			up_read(&vdev->memory_lock);			\
			return -EIO;					\
		}							\
	}								\
									\
	*val = vfio_ioread##size(io);					\
									\
	if (test_mem)							\
		up_read(&vdev->memory_lock);				\
									\
	return 0;							\
}									\
EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size);

VFIO_IOREAD(8)
VFIO_IOREAD(16)
VFIO_IOREAD(32)
VFIO_IOREAD(64)

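/*
 * VFIO_IORDWR() generates vfio_pci_iordwr{8,16,32,64}(), each of which
 * copies one access of its width between the user buffer at @buf and the
 * device mapping at @io + @off, using the helpers above.  On success,
 * *@filled is set to the width of the completed access.
 */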
#define VFIO_IORDWR(size)						\
static int vfio_pci_iordwr##size(struct vfio_pci_core_device *vdev,	\
				bool iswrite, bool test_mem,		\
				void __iomem *io, char __user *buf,	\
				loff_t off, size_t *filled)		\
{									\
	u##size val;							\
	int ret;							\
									\
	if (iswrite) {							\
		if (copy_from_user(&val, buf, sizeof(val)))		\
			return -EFAULT;					\
									\
		ret = vfio_pci_core_iowrite##size(vdev, test_mem,	\
						  val, io + off);	\
		if (ret)						\
			return ret;					\
	} else {							\
		ret = vfio_pci_core_ioread##size(vdev, test_mem,	\
						 &val, io + off);	\
		if (ret)						\
			return ret;					\
									\
		if (copy_to_user(buf, &val, sizeof(val)))		\
			return -EFAULT;					\
	}								\
									\
	*filled = sizeof(val);						\
	return 0;							\
}

VFIO_IORDWR(8)
VFIO_IORDWR(16)
VFIO_IORDWR(32)
VFIO_IORDWR(64)

/*
 * Read from or write to an __iomem region (MMIO or I/O port), skipping an
 * excluded range that must remain inaccessible.  Within the excluded range,
 * writes are dropped and reads are filled with -1.  This is intended for
 * handling MSI-X vector tables and the leftover space of ROM BARs.
 */
ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
			       void __iomem *io, char __user *buf,
			       loff_t off, size_t count, size_t x_start,
			       size_t x_end, bool iswrite,
			       enum vfio_pci_io_width max_width)
{
	ssize_t done = 0;
	int ret;

	while (count) {
		size_t fillable, filled;

		if (off < x_start)
			fillable = min(count, (size_t)(x_start - off));
		else if (off >= x_end)
			fillable = count;
		else
			fillable = 0;

		if (fillable >= 8 && !(off % 8) && max_width >= 8) {
			ret = vfio_pci_iordwr64(vdev, iswrite, test_mem,
						io, buf, off, &filled);
			if (ret)
				return ret;

		} else if (fillable >= 4 && !(off % 4) && max_width >= 4) {
			ret = vfio_pci_iordwr32(vdev, iswrite, test_mem,
						io, buf, off, &filled);
			if (ret)
				return ret;

		} else if (fillable >= 2 && !(off % 2) && max_width >= 2) {
			ret = vfio_pci_iordwr16(vdev, iswrite, test_mem,
						io, buf, off, &filled);
			if (ret)
				return ret;

		} else if (fillable) {
			ret = vfio_pci_iordwr8(vdev, iswrite, test_mem,
					       io, buf, off, &filled);
			if (ret)
				return ret;

		} else {
			/* Fill reads with -1, drop writes */
			filled = min(count, (size_t)(x_end - off));
			if (!iswrite) {
				u8 val = 0xFF;
				size_t i;

				for (i = 0; i < filled; i++)
					if (copy_to_user(buf + i, &val, 1))
						return -EFAULT;
			}
		}

		count -= filled;
		done += filled;
		off += filled;
		buf += filled;
	}

	return done;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);

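/*
 * Lazily request and map a BAR.  The mapping is created on first use and
 * cached in @vdev->barmap[bar]; subsequent callers get the cached mapping.
 * Returns 0 on success or a negative errno.
 */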
int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
{
	struct pci_dev *pdev = vdev->pdev;
	int ret;
	void __iomem *io;

	if (vdev->barmap[bar])
		return 0;

	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
	if (ret)
		return ret;

	io = pci_iomap(pdev, bar, 0);
	if (!io) {
		pci_release_selected_regions(pdev, 1 << bar);
		return -ENOMEM;
	}

	vdev->barmap[bar] = io;

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);

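/*
 * Perform a userspace-initiated read/write of a BAR.  The access is clamped
 * to the extent of the BAR; the ROM BAR is mapped and unmapped around each
 * access, while other BARs use the cached barmap.  The MSI-X table range and
 * any space past the end of an actual ROM image are excluded via the
 * x_start/x_end window of vfio_pci_core_do_io_rw().
 */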
ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	size_t x_start = 0, x_end = 0;
	resource_size_t end;
	void __iomem *io;
	struct resource *res = &vdev->pdev->resource[bar];
	ssize_t done;
	enum vfio_pci_io_width max_width = VFIO_PCI_IO_WIDTH_8;

	if (pci_resource_start(pdev, bar))
		end = pci_resource_len(pdev, bar);
	else if (bar == PCI_ROM_RESOURCE && pdev->rom && pdev->romlen)
		end = roundup_pow_of_two(pdev->romlen);
	else
		return -EINVAL;

	if (pos >= end)
		return -EINVAL;

	count = min(count, (size_t)(end - pos));

	if (bar == PCI_ROM_RESOURCE) {
		/*
		 * The ROM can fill less space than the BAR, so we start the
		 * excluded range at the end of the actual ROM.  This makes
		 * filling large ROM BARs much faster.
		 */
		if (pci_resource_start(pdev, bar)) {
			io = pci_map_rom(pdev, &x_start);
		} else {
			io = ioremap(pdev->rom, pdev->romlen);
			x_start = pdev->romlen;
		}
		if (!io)
			return -ENOMEM;
		x_end = end;

		/*
		 * Certain devices (e.g. Intel X710) don't support qword
		 * access to the ROM BAR; such accesses can trigger PCI
		 * AER errors.
		 *
		 * Disable qword access to the ROM BAR universally, matching
		 * the behavior that worked reliably for years before qword
		 * access was enabled.
		 */
		max_width = VFIO_PCI_IO_WIDTH_4;
	} else {
		int ret = vfio_pci_core_setup_barmap(vdev, bar);
		if (ret) {
			done = ret;
			goto out;
		}

		io = vdev->barmap[bar];
	}

	if (bar == vdev->msix_bar) {
		x_start = vdev->msix_offset;
		x_end = vdev->msix_offset + vdev->msix_size;
	}

	done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
				      count, x_start, x_end, iswrite, max_width);

	if (done >= 0)
		*ppos += done;

	if (bar == PCI_ROM_RESOURCE) {
		if (pci_resource_start(pdev, bar))
			pci_unmap_rom(pdev, io);
		else
			iounmap(io);
	}

out:
	return done;
}

#ifdef CONFIG_VFIO_PCI_VGA
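/*
 * Access the legacy VGA ranges (0xa0000-0xbffff MMIO and the 0x3b0/0x3c0
 * I/O port ranges) on behalf of userspace.  The appropriate legacy
 * resource is held via the VGA arbiter around the access, and the range
 * is mapped and unmapped on each call.
 */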
ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			       size_t count, loff_t *ppos, bool iswrite)
{
	int ret;
	loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
	void __iomem *iomem = NULL;
	unsigned int rsrc;
	bool is_ioport;
	ssize_t done;

	if (!vdev->has_vga)
		return -EINVAL;

	if (pos > 0xbfffful)
		return -EINVAL;

	switch ((u32)pos) {
	case 0xa0000 ... 0xbffff:
		count = min(count, (size_t)(0xc0000 - pos));
		iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
		off = pos - 0xa0000;
		rsrc = VGA_RSRC_LEGACY_MEM;
		is_ioport = false;
		break;
	case 0x3b0 ... 0x3bb:
		count = min(count, (size_t)(0x3bc - pos));
		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
		off = pos - 0x3b0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	case 0x3c0 ... 0x3df:
		count = min(count, (size_t)(0x3e0 - pos));
		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
		off = pos - 0x3c0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	default:
		return -EINVAL;
	}

	if (!iomem)
		return -ENOMEM;

	ret = vga_get_interruptible(vdev->pdev, rsrc);
	if (ret) {
		is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
		return ret;
	}

	/*
	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
	 * probing, so we don't currently worry about access in relation
	 * to the memory enable bit in the command register.
	 */
	done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
				      0, 0, iswrite, VFIO_PCI_IO_WIDTH_4);

	vga_put(vdev->pdev, rsrc);

	is_ioport ? ioport_unmap(iomem) : iounmap(iomem);

	if (done >= 0)
		*ppos += done;

	return done;
}
#endif

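/* Perform the ioeventfd write at the width recorded in @ioeventfd->count */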
static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
					bool test_mem)
{
	switch (ioeventfd->count) {
	case 1:
		vfio_pci_core_iowrite8(ioeventfd->vdev, test_mem,
				       ioeventfd->data, ioeventfd->addr);
		break;
	case 2:
		vfio_pci_core_iowrite16(ioeventfd->vdev, test_mem,
					ioeventfd->data, ioeventfd->addr);
		break;
	case 4:
		vfio_pci_core_iowrite32(ioeventfd->vdev, test_mem,
					ioeventfd->data, ioeventfd->addr);
		break;
	case 8:
		vfio_pci_core_iowrite64(ioeventfd->vdev, test_mem,
					ioeventfd->data, ioeventfd->addr);
		break;
	}
}

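/*
 * Eventfd wakeup handler, where we cannot sleep; memory_lock is therefore
 * taken with a trylock, and on contention we return 1 so the virqfd core
 * defers the write to the thread handler below.  Once the lock is held,
 * the memory-enable state has already been checked, so the write itself
 * is issued with test_mem = false.
 */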
static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;
	struct vfio_pci_core_device *vdev = ioeventfd->vdev;

	if (ioeventfd->test_mem) {
		if (!down_read_trylock(&vdev->memory_lock))
			return 1; /* Lock contended, use thread */
		if (!__vfio_pci_memory_enabled(vdev)) {
			up_read(&vdev->memory_lock);
			return 0;
		}
	}

	vfio_pci_ioeventfd_do_write(ioeventfd, false);

	if (ioeventfd->test_mem)
		up_read(&vdev->memory_lock);

	return 0;
}

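/*
 * Threaded fallback for the handler above: sleeping is allowed here, so
 * the write can take memory_lock through the normal test_mem path.
 */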
static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;

	vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
}

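/*
 * Attach or detach an ioeventfd for a BAR offset.  With a valid @fd,
 * signaling the eventfd writes @data (of width @count) to the BAR at
 * @offset without a round trip through userspace.  Passing @fd == -1
 * tears down a previously registered ioeventfd matching @offset, @data
 * and @count.
 */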
int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
		       uint64_t data, int count, int fd)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
	int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
	struct vfio_pci_ioeventfd *ioeventfd;

	/* Only support ioeventfds into BARs */
	if (bar > VFIO_PCI_BAR5_REGION_INDEX)
		return -EINVAL;

	if (pos + count > pci_resource_len(pdev, bar))
		return -EINVAL;

	/* Disallow ioeventfds working around MSI-X table writes */
	if (bar == vdev->msix_bar &&
	    !(pos + count <= vdev->msix_offset ||
	      pos >= vdev->msix_offset + vdev->msix_size))
		return -EINVAL;

	if (count == 8)
		return -EINVAL;

	ret = vfio_pci_core_setup_barmap(vdev, bar);
	if (ret)
		return ret;

	mutex_lock(&vdev->ioeventfds_lock);

	list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
		if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
		    ioeventfd->data == data && ioeventfd->count == count) {
			if (fd == -1) {
				vfio_virqfd_disable(&ioeventfd->virqfd);
				list_del(&ioeventfd->next);
				vdev->ioeventfds_nr--;
				kfree(ioeventfd);
				ret = 0;
			} else
				ret = -EEXIST;

			goto out_unlock;
		}
	}

	if (fd < 0) {
		ret = -ENODEV;
		goto out_unlock;
	}

	if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT);
	if (!ioeventfd) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	ioeventfd->vdev = vdev;
	ioeventfd->addr = vdev->barmap[bar] + pos;
	ioeventfd->data = data;
	ioeventfd->pos = pos;
	ioeventfd->bar = bar;
	ioeventfd->count = count;
	ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;

	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
				 vfio_pci_ioeventfd_thread, NULL,
				 &ioeventfd->virqfd, fd);
	if (ret) {
		kfree(ioeventfd);
		goto out_unlock;
	}

	list_add(&ioeventfd->next, &vdev->ioeventfds_list);
	vdev->ioeventfds_nr++;

out_unlock:
	mutex_unlock(&vdev->ioeventfds_lock);

	return ret;
}
517