xref: /linux/drivers/vfio/pci/vfio_pci_rdwr.c (revision 3bf83e47b497d2630d2dcb408ec14ad95050cead)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VFIO PCI I/O Port & MMIO access
4  *
5  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
6  *     Author: Alex Williamson <alex.williamson@redhat.com>
7  *
8  * Derived from original vfio:
9  * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
10  * Author: Tom Lyon, pugs@cisco.com
11  */
12 
13 #include <linux/fs.h>
14 #include <linux/pci.h>
15 #include <linux/uaccess.h>
16 #include <linux/io.h>
17 #include <linux/vfio.h>
18 #include <linux/vgaarb.h>
19 #include <linux/io-64-nonatomic-lo-hi.h>
20 
21 #include "vfio_pci_priv.h"
22 
/*
 * Width-specific accessor aliases used by the helpers in this file.
 *
 * Single-byte accesses always map to ioread8()/iowrite8().  The 16-, 32-
 * and 64-bit aliases map to the plain ioreadN()/iowriteN() helpers when
 * __LITTLE_ENDIAN is defined and to the "be" variants otherwise.
 */
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8

#ifndef __LITTLE_ENDIAN
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#else
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#endif
40 
41 #define VFIO_IOWRITE(size) \
42 int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev,	\
43 			bool test_mem, u##size val, void __iomem *io)	\
44 {									\
45 	if (test_mem) {							\
46 		down_read(&vdev->memory_lock);				\
47 		if (!__vfio_pci_memory_enabled(vdev)) {			\
48 			up_read(&vdev->memory_lock);			\
49 			return -EIO;					\
50 		}							\
51 	}								\
52 									\
53 	vfio_iowrite##size(val, io);					\
54 									\
55 	if (test_mem)							\
56 		up_read(&vdev->memory_lock);				\
57 									\
58 	return 0;							\
59 }									\
60 EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size);
61 
62 VFIO_IOWRITE(8)
63 VFIO_IOWRITE(16)
64 VFIO_IOWRITE(32)
65 VFIO_IOWRITE(64)
66 
67 #define VFIO_IOREAD(size) \
68 int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev,	\
69 			bool test_mem, u##size *val, void __iomem *io)	\
70 {									\
71 	if (test_mem) {							\
72 		down_read(&vdev->memory_lock);				\
73 		if (!__vfio_pci_memory_enabled(vdev)) {			\
74 			up_read(&vdev->memory_lock);			\
75 			return -EIO;					\
76 		}							\
77 	}								\
78 									\
79 	*val = vfio_ioread##size(io);					\
80 									\
81 	if (test_mem)							\
82 		up_read(&vdev->memory_lock);				\
83 									\
84 	return 0;							\
85 }									\
86 EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size);
87 
88 VFIO_IOREAD(8)
89 VFIO_IOREAD(16)
90 VFIO_IOREAD(32)
91 VFIO_IOREAD(64)
92 
93 #define VFIO_IORDWR(size)						\
94 static int vfio_pci_iordwr##size(struct vfio_pci_core_device *vdev,\
95 				bool iswrite, bool test_mem,		\
96 				void __iomem *io, char __user *buf,	\
97 				loff_t off, size_t *filled)		\
98 {									\
99 	u##size val;							\
100 	int ret;							\
101 									\
102 	if (iswrite) {							\
103 		if (copy_from_user(&val, buf, sizeof(val)))		\
104 			return -EFAULT;					\
105 									\
106 		ret = vfio_pci_core_iowrite##size(vdev, test_mem,	\
107 						  val, io + off);	\
108 		if (ret)						\
109 			return ret;					\
110 	} else {							\
111 		ret = vfio_pci_core_ioread##size(vdev, test_mem,	\
112 						 &val, io + off);	\
113 		if (ret)						\
114 			return ret;					\
115 									\
116 		if (copy_to_user(buf, &val, sizeof(val)))		\
117 			return -EFAULT;					\
118 	}								\
119 									\
120 	*filled = sizeof(val);						\
121 	return 0;							\
122 }									\
123 
124 VFIO_IORDWR(8)
125 VFIO_IORDWR(16)
126 VFIO_IORDWR(32)
127 VFIO_IORDWR(64)
128 
129 /*
130  * Read or write from an __iomem region (MMIO or I/O port) with an excluded
131  * range which is inaccessible.  The excluded range drops writes and fills
132  * reads with -1.  This is intended for handling MSI-X vector tables and
133  * leftover space for ROM BARs.
134  */
vfio_pci_core_do_io_rw(struct vfio_pci_core_device * vdev,bool test_mem,void __iomem * io,char __user * buf,loff_t off,size_t count,size_t x_start,size_t x_end,bool iswrite,enum vfio_pci_io_width max_width)135 ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
136 			       void __iomem *io, char __user *buf,
137 			       loff_t off, size_t count, size_t x_start,
138 			       size_t x_end, bool iswrite,
139 			       enum vfio_pci_io_width max_width)
140 {
141 	ssize_t done = 0;
142 	int ret;
143 
144 	while (count) {
145 		size_t fillable, filled;
146 
147 		if (off < x_start)
148 			fillable = min(count, (size_t)(x_start - off));
149 		else if (off >= x_end)
150 			fillable = count;
151 		else
152 			fillable = 0;
153 
154 		if (fillable >= 8 && !(off % 8) && max_width >= 8) {
155 			ret = vfio_pci_iordwr64(vdev, iswrite, test_mem,
156 						io, buf, off, &filled);
157 			if (ret)
158 				return ret;
159 
160 		} else if (fillable >= 4 && !(off % 4) && max_width >= 4) {
161 			ret = vfio_pci_iordwr32(vdev, iswrite, test_mem,
162 						io, buf, off, &filled);
163 			if (ret)
164 				return ret;
165 
166 		} else if (fillable >= 2 && !(off % 2) && max_width >= 2) {
167 			ret = vfio_pci_iordwr16(vdev, iswrite, test_mem,
168 						io, buf, off, &filled);
169 			if (ret)
170 				return ret;
171 
172 		} else if (fillable) {
173 			ret = vfio_pci_iordwr8(vdev, iswrite, test_mem,
174 					       io, buf, off, &filled);
175 			if (ret)
176 				return ret;
177 
178 		} else {
179 			/* Fill reads with -1, drop writes */
180 			filled = min(count, (size_t)(x_end - off));
181 			if (!iswrite) {
182 				u8 val = 0xFF;
183 				size_t i;
184 
185 				for (i = 0; i < filled; i++)
186 					if (copy_to_user(buf + i, &val, 1))
187 						return -EFAULT;
188 			}
189 		}
190 
191 		count -= filled;
192 		done += filled;
193 		off += filled;
194 		buf += filled;
195 	}
196 
197 	return done;
198 }
199 EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
200 
201 /*
202  * The barmap is set up in vfio_pci_core_enable().  Callers use this
203  * function to check that the BAR resources are requested or that the
204  * pci_iomap() was done.
205  */
vfio_pci_core_setup_barmap(struct vfio_pci_core_device * vdev,int bar)206 int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
207 {
208 	if (IS_ERR(vdev->barmap[bar]))
209 		return PTR_ERR(vdev->barmap[bar]);
210 	return 0;
211 }
212 EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);
213 
vfio_pci_bar_rw(struct vfio_pci_core_device * vdev,char __user * buf,size_t count,loff_t * ppos,bool iswrite)214 ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
215 			size_t count, loff_t *ppos, bool iswrite)
216 {
217 	struct pci_dev *pdev = vdev->pdev;
218 	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
219 	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
220 	size_t x_start = 0, x_end = 0;
221 	resource_size_t end;
222 	void __iomem *io;
223 	struct resource *res = &vdev->pdev->resource[bar];
224 	ssize_t done;
225 	enum vfio_pci_io_width max_width = VFIO_PCI_IO_WIDTH_8;
226 
227 	if (pci_resource_start(pdev, bar))
228 		end = pci_resource_len(pdev, bar);
229 	else if (bar == PCI_ROM_RESOURCE && pdev->rom && pdev->romlen)
230 		end = roundup_pow_of_two(pdev->romlen);
231 	else
232 		return -EINVAL;
233 
234 	if (pos >= end)
235 		return -EINVAL;
236 
237 	count = min(count, (size_t)(end - pos));
238 
239 	if (bar == PCI_ROM_RESOURCE) {
240 		/*
241 		 * The ROM can fill less space than the BAR, so we start the
242 		 * excluded range at the end of the actual ROM.  This makes
243 		 * filling large ROM BARs much faster.
244 		 */
245 		if (pci_resource_start(pdev, bar)) {
246 			io = pci_map_rom(pdev, &x_start);
247 		} else {
248 			io = ioremap(pdev->rom, pdev->romlen);
249 			x_start = pdev->romlen;
250 		}
251 		if (!io)
252 			return -ENOMEM;
253 		x_end = end;
254 
255 		/*
256 		 * Certain devices (e.g. Intel X710) don't support qword
257 		 * access to the ROM bar. Otherwise PCI AER errors might be
258 		 * triggered.
259 		 *
260 		 * Disable qword access to the ROM bar universally, which
261 		 * worked reliably for years before qword access is enabled.
262 		 */
263 		max_width = VFIO_PCI_IO_WIDTH_4;
264 	} else {
265 		int ret = vfio_pci_core_setup_barmap(vdev, bar);
266 		if (ret) {
267 			done = ret;
268 			goto out;
269 		}
270 
271 		io = vdev->barmap[bar];
272 	}
273 
274 	if (bar == vdev->msix_bar) {
275 		x_start = vdev->msix_offset;
276 		x_end = vdev->msix_offset + vdev->msix_size;
277 	}
278 
279 	done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
280 				      count, x_start, x_end, iswrite, max_width);
281 
282 	if (done >= 0)
283 		*ppos += done;
284 
285 	if (bar == PCI_ROM_RESOURCE) {
286 		if (pci_resource_start(pdev, bar))
287 			pci_unmap_rom(pdev, io);
288 		else
289 			iounmap(io);
290 	}
291 
292 out:
293 	return done;
294 }
295 
#ifdef CONFIG_VFIO_PCI_VGA
/*
 * Read from or write to one of the legacy VGA ranges on behalf of
 * userspace.
 *
 * The offset decoded from *@ppos must fall in one of three windows:
 * memory 0xa0000-0xbffff, I/O ports 0x3b0-0x3bb or I/O ports 0x3c0-0x3df.
 * The request is clamped to the end of the window it starts in, the
 * window is mapped for the duration of the call, and the access is made
 * while holding the matching VGA resource obtained with
 * vga_get_interruptible().
 *
 * Returns the number of bytes transferred (and advances *@ppos by that
 * amount) or a negative errno.
 */
ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			       size_t count, loff_t *ppos, bool iswrite)
{
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	unsigned long base, last;
	void __iomem *iomem;
	unsigned int rsrc;
	bool is_ioport;
	ssize_t done;
	loff_t off;
	int ret;

	if (!vdev->has_vga)
		return -EINVAL;

	if (pos > 0xbfffful)
		return -EINVAL;

	/* Pick the legacy window that contains the starting offset */
	switch ((u32)pos) {
	case 0xa0000 ... 0xbffff:
		base = 0xa0000;
		last = 0xbffff;
		rsrc = VGA_RSRC_LEGACY_MEM;
		is_ioport = false;
		break;
	case 0x3b0 ... 0x3bb:
		base = 0x3b0;
		last = 0x3bb;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	case 0x3c0 ... 0x3df:
		base = 0x3c0;
		last = 0x3df;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	default:
		return -EINVAL;
	}

	count = min(count, (size_t)(last + 1 - pos));
	off = pos - base;

	if (is_ioport)
		iomem = ioport_map(base, last - base + 1);
	else
		iomem = ioremap(base, last - base + 1);

	if (!iomem)
		return -ENOMEM;

	ret = vga_get_interruptible(vdev->pdev, rsrc);
	if (!ret) {
		/*
		 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
		 * probing, so we don't currently worry about access in relation
		 * to the memory enable bit in the command register.
		 */
		done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off,
					      count, 0, 0, iswrite,
					      VFIO_PCI_IO_WIDTH_4);

		vga_put(vdev->pdev, rsrc);

		if (done >= 0)
			*ppos += done;
	} else {
		done = ret;
	}

	if (is_ioport)
		ioport_unmap(iomem);
	else
		iounmap(iomem);

	return done;
}
#endif
366 
/*
 * Issue the device write described by @ioeventfd: store ioeventfd->data
 * to ioeventfd->addr using an access of ioeventfd->count bytes (1, 2, 4
 * or 8).  Any other count results in no access.  @test_mem is forwarded
 * to the vfio_pci_core_iowrite<N>() helper; its return value is ignored.
 */
static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
					bool test_mem)
{
	struct vfio_pci_core_device *vdev = ioeventfd->vdev;
	void __iomem *addr = ioeventfd->addr;
	u64 data = ioeventfd->data;

	switch (ioeventfd->count) {
	case 1:
		vfio_pci_core_iowrite8(vdev, test_mem, data, addr);
		break;
	case 2:
		vfio_pci_core_iowrite16(vdev, test_mem, data, addr);
		break;
	case 4:
		vfio_pci_core_iowrite32(vdev, test_mem, data, addr);
		break;
	case 8:
		vfio_pci_core_iowrite64(vdev, test_mem, data, addr);
		break;
	default:
		break;
	}
}
389 
vfio_pci_ioeventfd_handler(void * opaque,void * unused)390 static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
391 {
392 	struct vfio_pci_ioeventfd *ioeventfd = opaque;
393 	struct vfio_pci_core_device *vdev = ioeventfd->vdev;
394 
395 	if (ioeventfd->test_mem) {
396 		if (!down_read_trylock(&vdev->memory_lock))
397 			return 1; /* Lock contended, use thread */
398 		if (!__vfio_pci_memory_enabled(vdev)) {
399 			up_read(&vdev->memory_lock);
400 			return 0;
401 		}
402 	}
403 
404 	vfio_pci_ioeventfd_do_write(ioeventfd, false);
405 
406 	if (ioeventfd->test_mem)
407 		up_read(&vdev->memory_lock);
408 
409 	return 0;
410 }
411 
vfio_pci_ioeventfd_thread(void * opaque,void * unused)412 static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
413 {
414 	struct vfio_pci_ioeventfd *ioeventfd = opaque;
415 
416 	vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
417 }
418 
/*
 * Add or remove an ioeventfd for a BAR location.
 *
 * @offset encodes both the BAR index and the position inside the BAR.
 * An ioeventfd is identified by the tuple (position, BAR, @data, @count).
 * If a matching entry already exists, @fd == -1 removes it and any other
 * @fd fails with -EEXIST.  If no entry matches, a non-negative @fd
 * registers a new one via vfio_virqfd_enable().
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL  target is not a standard BAR, the access runs past the end
 *            of the BAR, it overlaps the MSI-X range, or @count is 8
 *   -EEXIST  an identical ioeventfd is already registered
 *   -ENODEV  @fd is negative and no matching ioeventfd exists
 *   -ENOSPC  VFIO_PCI_IOEVENTFD_MAX entries are already registered
 *   -ENOMEM  allocation failure
 * Errors from vfio_pci_core_setup_barmap() and vfio_virqfd_enable() are
 * passed through.
 */
int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
		       uint64_t data, int count, int fd)
{
	struct pci_dev *pdev = vdev->pdev;
	int bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
	loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
	struct vfio_pci_ioeventfd *ioeventfd;
	int ret;

	/* Only support ioeventfds into BARs */
	if (bar > VFIO_PCI_BAR5_REGION_INDEX)
		return -EINVAL;

	if (pos + count > pci_resource_len(pdev, bar))
		return -EINVAL;

	/* Disallow ioeventfds working around MSI-X table writes */
	if (bar == vdev->msix_bar &&
	    pos + count > vdev->msix_offset &&
	    pos < vdev->msix_offset + vdev->msix_size)
		return -EINVAL;

	/* 8-byte ioeventfds are rejected here */
	if (count == 8)
		return -EINVAL;

	ret = vfio_pci_core_setup_barmap(vdev, bar);
	if (ret)
		return ret;

	mutex_lock(&vdev->ioeventfds_lock);

	/* Look for an existing ioeventfd with the same parameters */
	list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
		if (ioeventfd->pos != pos || ioeventfd->bar != bar ||
		    ioeventfd->data != data || ioeventfd->count != count)
			continue;

		if (fd != -1) {
			ret = -EEXIST;
			goto out_unlock;
		}

		vfio_virqfd_disable(&ioeventfd->virqfd);
		list_del(&ioeventfd->next);
		vdev->ioeventfds_nr--;
		kfree(ioeventfd);
		ret = 0;
		goto out_unlock;
	}

	/* Nothing matched: only a registration request makes sense now */
	if (fd < 0) {
		ret = -ENODEV;
		goto out_unlock;
	}

	if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	ioeventfd = kzalloc_obj(*ioeventfd, GFP_KERNEL_ACCOUNT);
	if (!ioeventfd) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	ioeventfd->vdev = vdev;
	ioeventfd->bar = bar;
	ioeventfd->pos = pos;
	ioeventfd->addr = vdev->barmap[bar] + pos;
	ioeventfd->data = data;
	ioeventfd->count = count;
	ioeventfd->test_mem = pdev->resource[bar].flags & IORESOURCE_MEM;

	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
				 vfio_pci_ioeventfd_thread, NULL,
				 &ioeventfd->virqfd, fd);
	if (ret) {
		kfree(ioeventfd);
		goto out_unlock;
	}

	list_add(&ioeventfd->next, &vdev->ioeventfds_list);
	vdev->ioeventfds_nr++;

out_unlock:
	mutex_unlock(&vdev->ioeventfds_lock);

	return ret;
}
505