xref: /linux/drivers/vfio/pci/vfio_pci_rdwr.c (revision 3673f5be0ec4798089c2c014505e54fc361d3616)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VFIO PCI I/O Port & MMIO access
4  *
5  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
6  *     Author: Alex Williamson <alex.williamson@redhat.com>
7  *
8  * Derived from original vfio:
9  * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
10  * Author: Tom Lyon, pugs@cisco.com
11  */
12 
13 #include <linux/fs.h>
14 #include <linux/pci.h>
15 #include <linux/uaccess.h>
16 #include <linux/io.h>
17 #include <linux/vfio.h>
18 #include <linux/vgaarb.h>
19 #include <linux/io-64-nonatomic-lo-hi.h>
20 
21 #include "vfio_pci_priv.h"
22 
/*
 * Accessor aliases used by the helpers in this file.
 *
 * Byte-wide accesses always map to ioread8()/iowrite8().  For 16, 32 and
 * 64-bit accesses the plain accessors are selected when __LITTLE_ENDIAN is
 * defined and the "be" variants otherwise.
 */
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8

#if defined(__LITTLE_ENDIAN)
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#else
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#endif
40 
41 #define VFIO_IOWRITE(size) \
42 int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev,	\
43 			bool test_mem, u##size val, void __iomem *io)	\
44 {									\
45 	if (test_mem) {							\
46 		down_read(&vdev->memory_lock);				\
47 		if (!__vfio_pci_memory_enabled(vdev)) {			\
48 			up_read(&vdev->memory_lock);			\
49 			return -EIO;					\
50 		}							\
51 	}								\
52 									\
53 	vfio_iowrite##size(val, io);					\
54 									\
55 	if (test_mem)							\
56 		up_read(&vdev->memory_lock);				\
57 									\
58 	return 0;							\
59 }									\
60 EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size);
61 
62 VFIO_IOWRITE(8)
63 VFIO_IOWRITE(16)
64 VFIO_IOWRITE(32)
65 VFIO_IOWRITE(64)
66 
67 #define VFIO_IOREAD(size) \
68 int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev,	\
69 			bool test_mem, u##size *val, void __iomem *io)	\
70 {									\
71 	if (test_mem) {							\
72 		down_read(&vdev->memory_lock);				\
73 		if (!__vfio_pci_memory_enabled(vdev)) {			\
74 			up_read(&vdev->memory_lock);			\
75 			return -EIO;					\
76 		}							\
77 	}								\
78 									\
79 	*val = vfio_ioread##size(io);					\
80 									\
81 	if (test_mem)							\
82 		up_read(&vdev->memory_lock);				\
83 									\
84 	return 0;							\
85 }									\
86 EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size);
87 
88 VFIO_IOREAD(8)
89 VFIO_IOREAD(16)
90 VFIO_IOREAD(32)
91 VFIO_IOREAD(64)
92 
93 #define VFIO_IORDWR(size)						\
94 static int vfio_pci_iordwr##size(struct vfio_pci_core_device *vdev,\
95 				bool iswrite, bool test_mem,		\
96 				void __iomem *io, char __user *buf,	\
97 				loff_t off, size_t *filled)		\
98 {									\
99 	u##size val;							\
100 	int ret;							\
101 									\
102 	if (iswrite) {							\
103 		if (copy_from_user(&val, buf, sizeof(val)))		\
104 			return -EFAULT;					\
105 									\
106 		ret = vfio_pci_core_iowrite##size(vdev, test_mem,	\
107 						  val, io + off);	\
108 		if (ret)						\
109 			return ret;					\
110 	} else {							\
111 		ret = vfio_pci_core_ioread##size(vdev, test_mem,	\
112 						 &val, io + off);	\
113 		if (ret)						\
114 			return ret;					\
115 									\
116 		if (copy_to_user(buf, &val, sizeof(val)))		\
117 			return -EFAULT;					\
118 	}								\
119 									\
120 	*filled = sizeof(val);						\
121 	return 0;							\
122 }									\
123 
124 VFIO_IORDWR(8)
125 VFIO_IORDWR(16)
126 VFIO_IORDWR(32)
127 VFIO_IORDWR(64)
128 
129 /*
130  * Read or write from an __iomem region (MMIO or I/O port) with an excluded
131  * range which is inaccessible.  The excluded range drops writes and fills
132  * reads with -1.  This is intended for handling MSI-X vector tables and
133  * leftover space for ROM BARs.
134  */
135 ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
136 			       void __iomem *io, char __user *buf,
137 			       loff_t off, size_t count, size_t x_start,
138 			       size_t x_end, bool iswrite)
139 {
140 	ssize_t done = 0;
141 	int ret;
142 
143 	while (count) {
144 		size_t fillable, filled;
145 
146 		if (off < x_start)
147 			fillable = min(count, (size_t)(x_start - off));
148 		else if (off >= x_end)
149 			fillable = count;
150 		else
151 			fillable = 0;
152 
153 		if (fillable >= 8 && !(off % 8)) {
154 			ret = vfio_pci_iordwr64(vdev, iswrite, test_mem,
155 						io, buf, off, &filled);
156 			if (ret)
157 				return ret;
158 
159 		} else
160 		if (fillable >= 4 && !(off % 4)) {
161 			ret = vfio_pci_iordwr32(vdev, iswrite, test_mem,
162 						io, buf, off, &filled);
163 			if (ret)
164 				return ret;
165 
166 		} else if (fillable >= 2 && !(off % 2)) {
167 			ret = vfio_pci_iordwr16(vdev, iswrite, test_mem,
168 						io, buf, off, &filled);
169 			if (ret)
170 				return ret;
171 
172 		} else if (fillable) {
173 			ret = vfio_pci_iordwr8(vdev, iswrite, test_mem,
174 					       io, buf, off, &filled);
175 			if (ret)
176 				return ret;
177 
178 		} else {
179 			/* Fill reads with -1, drop writes */
180 			filled = min(count, (size_t)(x_end - off));
181 			if (!iswrite) {
182 				u8 val = 0xFF;
183 				size_t i;
184 
185 				for (i = 0; i < filled; i++)
186 					if (copy_to_user(buf + i, &val, 1))
187 						return -EFAULT;
188 			}
189 		}
190 
191 		count -= filled;
192 		done += filled;
193 		off += filled;
194 		buf += filled;
195 	}
196 
197 	return done;
198 }
199 EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
200 
201 int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
202 {
203 	struct pci_dev *pdev = vdev->pdev;
204 	int ret;
205 	void __iomem *io;
206 
207 	if (vdev->barmap[bar])
208 		return 0;
209 
210 	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
211 	if (ret)
212 		return ret;
213 
214 	io = pci_iomap(pdev, bar, 0);
215 	if (!io) {
216 		pci_release_selected_regions(pdev, 1 << bar);
217 		return -ENOMEM;
218 	}
219 
220 	vdev->barmap[bar] = io;
221 
222 	return 0;
223 }
224 EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);
225 
226 ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
227 			size_t count, loff_t *ppos, bool iswrite)
228 {
229 	struct pci_dev *pdev = vdev->pdev;
230 	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
231 	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
232 	size_t x_start = 0, x_end = 0;
233 	resource_size_t end;
234 	void __iomem *io;
235 	struct resource *res = &vdev->pdev->resource[bar];
236 	ssize_t done;
237 
238 	if (pci_resource_start(pdev, bar))
239 		end = pci_resource_len(pdev, bar);
240 	else if (bar == PCI_ROM_RESOURCE && pdev->rom && pdev->romlen)
241 		end = roundup_pow_of_two(pdev->romlen);
242 	else
243 		return -EINVAL;
244 
245 	if (pos >= end)
246 		return -EINVAL;
247 
248 	count = min(count, (size_t)(end - pos));
249 
250 	if (bar == PCI_ROM_RESOURCE) {
251 		/*
252 		 * The ROM can fill less space than the BAR, so we start the
253 		 * excluded range at the end of the actual ROM.  This makes
254 		 * filling large ROM BARs much faster.
255 		 */
256 		if (pci_resource_start(pdev, bar)) {
257 			io = pci_map_rom(pdev, &x_start);
258 		} else {
259 			io = ioremap(pdev->rom, pdev->romlen);
260 			x_start = pdev->romlen;
261 		}
262 		if (!io)
263 			return -ENOMEM;
264 		x_end = end;
265 	} else {
266 		int ret = vfio_pci_core_setup_barmap(vdev, bar);
267 		if (ret) {
268 			done = ret;
269 			goto out;
270 		}
271 
272 		io = vdev->barmap[bar];
273 	}
274 
275 	if (bar == vdev->msix_bar) {
276 		x_start = vdev->msix_offset;
277 		x_end = vdev->msix_offset + vdev->msix_size;
278 	}
279 
280 	done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
281 				      count, x_start, x_end, iswrite);
282 
283 	if (done >= 0)
284 		*ppos += done;
285 
286 	if (bar == PCI_ROM_RESOURCE) {
287 		if (pci_resource_start(pdev, bar))
288 			pci_unmap_rom(pdev, io);
289 		else
290 			iounmap(io);
291 	}
292 
293 out:
294 	return done;
295 }
296 
#ifdef CONFIG_VFIO_PCI_VGA
/* Undo the temporary mapping created by vfio_pci_vga_rw(). */
static void vfio_pci_vga_unmap(void __iomem *iomem, bool is_ioport)
{
	if (is_ioport)
		ioport_unmap(iomem);
	else
		iounmap(iomem);
}

/*
 * Read from or write to the legacy VGA ranges on behalf of userspace.
 *
 * The offset taken from *@ppos must fall in one of three ranges:
 * memory 0xa0000-0xbffff, I/O ports 0x3b0-0x3bb or I/O ports 0x3c0-0x3df.
 * @count is clamped to the end of the range that was hit.  The range is
 * mapped for the duration of the call and the access is performed between
 * vga_get_interruptible() and vga_put() for the matching VGA resource.
 *
 * Returns the number of bytes transferred (and advances *@ppos by it),
 * -EINVAL if the device has no VGA or the offset is outside the ranges,
 * -ENOMEM if the mapping fails, or an error from vga_get_interruptible()
 * or vfio_pci_core_do_io_rw().
 */
ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			       size_t count, loff_t *ppos, bool iswrite)
{
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	void __iomem *iomem = NULL;
	unsigned int rsrc;
	bool is_ioport;
	ssize_t done;
	loff_t off;
	u32 addr;
	int ret;

	if (!vdev->has_vga)
		return -EINVAL;

	if (pos > 0xbfffful)
		return -EINVAL;

	addr = (u32)pos;

	if (addr >= 0xa0000 && addr <= 0xbffff) {
		count = min(count, (size_t)(0xc0000 - pos));
		iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
		off = pos - 0xa0000;
		rsrc = VGA_RSRC_LEGACY_MEM;
		is_ioport = false;
	} else if (addr >= 0x3b0 && addr <= 0x3bb) {
		count = min(count, (size_t)(0x3bc - pos));
		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
		off = pos - 0x3b0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
	} else if (addr >= 0x3c0 && addr <= 0x3df) {
		count = min(count, (size_t)(0x3e0 - pos));
		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
		off = pos - 0x3c0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
	} else {
		return -EINVAL;
	}

	if (!iomem)
		return -ENOMEM;

	ret = vga_get_interruptible(vdev->pdev, rsrc);
	if (ret) {
		vfio_pci_vga_unmap(iomem, is_ioport);
		return ret;
	}

	/*
	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
	 * probing, so we don't currently worry about access in relation
	 * to the memory enable bit in the command register.
	 */
	done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
				      0, 0, iswrite);

	vga_put(vdev->pdev, rsrc);
	vfio_pci_vga_unmap(iomem, is_ioport);

	if (done >= 0)
		*ppos += done;

	return done;
}
#endif
367 
368 static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
369 					bool test_mem)
370 {
371 	switch (ioeventfd->count) {
372 	case 1:
373 		vfio_pci_core_iowrite8(ioeventfd->vdev, test_mem,
374 				       ioeventfd->data, ioeventfd->addr);
375 		break;
376 	case 2:
377 		vfio_pci_core_iowrite16(ioeventfd->vdev, test_mem,
378 					ioeventfd->data, ioeventfd->addr);
379 		break;
380 	case 4:
381 		vfio_pci_core_iowrite32(ioeventfd->vdev, test_mem,
382 					ioeventfd->data, ioeventfd->addr);
383 		break;
384 	case 8:
385 		vfio_pci_core_iowrite64(ioeventfd->vdev, test_mem,
386 					ioeventfd->data, ioeventfd->addr);
387 		break;
388 	}
389 }
390 
391 static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
392 {
393 	struct vfio_pci_ioeventfd *ioeventfd = opaque;
394 	struct vfio_pci_core_device *vdev = ioeventfd->vdev;
395 
396 	if (ioeventfd->test_mem) {
397 		if (!down_read_trylock(&vdev->memory_lock))
398 			return 1; /* Lock contended, use thread */
399 		if (!__vfio_pci_memory_enabled(vdev)) {
400 			up_read(&vdev->memory_lock);
401 			return 0;
402 		}
403 	}
404 
405 	vfio_pci_ioeventfd_do_write(ioeventfd, false);
406 
407 	if (ioeventfd->test_mem)
408 		up_read(&vdev->memory_lock);
409 
410 	return 0;
411 }
412 
413 static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
414 {
415 	struct vfio_pci_ioeventfd *ioeventfd = opaque;
416 
417 	vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
418 }
419 
420 int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
421 		       uint64_t data, int count, int fd)
422 {
423 	struct pci_dev *pdev = vdev->pdev;
424 	loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
425 	int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
426 	struct vfio_pci_ioeventfd *ioeventfd;
427 
428 	/* Only support ioeventfds into BARs */
429 	if (bar > VFIO_PCI_BAR5_REGION_INDEX)
430 		return -EINVAL;
431 
432 	if (pos + count > pci_resource_len(pdev, bar))
433 		return -EINVAL;
434 
435 	/* Disallow ioeventfds working around MSI-X table writes */
436 	if (bar == vdev->msix_bar &&
437 	    !(pos + count <= vdev->msix_offset ||
438 	      pos >= vdev->msix_offset + vdev->msix_size))
439 		return -EINVAL;
440 
441 	if (count == 8)
442 		return -EINVAL;
443 
444 	ret = vfio_pci_core_setup_barmap(vdev, bar);
445 	if (ret)
446 		return ret;
447 
448 	mutex_lock(&vdev->ioeventfds_lock);
449 
450 	list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
451 		if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
452 		    ioeventfd->data == data && ioeventfd->count == count) {
453 			if (fd == -1) {
454 				vfio_virqfd_disable(&ioeventfd->virqfd);
455 				list_del(&ioeventfd->next);
456 				vdev->ioeventfds_nr--;
457 				kfree(ioeventfd);
458 				ret = 0;
459 			} else
460 				ret = -EEXIST;
461 
462 			goto out_unlock;
463 		}
464 	}
465 
466 	if (fd < 0) {
467 		ret = -ENODEV;
468 		goto out_unlock;
469 	}
470 
471 	if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
472 		ret = -ENOSPC;
473 		goto out_unlock;
474 	}
475 
476 	ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT);
477 	if (!ioeventfd) {
478 		ret = -ENOMEM;
479 		goto out_unlock;
480 	}
481 
482 	ioeventfd->vdev = vdev;
483 	ioeventfd->addr = vdev->barmap[bar] + pos;
484 	ioeventfd->data = data;
485 	ioeventfd->pos = pos;
486 	ioeventfd->bar = bar;
487 	ioeventfd->count = count;
488 	ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;
489 
490 	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
491 				 vfio_pci_ioeventfd_thread, NULL,
492 				 &ioeventfd->virqfd, fd);
493 	if (ret) {
494 		kfree(ioeventfd);
495 		goto out_unlock;
496 	}
497 
498 	list_add(&ioeventfd->next, &vdev->ioeventfds_list);
499 	vdev->ioeventfds_nr++;
500 
501 out_unlock:
502 	mutex_unlock(&vdev->ioeventfds_lock);
503 
504 	return ret;
505 }
506