xref: /linux/drivers/vfio/pci/vfio_pci_rdwr.c (revision c94cd9508b1335b949fd13ebd269313c65492df0)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VFIO PCI I/O Port & MMIO access
4  *
5  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
6  *     Author: Alex Williamson <alex.williamson@redhat.com>
7  *
8  * Derived from original vfio:
9  * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
10  * Author: Tom Lyon, pugs@cisco.com
11  */
12 
13 #include <linux/fs.h>
14 #include <linux/pci.h>
15 #include <linux/uaccess.h>
16 #include <linux/io.h>
17 #include <linux/vfio.h>
18 #include <linux/vgaarb.h>
19 
20 #include "vfio_pci_priv.h"
21 
/*
 * Accessor aliases used by the helpers in this file.  Multi-byte accessors
 * resolve to the plain io{read,write}N() routines when __LITTLE_ENDIAN is
 * defined and to the *be variants otherwise; single-byte accesses need no
 * such distinction.
 */
#ifndef __LITTLE_ENDIAN
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#else
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#endif
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8
39 
40 #define VFIO_IOWRITE(size) \
41 int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev,	\
42 			bool test_mem, u##size val, void __iomem *io)	\
43 {									\
44 	if (test_mem) {							\
45 		down_read(&vdev->memory_lock);				\
46 		if (!__vfio_pci_memory_enabled(vdev)) {			\
47 			up_read(&vdev->memory_lock);			\
48 			return -EIO;					\
49 		}							\
50 	}								\
51 									\
52 	vfio_iowrite##size(val, io);					\
53 									\
54 	if (test_mem)							\
55 		up_read(&vdev->memory_lock);				\
56 									\
57 	return 0;							\
58 }									\
59 EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size);
60 
61 VFIO_IOWRITE(8)
62 VFIO_IOWRITE(16)
63 VFIO_IOWRITE(32)
64 #ifdef iowrite64
65 VFIO_IOWRITE(64)
66 #endif
67 
68 #define VFIO_IOREAD(size) \
69 int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev,	\
70 			bool test_mem, u##size *val, void __iomem *io)	\
71 {									\
72 	if (test_mem) {							\
73 		down_read(&vdev->memory_lock);				\
74 		if (!__vfio_pci_memory_enabled(vdev)) {			\
75 			up_read(&vdev->memory_lock);			\
76 			return -EIO;					\
77 		}							\
78 	}								\
79 									\
80 	*val = vfio_ioread##size(io);					\
81 									\
82 	if (test_mem)							\
83 		up_read(&vdev->memory_lock);				\
84 									\
85 	return 0;							\
86 }									\
87 EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size);
88 
89 VFIO_IOREAD(8)
90 VFIO_IOREAD(16)
91 VFIO_IOREAD(32)
92 #ifdef ioread64
93 VFIO_IOREAD(64)
94 #endif
95 
96 #define VFIO_IORDWR(size)						\
97 static int vfio_pci_iordwr##size(struct vfio_pci_core_device *vdev,\
98 				bool iswrite, bool test_mem,		\
99 				void __iomem *io, char __user *buf,	\
100 				loff_t off, size_t *filled)		\
101 {									\
102 	u##size val;							\
103 	int ret;							\
104 									\
105 	if (iswrite) {							\
106 		if (copy_from_user(&val, buf, sizeof(val)))		\
107 			return -EFAULT;					\
108 									\
109 		ret = vfio_pci_core_iowrite##size(vdev, test_mem,	\
110 						  val, io + off);	\
111 		if (ret)						\
112 			return ret;					\
113 	} else {							\
114 		ret = vfio_pci_core_ioread##size(vdev, test_mem,	\
115 						 &val, io + off);	\
116 		if (ret)						\
117 			return ret;					\
118 									\
119 		if (copy_to_user(buf, &val, sizeof(val)))		\
120 			return -EFAULT;					\
121 	}								\
122 									\
123 	*filled = sizeof(val);						\
124 	return 0;							\
125 }									\
126 
127 VFIO_IORDWR(8)
128 VFIO_IORDWR(16)
129 VFIO_IORDWR(32)
130 #if defined(ioread64) && defined(iowrite64)
131 VFIO_IORDWR(64)
132 #endif
133 
134 /*
135  * Read or write from an __iomem region (MMIO or I/O port) with an excluded
136  * range which is inaccessible.  The excluded range drops writes and fills
137  * reads with -1.  This is intended for handling MSI-X vector tables and
138  * leftover space for ROM BARs.
139  */
140 ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
141 			       void __iomem *io, char __user *buf,
142 			       loff_t off, size_t count, size_t x_start,
143 			       size_t x_end, bool iswrite)
144 {
145 	ssize_t done = 0;
146 	int ret;
147 
148 	while (count) {
149 		size_t fillable, filled;
150 
151 		if (off < x_start)
152 			fillable = min(count, (size_t)(x_start - off));
153 		else if (off >= x_end)
154 			fillable = count;
155 		else
156 			fillable = 0;
157 
158 #if defined(ioread64) && defined(iowrite64)
159 		if (fillable >= 8 && !(off % 8)) {
160 			ret = vfio_pci_iordwr64(vdev, iswrite, test_mem,
161 						io, buf, off, &filled);
162 			if (ret)
163 				return ret;
164 
165 		} else
166 #endif
167 		if (fillable >= 4 && !(off % 4)) {
168 			ret = vfio_pci_iordwr32(vdev, iswrite, test_mem,
169 						io, buf, off, &filled);
170 			if (ret)
171 				return ret;
172 
173 		} else if (fillable >= 2 && !(off % 2)) {
174 			ret = vfio_pci_iordwr16(vdev, iswrite, test_mem,
175 						io, buf, off, &filled);
176 			if (ret)
177 				return ret;
178 
179 		} else if (fillable) {
180 			ret = vfio_pci_iordwr8(vdev, iswrite, test_mem,
181 					       io, buf, off, &filled);
182 			if (ret)
183 				return ret;
184 
185 		} else {
186 			/* Fill reads with -1, drop writes */
187 			filled = min(count, (size_t)(x_end - off));
188 			if (!iswrite) {
189 				u8 val = 0xFF;
190 				size_t i;
191 
192 				for (i = 0; i < filled; i++)
193 					if (copy_to_user(buf + i, &val, 1))
194 						return -EFAULT;
195 			}
196 		}
197 
198 		count -= filled;
199 		done += filled;
200 		off += filled;
201 		buf += filled;
202 	}
203 
204 	return done;
205 }
206 EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
207 
208 int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
209 {
210 	struct pci_dev *pdev = vdev->pdev;
211 	int ret;
212 	void __iomem *io;
213 
214 	if (vdev->barmap[bar])
215 		return 0;
216 
217 	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
218 	if (ret)
219 		return ret;
220 
221 	io = pci_iomap(pdev, bar, 0);
222 	if (!io) {
223 		pci_release_selected_regions(pdev, 1 << bar);
224 		return -ENOMEM;
225 	}
226 
227 	vdev->barmap[bar] = io;
228 
229 	return 0;
230 }
231 EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);
232 
233 ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
234 			size_t count, loff_t *ppos, bool iswrite)
235 {
236 	struct pci_dev *pdev = vdev->pdev;
237 	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
238 	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
239 	size_t x_start = 0, x_end = 0;
240 	resource_size_t end;
241 	void __iomem *io;
242 	struct resource *res = &vdev->pdev->resource[bar];
243 	ssize_t done;
244 
245 	if (pci_resource_start(pdev, bar))
246 		end = pci_resource_len(pdev, bar);
247 	else if (bar == PCI_ROM_RESOURCE &&
248 		 pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
249 		end = 0x20000;
250 	else
251 		return -EINVAL;
252 
253 	if (pos >= end)
254 		return -EINVAL;
255 
256 	count = min(count, (size_t)(end - pos));
257 
258 	if (bar == PCI_ROM_RESOURCE) {
259 		/*
260 		 * The ROM can fill less space than the BAR, so we start the
261 		 * excluded range at the end of the actual ROM.  This makes
262 		 * filling large ROM BARs much faster.
263 		 */
264 		io = pci_map_rom(pdev, &x_start);
265 		if (!io) {
266 			done = -ENOMEM;
267 			goto out;
268 		}
269 		x_end = end;
270 	} else {
271 		int ret = vfio_pci_core_setup_barmap(vdev, bar);
272 		if (ret) {
273 			done = ret;
274 			goto out;
275 		}
276 
277 		io = vdev->barmap[bar];
278 	}
279 
280 	if (bar == vdev->msix_bar) {
281 		x_start = vdev->msix_offset;
282 		x_end = vdev->msix_offset + vdev->msix_size;
283 	}
284 
285 	done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
286 				      count, x_start, x_end, iswrite);
287 
288 	if (done >= 0)
289 		*ppos += done;
290 
291 	if (bar == PCI_ROM_RESOURCE)
292 		pci_unmap_rom(pdev, io);
293 out:
294 	return done;
295 }
296 
297 #ifdef CONFIG_VFIO_PCI_VGA
298 ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
299 			       size_t count, loff_t *ppos, bool iswrite)
300 {
301 	int ret;
302 	loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
303 	void __iomem *iomem = NULL;
304 	unsigned int rsrc;
305 	bool is_ioport;
306 	ssize_t done;
307 
308 	if (!vdev->has_vga)
309 		return -EINVAL;
310 
311 	if (pos > 0xbfffful)
312 		return -EINVAL;
313 
314 	switch ((u32)pos) {
315 	case 0xa0000 ... 0xbffff:
316 		count = min(count, (size_t)(0xc0000 - pos));
317 		iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
318 		off = pos - 0xa0000;
319 		rsrc = VGA_RSRC_LEGACY_MEM;
320 		is_ioport = false;
321 		break;
322 	case 0x3b0 ... 0x3bb:
323 		count = min(count, (size_t)(0x3bc - pos));
324 		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
325 		off = pos - 0x3b0;
326 		rsrc = VGA_RSRC_LEGACY_IO;
327 		is_ioport = true;
328 		break;
329 	case 0x3c0 ... 0x3df:
330 		count = min(count, (size_t)(0x3e0 - pos));
331 		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
332 		off = pos - 0x3c0;
333 		rsrc = VGA_RSRC_LEGACY_IO;
334 		is_ioport = true;
335 		break;
336 	default:
337 		return -EINVAL;
338 	}
339 
340 	if (!iomem)
341 		return -ENOMEM;
342 
343 	ret = vga_get_interruptible(vdev->pdev, rsrc);
344 	if (ret) {
345 		is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
346 		return ret;
347 	}
348 
349 	/*
350 	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
351 	 * probing, so we don't currently worry about access in relation
352 	 * to the memory enable bit in the command register.
353 	 */
354 	done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
355 				      0, 0, iswrite);
356 
357 	vga_put(vdev->pdev, rsrc);
358 
359 	is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
360 
361 	if (done >= 0)
362 		*ppos += done;
363 
364 	return done;
365 }
366 #endif
367 
368 static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
369 					bool test_mem)
370 {
371 	switch (ioeventfd->count) {
372 	case 1:
373 		vfio_pci_core_iowrite8(ioeventfd->vdev, test_mem,
374 				       ioeventfd->data, ioeventfd->addr);
375 		break;
376 	case 2:
377 		vfio_pci_core_iowrite16(ioeventfd->vdev, test_mem,
378 					ioeventfd->data, ioeventfd->addr);
379 		break;
380 	case 4:
381 		vfio_pci_core_iowrite32(ioeventfd->vdev, test_mem,
382 					ioeventfd->data, ioeventfd->addr);
383 		break;
384 #ifdef iowrite64
385 	case 8:
386 		vfio_pci_core_iowrite64(ioeventfd->vdev, test_mem,
387 					ioeventfd->data, ioeventfd->addr);
388 		break;
389 #endif
390 	}
391 }
392 
393 static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
394 {
395 	struct vfio_pci_ioeventfd *ioeventfd = opaque;
396 	struct vfio_pci_core_device *vdev = ioeventfd->vdev;
397 
398 	if (ioeventfd->test_mem) {
399 		if (!down_read_trylock(&vdev->memory_lock))
400 			return 1; /* Lock contended, use thread */
401 		if (!__vfio_pci_memory_enabled(vdev)) {
402 			up_read(&vdev->memory_lock);
403 			return 0;
404 		}
405 	}
406 
407 	vfio_pci_ioeventfd_do_write(ioeventfd, false);
408 
409 	if (ioeventfd->test_mem)
410 		up_read(&vdev->memory_lock);
411 
412 	return 0;
413 }
414 
415 static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
416 {
417 	struct vfio_pci_ioeventfd *ioeventfd = opaque;
418 
419 	vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
420 }
421 
422 int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
423 		       uint64_t data, int count, int fd)
424 {
425 	struct pci_dev *pdev = vdev->pdev;
426 	loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
427 	int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
428 	struct vfio_pci_ioeventfd *ioeventfd;
429 
430 	/* Only support ioeventfds into BARs */
431 	if (bar > VFIO_PCI_BAR5_REGION_INDEX)
432 		return -EINVAL;
433 
434 	if (pos + count > pci_resource_len(pdev, bar))
435 		return -EINVAL;
436 
437 	/* Disallow ioeventfds working around MSI-X table writes */
438 	if (bar == vdev->msix_bar &&
439 	    !(pos + count <= vdev->msix_offset ||
440 	      pos >= vdev->msix_offset + vdev->msix_size))
441 		return -EINVAL;
442 
443 #ifndef iowrite64
444 	if (count == 8)
445 		return -EINVAL;
446 #endif
447 
448 	ret = vfio_pci_core_setup_barmap(vdev, bar);
449 	if (ret)
450 		return ret;
451 
452 	mutex_lock(&vdev->ioeventfds_lock);
453 
454 	list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
455 		if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
456 		    ioeventfd->data == data && ioeventfd->count == count) {
457 			if (fd == -1) {
458 				vfio_virqfd_disable(&ioeventfd->virqfd);
459 				list_del(&ioeventfd->next);
460 				vdev->ioeventfds_nr--;
461 				kfree(ioeventfd);
462 				ret = 0;
463 			} else
464 				ret = -EEXIST;
465 
466 			goto out_unlock;
467 		}
468 	}
469 
470 	if (fd < 0) {
471 		ret = -ENODEV;
472 		goto out_unlock;
473 	}
474 
475 	if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
476 		ret = -ENOSPC;
477 		goto out_unlock;
478 	}
479 
480 	ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT);
481 	if (!ioeventfd) {
482 		ret = -ENOMEM;
483 		goto out_unlock;
484 	}
485 
486 	ioeventfd->vdev = vdev;
487 	ioeventfd->addr = vdev->barmap[bar] + pos;
488 	ioeventfd->data = data;
489 	ioeventfd->pos = pos;
490 	ioeventfd->bar = bar;
491 	ioeventfd->count = count;
492 	ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;
493 
494 	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
495 				 vfio_pci_ioeventfd_thread, NULL,
496 				 &ioeventfd->virqfd, fd);
497 	if (ret) {
498 		kfree(ioeventfd);
499 		goto out_unlock;
500 	}
501 
502 	list_add(&ioeventfd->next, &vdev->ioeventfds_list);
503 	vdev->ioeventfds_nr++;
504 
505 out_unlock:
506 	mutex_unlock(&vdev->ioeventfds_lock);
507 
508 	return ret;
509 }
510