xref: /linux/drivers/vfio/pci/vfio_pci_rdwr.c (revision 9e56ff53b4115875667760445b028357848b4748)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VFIO PCI I/O Port & MMIO access
4  *
5  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
6  *     Author: Alex Williamson <alex.williamson@redhat.com>
7  *
8  * Derived from original vfio:
9  * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
10  * Author: Tom Lyon, pugs@cisco.com
11  */
12 
13 #include <linux/fs.h>
14 #include <linux/pci.h>
15 #include <linux/uaccess.h>
16 #include <linux/io.h>
17 #include <linux/vfio.h>
18 #include <linux/vgaarb.h>
19 
20 #include "vfio_pci_priv.h"
21 
/*
 * Width-specific accessors used by the helpers in this file.
 *
 * The multi-byte accessors follow the byte order of the build: the plain
 * ioreadN()/iowriteN() helpers when __LITTLE_ENDIAN is defined, and the
 * big-endian ("be") variants otherwise.  Single-byte accesses have no byte
 * order and always map to ioread8()/iowrite8().
 *
 * NOTE(review): presumably the selection keeps data moving between the user
 * buffer and the device free of byte swapping on either endianness - confirm
 * against the ioread/iowrite definitions for the target architecture.
 */
#if defined(__LITTLE_ENDIAN)
#define vfio_ioread16	ioread16
#define vfio_ioread32	ioread32
#define vfio_ioread64	ioread64
#define vfio_iowrite16	iowrite16
#define vfio_iowrite32	iowrite32
#define vfio_iowrite64	iowrite64
#else
#define vfio_ioread16	ioread16be
#define vfio_ioread32	ioread32be
#define vfio_ioread64	ioread64be
#define vfio_iowrite16	iowrite16be
#define vfio_iowrite32	iowrite32be
#define vfio_iowrite64	iowrite64be
#endif
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8
39 
40 #define VFIO_IOWRITE(size) \
41 int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev,	\
42 			bool test_mem, u##size val, void __iomem *io)	\
43 {									\
44 	if (test_mem) {							\
45 		down_read(&vdev->memory_lock);				\
46 		if (!__vfio_pci_memory_enabled(vdev)) {			\
47 			up_read(&vdev->memory_lock);			\
48 			return -EIO;					\
49 		}							\
50 	}								\
51 									\
52 	vfio_iowrite##size(val, io);					\
53 									\
54 	if (test_mem)							\
55 		up_read(&vdev->memory_lock);				\
56 									\
57 	return 0;							\
58 }									\
59 EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size);
60 
61 VFIO_IOWRITE(8)
62 VFIO_IOWRITE(16)
63 VFIO_IOWRITE(32)
64 #ifdef iowrite64
65 VFIO_IOWRITE(64)
66 #endif
67 
68 #define VFIO_IOREAD(size) \
69 int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev,	\
70 			bool test_mem, u##size *val, void __iomem *io)	\
71 {									\
72 	if (test_mem) {							\
73 		down_read(&vdev->memory_lock);				\
74 		if (!__vfio_pci_memory_enabled(vdev)) {			\
75 			up_read(&vdev->memory_lock);			\
76 			return -EIO;					\
77 		}							\
78 	}								\
79 									\
80 	*val = vfio_ioread##size(io);					\
81 									\
82 	if (test_mem)							\
83 		up_read(&vdev->memory_lock);				\
84 									\
85 	return 0;							\
86 }									\
87 EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size);
88 
89 VFIO_IOREAD(8)
90 VFIO_IOREAD(16)
91 VFIO_IOREAD(32)
92 
93 /*
94  * Read or write from an __iomem region (MMIO or I/O port) with an excluded
95  * range which is inaccessible.  The excluded range drops writes and fills
96  * reads with -1.  This is intended for handling MSI-X vector tables and
97  * leftover space for ROM BARs.
98  */
99 static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
100 			void __iomem *io, char __user *buf,
101 			loff_t off, size_t count, size_t x_start,
102 			size_t x_end, bool iswrite)
103 {
104 	ssize_t done = 0;
105 	int ret;
106 
107 	while (count) {
108 		size_t fillable, filled;
109 
110 		if (off < x_start)
111 			fillable = min(count, (size_t)(x_start - off));
112 		else if (off >= x_end)
113 			fillable = count;
114 		else
115 			fillable = 0;
116 
117 		if (fillable >= 4 && !(off % 4)) {
118 			u32 val;
119 
120 			if (iswrite) {
121 				if (copy_from_user(&val, buf, 4))
122 					return -EFAULT;
123 
124 				ret = vfio_pci_core_iowrite32(vdev, test_mem,
125 							      val, io + off);
126 				if (ret)
127 					return ret;
128 			} else {
129 				ret = vfio_pci_core_ioread32(vdev, test_mem,
130 							     &val, io + off);
131 				if (ret)
132 					return ret;
133 
134 				if (copy_to_user(buf, &val, 4))
135 					return -EFAULT;
136 			}
137 
138 			filled = 4;
139 		} else if (fillable >= 2 && !(off % 2)) {
140 			u16 val;
141 
142 			if (iswrite) {
143 				if (copy_from_user(&val, buf, 2))
144 					return -EFAULT;
145 
146 				ret = vfio_pci_core_iowrite16(vdev, test_mem,
147 							      val, io + off);
148 				if (ret)
149 					return ret;
150 			} else {
151 				ret = vfio_pci_core_ioread16(vdev, test_mem,
152 							     &val, io + off);
153 				if (ret)
154 					return ret;
155 
156 				if (copy_to_user(buf, &val, 2))
157 					return -EFAULT;
158 			}
159 
160 			filled = 2;
161 		} else if (fillable) {
162 			u8 val;
163 
164 			if (iswrite) {
165 				if (copy_from_user(&val, buf, 1))
166 					return -EFAULT;
167 
168 				ret = vfio_pci_core_iowrite8(vdev, test_mem,
169 							     val, io + off);
170 				if (ret)
171 					return ret;
172 			} else {
173 				ret = vfio_pci_core_ioread8(vdev, test_mem,
174 							    &val, io + off);
175 				if (ret)
176 					return ret;
177 
178 				if (copy_to_user(buf, &val, 1))
179 					return -EFAULT;
180 			}
181 
182 			filled = 1;
183 		} else {
184 			/* Fill reads with -1, drop writes */
185 			filled = min(count, (size_t)(x_end - off));
186 			if (!iswrite) {
187 				u8 val = 0xFF;
188 				size_t i;
189 
190 				for (i = 0; i < filled; i++)
191 					if (copy_to_user(buf + i, &val, 1))
192 						return -EFAULT;
193 			}
194 		}
195 
196 		count -= filled;
197 		done += filled;
198 		off += filled;
199 		buf += filled;
200 	}
201 
202 	return done;
203 }
204 
205 int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
206 {
207 	struct pci_dev *pdev = vdev->pdev;
208 	int ret;
209 	void __iomem *io;
210 
211 	if (vdev->barmap[bar])
212 		return 0;
213 
214 	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
215 	if (ret)
216 		return ret;
217 
218 	io = pci_iomap(pdev, bar, 0);
219 	if (!io) {
220 		pci_release_selected_regions(pdev, 1 << bar);
221 		return -ENOMEM;
222 	}
223 
224 	vdev->barmap[bar] = io;
225 
226 	return 0;
227 }
228 EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);
229 
230 ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
231 			size_t count, loff_t *ppos, bool iswrite)
232 {
233 	struct pci_dev *pdev = vdev->pdev;
234 	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
235 	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
236 	size_t x_start = 0, x_end = 0;
237 	resource_size_t end;
238 	void __iomem *io;
239 	struct resource *res = &vdev->pdev->resource[bar];
240 	ssize_t done;
241 
242 	if (pci_resource_start(pdev, bar))
243 		end = pci_resource_len(pdev, bar);
244 	else if (bar == PCI_ROM_RESOURCE &&
245 		 pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
246 		end = 0x20000;
247 	else
248 		return -EINVAL;
249 
250 	if (pos >= end)
251 		return -EINVAL;
252 
253 	count = min(count, (size_t)(end - pos));
254 
255 	if (bar == PCI_ROM_RESOURCE) {
256 		/*
257 		 * The ROM can fill less space than the BAR, so we start the
258 		 * excluded range at the end of the actual ROM.  This makes
259 		 * filling large ROM BARs much faster.
260 		 */
261 		io = pci_map_rom(pdev, &x_start);
262 		if (!io) {
263 			done = -ENOMEM;
264 			goto out;
265 		}
266 		x_end = end;
267 	} else {
268 		int ret = vfio_pci_core_setup_barmap(vdev, bar);
269 		if (ret) {
270 			done = ret;
271 			goto out;
272 		}
273 
274 		io = vdev->barmap[bar];
275 	}
276 
277 	if (bar == vdev->msix_bar) {
278 		x_start = vdev->msix_offset;
279 		x_end = vdev->msix_offset + vdev->msix_size;
280 	}
281 
282 	done = do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
283 			count, x_start, x_end, iswrite);
284 
285 	if (done >= 0)
286 		*ppos += done;
287 
288 	if (bar == PCI_ROM_RESOURCE)
289 		pci_unmap_rom(pdev, io);
290 out:
291 	return done;
292 }
293 
#ifdef CONFIG_VFIO_PCI_VGA
/* Undo the temporary mapping created by vfio_pci_vga_rw(). */
static void vfio_pci_vga_unmap(void __iomem *iomem, bool is_ioport)
{
	if (is_ioport)
		ioport_unmap(iomem);
	else
		iounmap(iomem);
}

/*
 * Read from or write to the legacy VGA ranges on behalf of a user buffer.
 *
 * The offset taken from *@ppos (masked with VFIO_PCI_OFFSET_MASK) must fall
 * in one of three windows: memory 0xa0000-0xbffff, or I/O ports 0x3b0-0x3bb
 * and 0x3c0-0x3df.  The request is clamped to the end of the window it
 * starts in.  The window is mapped for the duration of the call and the
 * matching VGA resource is held via vga_get_interruptible()/vga_put() around
 * the transfer.
 *
 * Returns the number of bytes transferred, by which *@ppos is also advanced,
 * or a negative errno: -EINVAL if the device has no VGA or the offset is
 * outside the windows, -ENOMEM if the mapping fails, or the error from
 * vga_get_interruptible() / do_io_rw().
 */
ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			       size_t count, loff_t *ppos, bool iswrite)
{
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	void __iomem *iomem = NULL;
	unsigned int rsrc;
	bool is_ioport;
	ssize_t done;
	loff_t off;
	u32 addr;
	int ret;

	if (!vdev->has_vga)
		return -EINVAL;

	if (pos > 0xbfffful)
		return -EINVAL;

	addr = (u32)pos;

	if (addr >= 0xa0000 && addr <= 0xbffff) {
		count = min(count, (size_t)(0xc0000 - pos));
		iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
		off = pos - 0xa0000;
		rsrc = VGA_RSRC_LEGACY_MEM;
		is_ioport = false;
	} else if (addr >= 0x3b0 && addr <= 0x3bb) {
		count = min(count, (size_t)(0x3bc - pos));
		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
		off = pos - 0x3b0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
	} else if (addr >= 0x3c0 && addr <= 0x3df) {
		count = min(count, (size_t)(0x3e0 - pos));
		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
		off = pos - 0x3c0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
	} else {
		return -EINVAL;
	}

	if (!iomem)
		return -ENOMEM;

	ret = vga_get_interruptible(vdev->pdev, rsrc);
	if (ret) {
		vfio_pci_vga_unmap(iomem, is_ioport);
		return ret;
	}

	/*
	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
	 * probing, so we don't currently worry about access in relation
	 * to the memory enable bit in the command register.
	 */
	done = do_io_rw(vdev, false, iomem, buf, off, count, 0, 0, iswrite);

	vga_put(vdev->pdev, rsrc);

	vfio_pci_vga_unmap(iomem, is_ioport);

	if (done >= 0)
		*ppos += done;

	return done;
}
#endif
363 
364 static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
365 					bool test_mem)
366 {
367 	switch (ioeventfd->count) {
368 	case 1:
369 		vfio_pci_core_iowrite8(ioeventfd->vdev, test_mem,
370 				       ioeventfd->data, ioeventfd->addr);
371 		break;
372 	case 2:
373 		vfio_pci_core_iowrite16(ioeventfd->vdev, test_mem,
374 					ioeventfd->data, ioeventfd->addr);
375 		break;
376 	case 4:
377 		vfio_pci_core_iowrite32(ioeventfd->vdev, test_mem,
378 					ioeventfd->data, ioeventfd->addr);
379 		break;
380 #ifdef iowrite64
381 	case 8:
382 		vfio_pci_core_iowrite64(ioeventfd->vdev, test_mem,
383 					ioeventfd->data, ioeventfd->addr);
384 		break;
385 #endif
386 	}
387 }
388 
389 static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
390 {
391 	struct vfio_pci_ioeventfd *ioeventfd = opaque;
392 	struct vfio_pci_core_device *vdev = ioeventfd->vdev;
393 
394 	if (ioeventfd->test_mem) {
395 		if (!down_read_trylock(&vdev->memory_lock))
396 			return 1; /* Lock contended, use thread */
397 		if (!__vfio_pci_memory_enabled(vdev)) {
398 			up_read(&vdev->memory_lock);
399 			return 0;
400 		}
401 	}
402 
403 	vfio_pci_ioeventfd_do_write(ioeventfd, false);
404 
405 	if (ioeventfd->test_mem)
406 		up_read(&vdev->memory_lock);
407 
408 	return 0;
409 }
410 
411 static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
412 {
413 	struct vfio_pci_ioeventfd *ioeventfd = opaque;
414 
415 	vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
416 }
417 
418 int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
419 		       uint64_t data, int count, int fd)
420 {
421 	struct pci_dev *pdev = vdev->pdev;
422 	loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
423 	int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
424 	struct vfio_pci_ioeventfd *ioeventfd;
425 
426 	/* Only support ioeventfds into BARs */
427 	if (bar > VFIO_PCI_BAR5_REGION_INDEX)
428 		return -EINVAL;
429 
430 	if (pos + count > pci_resource_len(pdev, bar))
431 		return -EINVAL;
432 
433 	/* Disallow ioeventfds working around MSI-X table writes */
434 	if (bar == vdev->msix_bar &&
435 	    !(pos + count <= vdev->msix_offset ||
436 	      pos >= vdev->msix_offset + vdev->msix_size))
437 		return -EINVAL;
438 
439 #ifndef iowrite64
440 	if (count == 8)
441 		return -EINVAL;
442 #endif
443 
444 	ret = vfio_pci_core_setup_barmap(vdev, bar);
445 	if (ret)
446 		return ret;
447 
448 	mutex_lock(&vdev->ioeventfds_lock);
449 
450 	list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
451 		if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
452 		    ioeventfd->data == data && ioeventfd->count == count) {
453 			if (fd == -1) {
454 				vfio_virqfd_disable(&ioeventfd->virqfd);
455 				list_del(&ioeventfd->next);
456 				vdev->ioeventfds_nr--;
457 				kfree(ioeventfd);
458 				ret = 0;
459 			} else
460 				ret = -EEXIST;
461 
462 			goto out_unlock;
463 		}
464 	}
465 
466 	if (fd < 0) {
467 		ret = -ENODEV;
468 		goto out_unlock;
469 	}
470 
471 	if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
472 		ret = -ENOSPC;
473 		goto out_unlock;
474 	}
475 
476 	ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT);
477 	if (!ioeventfd) {
478 		ret = -ENOMEM;
479 		goto out_unlock;
480 	}
481 
482 	ioeventfd->vdev = vdev;
483 	ioeventfd->addr = vdev->barmap[bar] + pos;
484 	ioeventfd->data = data;
485 	ioeventfd->pos = pos;
486 	ioeventfd->bar = bar;
487 	ioeventfd->count = count;
488 	ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;
489 
490 	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
491 				 vfio_pci_ioeventfd_thread, NULL,
492 				 &ioeventfd->virqfd, fd);
493 	if (ret) {
494 		kfree(ioeventfd);
495 		goto out_unlock;
496 	}
497 
498 	list_add(&ioeventfd->next, &vdev->ioeventfds_list);
499 	vdev->ioeventfds_nr++;
500 
501 out_unlock:
502 	mutex_unlock(&vdev->ioeventfds_lock);
503 
504 	return ret;
505 }
506