1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * VFIO PCI I/O Port & MMIO access
4 *
5 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
6 * Author: Alex Williamson <alex.williamson@redhat.com>
7 *
8 * Derived from original vfio:
9 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
10 * Author: Tom Lyon, pugs@cisco.com
11 */
12
13 #include <linux/fs.h>
14 #include <linux/pci.h>
15 #include <linux/uaccess.h>
16 #include <linux/io.h>
17 #include <linux/vfio.h>
18 #include <linux/vgaarb.h>
19 #include <linux/io-64-nonatomic-lo-hi.h>
20
21 #include "vfio_pci_priv.h"
22
/*
 * Accessor aliases used by the read/write helpers in this file.
 *
 * Single-byte accesses always use the plain ioread8()/iowrite8().  The
 * multi-byte aliases resolve to the plain accessors when __LITTLE_ENDIAN
 * is defined and to the "be" variants otherwise.
 */
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8

#ifndef __LITTLE_ENDIAN
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#else
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#endif
40
41 #define VFIO_IOWRITE(size) \
42 int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev, \
43 bool test_mem, u##size val, void __iomem *io) \
44 { \
45 if (test_mem) { \
46 down_read(&vdev->memory_lock); \
47 if (!__vfio_pci_memory_enabled(vdev)) { \
48 up_read(&vdev->memory_lock); \
49 return -EIO; \
50 } \
51 } \
52 \
53 vfio_iowrite##size(val, io); \
54 \
55 if (test_mem) \
56 up_read(&vdev->memory_lock); \
57 \
58 return 0; \
59 } \
60 EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size);
61
62 VFIO_IOWRITE(8)
63 VFIO_IOWRITE(16)
64 VFIO_IOWRITE(32)
65 VFIO_IOWRITE(64)
66
67 #define VFIO_IOREAD(size) \
68 int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev, \
69 bool test_mem, u##size *val, void __iomem *io) \
70 { \
71 if (test_mem) { \
72 down_read(&vdev->memory_lock); \
73 if (!__vfio_pci_memory_enabled(vdev)) { \
74 up_read(&vdev->memory_lock); \
75 return -EIO; \
76 } \
77 } \
78 \
79 *val = vfio_ioread##size(io); \
80 \
81 if (test_mem) \
82 up_read(&vdev->memory_lock); \
83 \
84 return 0; \
85 } \
86 EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size);
87
88 VFIO_IOREAD(8)
89 VFIO_IOREAD(16)
90 VFIO_IOREAD(32)
91 VFIO_IOREAD(64)
92
93 #define VFIO_IORDWR(size) \
94 static int vfio_pci_iordwr##size(struct vfio_pci_core_device *vdev,\
95 bool iswrite, bool test_mem, \
96 void __iomem *io, char __user *buf, \
97 loff_t off, size_t *filled) \
98 { \
99 u##size val; \
100 int ret; \
101 \
102 if (iswrite) { \
103 if (copy_from_user(&val, buf, sizeof(val))) \
104 return -EFAULT; \
105 \
106 ret = vfio_pci_core_iowrite##size(vdev, test_mem, \
107 val, io + off); \
108 if (ret) \
109 return ret; \
110 } else { \
111 ret = vfio_pci_core_ioread##size(vdev, test_mem, \
112 &val, io + off); \
113 if (ret) \
114 return ret; \
115 \
116 if (copy_to_user(buf, &val, sizeof(val))) \
117 return -EFAULT; \
118 } \
119 \
120 *filled = sizeof(val); \
121 return 0; \
122 } \
123
124 VFIO_IORDWR(8)
125 VFIO_IORDWR(16)
126 VFIO_IORDWR(32)
127 VFIO_IORDWR(64)
128
129 /*
130 * Read or write from an __iomem region (MMIO or I/O port) with an excluded
131 * range which is inaccessible. The excluded range drops writes and fills
132 * reads with -1. This is intended for handling MSI-X vector tables and
133 * leftover space for ROM BARs.
134 */
vfio_pci_core_do_io_rw(struct vfio_pci_core_device * vdev,bool test_mem,void __iomem * io,char __user * buf,loff_t off,size_t count,size_t x_start,size_t x_end,bool iswrite,enum vfio_pci_io_width max_width)135 ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
136 void __iomem *io, char __user *buf,
137 loff_t off, size_t count, size_t x_start,
138 size_t x_end, bool iswrite,
139 enum vfio_pci_io_width max_width)
140 {
141 ssize_t done = 0;
142 int ret;
143
144 while (count) {
145 size_t fillable, filled;
146
147 if (off < x_start)
148 fillable = min(count, (size_t)(x_start - off));
149 else if (off >= x_end)
150 fillable = count;
151 else
152 fillable = 0;
153
154 if (fillable >= 8 && !(off % 8) && max_width >= 8) {
155 ret = vfio_pci_iordwr64(vdev, iswrite, test_mem,
156 io, buf, off, &filled);
157 if (ret)
158 return ret;
159
160 } else if (fillable >= 4 && !(off % 4) && max_width >= 4) {
161 ret = vfio_pci_iordwr32(vdev, iswrite, test_mem,
162 io, buf, off, &filled);
163 if (ret)
164 return ret;
165
166 } else if (fillable >= 2 && !(off % 2) && max_width >= 2) {
167 ret = vfio_pci_iordwr16(vdev, iswrite, test_mem,
168 io, buf, off, &filled);
169 if (ret)
170 return ret;
171
172 } else if (fillable) {
173 ret = vfio_pci_iordwr8(vdev, iswrite, test_mem,
174 io, buf, off, &filled);
175 if (ret)
176 return ret;
177
178 } else {
179 /* Fill reads with -1, drop writes */
180 filled = min(count, (size_t)(x_end - off));
181 if (!iswrite) {
182 u8 val = 0xFF;
183 size_t i;
184
185 for (i = 0; i < filled; i++)
186 if (copy_to_user(buf + i, &val, 1))
187 return -EFAULT;
188 }
189 }
190
191 count -= filled;
192 done += filled;
193 off += filled;
194 buf += filled;
195 }
196
197 return done;
198 }
199 EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
200
201 /*
202 * The barmap is set up in vfio_pci_core_enable(). Callers use this
203 * function to check that the BAR resources are requested or that the
204 * pci_iomap() was done.
205 */
vfio_pci_core_setup_barmap(struct vfio_pci_core_device * vdev,int bar)206 int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
207 {
208 if (IS_ERR(vdev->barmap[bar]))
209 return PTR_ERR(vdev->barmap[bar]);
210 return 0;
211 }
212 EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);
213
vfio_pci_bar_rw(struct vfio_pci_core_device * vdev,char __user * buf,size_t count,loff_t * ppos,bool iswrite)214 ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
215 size_t count, loff_t *ppos, bool iswrite)
216 {
217 struct pci_dev *pdev = vdev->pdev;
218 loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
219 int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
220 size_t x_start = 0, x_end = 0;
221 resource_size_t end;
222 void __iomem *io;
223 struct resource *res = &vdev->pdev->resource[bar];
224 ssize_t done;
225 enum vfio_pci_io_width max_width = VFIO_PCI_IO_WIDTH_8;
226
227 if (pci_resource_start(pdev, bar))
228 end = pci_resource_len(pdev, bar);
229 else if (bar == PCI_ROM_RESOURCE && pdev->rom && pdev->romlen)
230 end = roundup_pow_of_two(pdev->romlen);
231 else
232 return -EINVAL;
233
234 if (pos >= end)
235 return -EINVAL;
236
237 count = min(count, (size_t)(end - pos));
238
239 if (bar == PCI_ROM_RESOURCE) {
240 /*
241 * The ROM can fill less space than the BAR, so we start the
242 * excluded range at the end of the actual ROM. This makes
243 * filling large ROM BARs much faster.
244 */
245 if (pci_resource_start(pdev, bar)) {
246 io = pci_map_rom(pdev, &x_start);
247 } else {
248 io = ioremap(pdev->rom, pdev->romlen);
249 x_start = pdev->romlen;
250 }
251 if (!io)
252 return -ENOMEM;
253 x_end = end;
254
255 /*
256 * Certain devices (e.g. Intel X710) don't support qword
257 * access to the ROM bar. Otherwise PCI AER errors might be
258 * triggered.
259 *
260 * Disable qword access to the ROM bar universally, which
261 * worked reliably for years before qword access is enabled.
262 */
263 max_width = VFIO_PCI_IO_WIDTH_4;
264 } else {
265 int ret = vfio_pci_core_setup_barmap(vdev, bar);
266 if (ret) {
267 done = ret;
268 goto out;
269 }
270
271 io = vdev->barmap[bar];
272 }
273
274 if (bar == vdev->msix_bar) {
275 x_start = vdev->msix_offset;
276 x_end = vdev->msix_offset + vdev->msix_size;
277 }
278
279 done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
280 count, x_start, x_end, iswrite, max_width);
281
282 if (done >= 0)
283 *ppos += done;
284
285 if (bar == PCI_ROM_RESOURCE) {
286 if (pci_resource_start(pdev, bar))
287 pci_unmap_rom(pdev, io);
288 else
289 iounmap(io);
290 }
291
292 out:
293 return done;
294 }
295
#ifdef CONFIG_VFIO_PCI_VGA
/*
 * Read from or write to one of the legacy VGA ranges on behalf of user
 * space.  The offset encoded in *@ppos selects the legacy memory window
 * (0xa0000-0xbffff) or one of the legacy I/O port windows (0x3b0-0x3bb,
 * 0x3c0-0x3df); anything else is rejected with -EINVAL.  The request is
 * clamped to the end of the selected window, which is mapped only for the
 * duration of the access and accessed while holding the matching VGA
 * arbiter resource.
 *
 * Returns the number of bytes transferred (advancing *@ppos by that amount)
 * or a negative errno.
 */
ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	void __iomem *iomem = NULL;
	loff_t base, limit, off;
	unsigned int rsrc;
	bool is_ioport;
	ssize_t done;
	int ret;

	if (!vdev->has_vga)
		return -EINVAL;

	if (pos > 0xbfffful)
		return -EINVAL;

	/* Pick the window: [base, limit) and the arbiter resource guarding it */
	switch ((u32)pos) {
	case 0xa0000 ... 0xbffff:
		base = 0xa0000;
		limit = 0xc0000;
		rsrc = VGA_RSRC_LEGACY_MEM;
		is_ioport = false;
		break;
	case 0x3b0 ... 0x3bb:
		base = 0x3b0;
		limit = 0x3bc;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	case 0x3c0 ... 0x3df:
		base = 0x3c0;
		limit = 0x3e0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	default:
		return -EINVAL;
	}

	count = min(count, (size_t)(limit - pos));
	off = pos - base;

	if (is_ioport)
		iomem = ioport_map(base, limit - base);
	else
		iomem = ioremap(base, limit - base);

	if (!iomem)
		return -ENOMEM;

	ret = vga_get_interruptible(vdev->pdev, rsrc);
	if (ret) {
		if (is_ioport)
			ioport_unmap(iomem);
		else
			iounmap(iomem);
		return ret;
	}

	/*
	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
	 * probing, so we don't currently worry about access in relation
	 * to the memory enable bit in the command register.
	 */
	done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
				      0, 0, iswrite, VFIO_PCI_IO_WIDTH_4);

	vga_put(vdev->pdev, rsrc);

	if (is_ioport)
		ioport_unmap(iomem);
	else
		iounmap(iomem);

	if (done >= 0)
		*ppos += done;

	return done;
}
#endif
366
/*
 * Perform the device write programmed into @ioeventfd, using the access
 * width recorded in ioeventfd->count (1, 2, 4 or 8 bytes).  Any other width
 * results in no write.  @test_mem is forwarded to the write helper; its
 * return value is ignored.
 */
static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
					bool test_mem)
{
	struct vfio_pci_core_device *vdev = ioeventfd->vdev;
	void __iomem *addr = ioeventfd->addr;

	switch (ioeventfd->count) {
	case 8:
		vfio_pci_core_iowrite64(vdev, test_mem, ioeventfd->data, addr);
		break;
	case 4:
		vfio_pci_core_iowrite32(vdev, test_mem, ioeventfd->data, addr);
		break;
	case 2:
		vfio_pci_core_iowrite16(vdev, test_mem, ioeventfd->data, addr);
		break;
	case 1:
		vfio_pci_core_iowrite8(vdev, test_mem, ioeventfd->data, addr);
		break;
	}
}
389
vfio_pci_ioeventfd_handler(void * opaque,void * unused)390 static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
391 {
392 struct vfio_pci_ioeventfd *ioeventfd = opaque;
393 struct vfio_pci_core_device *vdev = ioeventfd->vdev;
394
395 if (ioeventfd->test_mem) {
396 if (!down_read_trylock(&vdev->memory_lock))
397 return 1; /* Lock contended, use thread */
398 if (!__vfio_pci_memory_enabled(vdev)) {
399 up_read(&vdev->memory_lock);
400 return 0;
401 }
402 }
403
404 vfio_pci_ioeventfd_do_write(ioeventfd, false);
405
406 if (ioeventfd->test_mem)
407 up_read(&vdev->memory_lock);
408
409 return 0;
410 }
411
vfio_pci_ioeventfd_thread(void * opaque,void * unused)412 static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
413 {
414 struct vfio_pci_ioeventfd *ioeventfd = opaque;
415
416 vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
417 }
418
/*
 * Add or remove an ioeventfd that writes @data with a width of @count bytes
 * to the BAR location encoded in @offset whenever @fd is signalled.
 *
 * If an ioeventfd with the same BAR, position, data and width already
 * exists, @fd == -1 removes it and any other @fd fails with -EEXIST.
 * Otherwise a new ioeventfd is created for @fd.
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL  the target is not a BAR, lies outside the BAR, overlaps the
 *            MSI-X table, or @count is 8
 *   -ENODEV  @fd is negative and no matching ioeventfd exists
 *   -ENOSPC  VFIO_PCI_IOEVENTFD_MAX ioeventfds are already registered
 *   -ENOMEM  allocation failure
 *   or the error returned by vfio_pci_core_setup_barmap() or
 *   vfio_virqfd_enable().
 */
int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
		       uint64_t data, int count, int fd)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
	int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
	struct vfio_pci_ioeventfd *ioeventfd;

	/*
	 * Only support ioeventfds into BARs.  bar is a signed index computed
	 * from the caller-supplied offset and is used below to index both the
	 * PCI resource array and vdev->barmap[], so reject negative values as
	 * well as anything past the last BAR.
	 */
	if (bar < 0 || bar > VFIO_PCI_BAR5_REGION_INDEX)
		return -EINVAL;

	if (pos + count > pci_resource_len(pdev, bar))
		return -EINVAL;

	/* Disallow ioeventfds working around MSI-X table writes */
	if (bar == vdev->msix_bar &&
	    !(pos + count <= vdev->msix_offset ||
	      pos >= vdev->msix_offset + vdev->msix_size))
		return -EINVAL;

	/* 8-byte ioeventfds are not accepted */
	if (count == 8)
		return -EINVAL;

	ret = vfio_pci_core_setup_barmap(vdev, bar);
	if (ret)
		return ret;

	mutex_lock(&vdev->ioeventfds_lock);

	/* An exact match is either torn down (fd == -1) or a duplicate */
	list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
		if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
		    ioeventfd->data == data && ioeventfd->count == count) {
			if (fd == -1) {
				vfio_virqfd_disable(&ioeventfd->virqfd);
				list_del(&ioeventfd->next);
				vdev->ioeventfds_nr--;
				kfree(ioeventfd);
				ret = 0;
			} else
				ret = -EEXIST;

			goto out_unlock;
		}
	}

	/* Nothing matched, so a removal request has nothing to remove */
	if (fd < 0) {
		ret = -ENODEV;
		goto out_unlock;
	}

	if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	ioeventfd = kzalloc_obj(*ioeventfd, GFP_KERNEL_ACCOUNT);
	if (!ioeventfd) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	ioeventfd->vdev = vdev;
	ioeventfd->addr = vdev->barmap[bar] + pos;
	ioeventfd->data = data;
	ioeventfd->pos = pos;
	ioeventfd->bar = bar;
	ioeventfd->count = count;
	/* Only memory BARs need the memory-enable check before a write */
	ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;

	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
				 vfio_pci_ioeventfd_thread, NULL,
				 &ioeventfd->virqfd, fd);
	if (ret) {
		kfree(ioeventfd);
		goto out_unlock;
	}

	list_add(&ioeventfd->next, &vdev->ioeventfds_list);
	vdev->ioeventfds_nr++;

out_unlock:
	mutex_unlock(&vdev->ioeventfds_lock);

	return ret;
}
505