// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO PCI I/O Port & MMIO access
 *
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 *	Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/fs.h>
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/vfio.h>
#include <linux/vgaarb.h>
#include <linux/io-64-nonatomic-lo-hi.h>

#include "vfio_pci_priv.h"

#ifdef __LITTLE_ENDIAN
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#else
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#endif
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8

#define VFIO_IOWRITE(size) \
int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev,	\
			bool test_mem, u##size val, void __iomem *io)	\
{									\
	if (test_mem) {							\
		down_read(&vdev->memory_lock);				\
		if (!__vfio_pci_memory_enabled(vdev)) {			\
			up_read(&vdev->memory_lock);			\
			return -EIO;					\
		}							\
	}								\
									\
	vfio_iowrite##size(val, io);					\
									\
	if (test_mem)							\
		up_read(&vdev->memory_lock);				\
									\
	return 0;							\
}									\
EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size);

VFIO_IOWRITE(8)
VFIO_IOWRITE(16)
VFIO_IOWRITE(32)
VFIO_IOWRITE(64)

#define VFIO_IOREAD(size) \
int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev,	\
			bool test_mem, u##size *val, void __iomem *io)	\
{									\
	if (test_mem) {							\
		down_read(&vdev->memory_lock);				\
		if (!__vfio_pci_memory_enabled(vdev)) {			\
			up_read(&vdev->memory_lock);			\
			return -EIO;					\
		}							\
	}								\
									\
	*val = vfio_ioread##size(io);					\
									\
	if (test_mem)							\
		up_read(&vdev->memory_lock);				\
									\
	return 0;							\
}									\
EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size);

VFIO_IOREAD(8)
VFIO_IOREAD(16)
VFIO_IOREAD(32)
VFIO_IOREAD(64)

#define VFIO_IORDWR(size)						\
static int vfio_pci_iordwr##size(struct vfio_pci_core_device *vdev,	\
				bool iswrite, bool test_mem,		\
				void __iomem *io, char __user *buf,	\
				loff_t off, size_t *filled)		\
{									\
	u##size val;							\
	int ret;							\
									\
	if (iswrite) {							\
		if (copy_from_user(&val, buf, sizeof(val)))		\
			return -EFAULT;					\
									\
		ret = vfio_pci_core_iowrite##size(vdev, test_mem,	\
						  val, io + off);	\
		if (ret)						\
			return ret;					\
	} else {							\
		ret = vfio_pci_core_ioread##size(vdev, test_mem,	\
						 &val, io + off);	\
		if (ret)						\
			return ret;					\
									\
		if (copy_to_user(buf, &val, sizeof(val)))		\
			return -EFAULT;					\
	}								\
									\
	*filled = sizeof(val);						\
	return 0;							\
}									\

VFIO_IORDWR(8)
VFIO_IORDWR(16)
VFIO_IORDWR(32)
VFIO_IORDWR(64)
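
/*
 * For illustration, VFIO_IOWRITE(32) above expands to roughly
 *
 *	int vfio_pci_core_iowrite32(struct vfio_pci_core_device *vdev,
 *				    bool test_mem, u32 val, void __iomem *io);
 *
 * i.e. an exported accessor that, when test_mem is set, holds memory_lock
 * and verifies the device's memory space is enabled before touching the
 * mapping.  VFIO_IOREAD() generates the matching
 * vfio_pci_core_ioread{8,16,32,64}() accessors, and VFIO_IORDWR() generates
 * the static vfio_pci_iordwr{8,16,32,64}() helpers that move one naturally
 * sized access between a user buffer and the mapping.
 */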

/*
 * Read or write from an __iomem region (MMIO or I/O port) with an excluded
 * range which is inaccessible.  The excluded range drops writes and fills
 * reads with -1.  This is intended for handling MSI-X vector tables and
 * leftover space for ROM BARs.
 */
ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
			       void __iomem *io, char __user *buf,
			       loff_t off, size_t count, size_t x_start,
			       size_t x_end, bool iswrite)
{
	ssize_t done = 0;
	int ret;

	while (count) {
		size_t fillable, filled;

		if (off < x_start)
			fillable = min(count, (size_t)(x_start - off));
		else if (off >= x_end)
			fillable = count;
		else
			fillable = 0;

		if (fillable >= 8 && !(off % 8)) {
			ret = vfio_pci_iordwr64(vdev, iswrite, test_mem,
						io, buf, off, &filled);
			if (ret)
				return ret;

		} else if (fillable >= 4 && !(off % 4)) {
			ret = vfio_pci_iordwr32(vdev, iswrite, test_mem,
						io, buf, off, &filled);
			if (ret)
				return ret;

		} else if (fillable >= 2 && !(off % 2)) {
			ret = vfio_pci_iordwr16(vdev, iswrite, test_mem,
						io, buf, off, &filled);
			if (ret)
				return ret;

		} else if (fillable) {
			ret = vfio_pci_iordwr8(vdev, iswrite, test_mem,
					       io, buf, off, &filled);
			if (ret)
				return ret;

		} else {
			/* Fill reads with -1, drop writes */
			filled = min(count, (size_t)(x_end - off));
			if (!iswrite) {
				u8 val = 0xFF;
				size_t i;

				for (i = 0; i < filled; i++)
					if (copy_to_user(buf + i, &val, 1))
						return -EFAULT;
			}
		}

		count -= filled;
		done += filled;
		off += filled;
		buf += filled;
	}

	return done;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);

int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
{
	struct pci_dev *pdev = vdev->pdev;
	int ret;
	void __iomem *io;

	if (vdev->barmap[bar])
		return 0;

	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
	if (ret)
		return ret;

	io = pci_iomap(pdev, bar, 0);
	if (!io) {
		pci_release_selected_regions(pdev, 1 << bar);
		return -ENOMEM;
	}

	vdev->barmap[bar] = io;

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);
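
/*
 * vfio_pci_bar_rw() services userspace read/write of a BAR region: it bounds
 * the access to the resource (or, for a ROM exposed via pdev->rom/romlen
 * rather than a ROM BAR, to the size-rounded ROM), maps the BAR or ROM, sets
 * up the excluded range (the MSI-X vector table, or the space past the end
 * of the actual ROM), and then defers to vfio_pci_core_do_io_rw() above.
 */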
ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	size_t x_start = 0, x_end = 0;
	resource_size_t end;
	void __iomem *io;
	struct resource *res = &vdev->pdev->resource[bar];
	ssize_t done;

	if (pci_resource_start(pdev, bar))
		end = pci_resource_len(pdev, bar);
	else if (bar == PCI_ROM_RESOURCE && pdev->rom && pdev->romlen)
		end = roundup_pow_of_two(pdev->romlen);
	else
		return -EINVAL;

	if (pos >= end)
		return -EINVAL;

	count = min(count, (size_t)(end - pos));

	if (bar == PCI_ROM_RESOURCE) {
		/*
		 * The ROM can fill less space than the BAR, so we start the
		 * excluded range at the end of the actual ROM.  This makes
		 * filling large ROM BARs much faster.
		 */
		if (pci_resource_start(pdev, bar)) {
			io = pci_map_rom(pdev, &x_start);
		} else {
			io = ioremap(pdev->rom, pdev->romlen);
			x_start = pdev->romlen;
		}
		if (!io)
			return -ENOMEM;
		x_end = end;
	} else {
		int ret = vfio_pci_core_setup_barmap(vdev, bar);
		if (ret) {
			done = ret;
			goto out;
		}

		io = vdev->barmap[bar];
	}

	if (bar == vdev->msix_bar) {
		x_start = vdev->msix_offset;
		x_end = vdev->msix_offset + vdev->msix_size;
	}

	done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
				      count, x_start, x_end, iswrite);

	if (done >= 0)
		*ppos += done;

	if (bar == PCI_ROM_RESOURCE) {
		if (pci_resource_start(pdev, bar))
			pci_unmap_rom(pdev, io);
		else
			iounmap(io);
	}

out:
	return done;
}

#ifdef CONFIG_VFIO_PCI_VGA
ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	int ret;
	loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
	void __iomem *iomem = NULL;
	unsigned int rsrc;
	bool is_ioport;
	ssize_t done;

	if (!vdev->has_vga)
		return -EINVAL;

	if (pos > 0xbfffful)
		return -EINVAL;

	switch ((u32)pos) {
	case 0xa0000 ... 0xbffff:
		count = min(count, (size_t)(0xc0000 - pos));
		iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
		off = pos - 0xa0000;
		rsrc = VGA_RSRC_LEGACY_MEM;
		is_ioport = false;
		break;
	case 0x3b0 ... 0x3bb:
		count = min(count, (size_t)(0x3bc - pos));
		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
		off = pos - 0x3b0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	case 0x3c0 ... 0x3df:
		count = min(count, (size_t)(0x3e0 - pos));
		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
		off = pos - 0x3c0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	default:
		return -EINVAL;
	}

	if (!iomem)
		return -ENOMEM;

	ret = vga_get_interruptible(vdev->pdev, rsrc);
	if (ret) {
		is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
		return ret;
	}

	/*
	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
	 * probing, so we don't currently worry about access in relation
	 * to the memory enable bit in the command register.
	 */
	done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
				      0, 0, iswrite);

	vga_put(vdev->pdev, rsrc);

	is_ioport ? ioport_unmap(iomem) : iounmap(iomem);

	if (done >= 0)
		*ppos += done;

	return done;
}
#endif
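
/*
 * ioeventfd support: userspace registers an (offset, data, count) tuple for
 * a BAR together with an eventfd, and signalling the eventfd performs the
 * programmed write in the kernel through the accessors above.  The virqfd
 * handler below runs from the eventfd wakeup path, so it only trylocks
 * memory_lock and punts to the thread variant when the lock is contended.
 */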
static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
					bool test_mem)
{
	switch (ioeventfd->count) {
	case 1:
		vfio_pci_core_iowrite8(ioeventfd->vdev, test_mem,
				       ioeventfd->data, ioeventfd->addr);
		break;
	case 2:
		vfio_pci_core_iowrite16(ioeventfd->vdev, test_mem,
					ioeventfd->data, ioeventfd->addr);
		break;
	case 4:
		vfio_pci_core_iowrite32(ioeventfd->vdev, test_mem,
					ioeventfd->data, ioeventfd->addr);
		break;
	case 8:
		vfio_pci_core_iowrite64(ioeventfd->vdev, test_mem,
					ioeventfd->data, ioeventfd->addr);
		break;
	}
}

static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;
	struct vfio_pci_core_device *vdev = ioeventfd->vdev;

	if (ioeventfd->test_mem) {
		if (!down_read_trylock(&vdev->memory_lock))
			return 1; /* Lock contended, use thread */
		if (!__vfio_pci_memory_enabled(vdev)) {
			up_read(&vdev->memory_lock);
			return 0;
		}
	}

	vfio_pci_ioeventfd_do_write(ioeventfd, false);

	if (ioeventfd->test_mem)
		up_read(&vdev->memory_lock);

	return 0;
}

static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;

	vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
}
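
/*
 * Add or remove an ioeventfd for a BAR offset.  This is the backend for the
 * VFIO_DEVICE_IOEVENTFD ioctl: fd >= 0 registers a new write of @data/@count
 * at @offset, triggered whenever the eventfd is signalled, while fd == -1
 * tears down a previously registered entry with matching parameters.
 */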
int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
		       uint64_t data, int count, int fd)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
	int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
	struct vfio_pci_ioeventfd *ioeventfd;

	/* Only support ioeventfds into BARs */
	if (bar > VFIO_PCI_BAR5_REGION_INDEX)
		return -EINVAL;

	if (pos + count > pci_resource_len(pdev, bar))
		return -EINVAL;

	/* Disallow ioeventfds working around MSI-X table writes */
	if (bar == vdev->msix_bar &&
	    !(pos + count <= vdev->msix_offset ||
	      pos >= vdev->msix_offset + vdev->msix_size))
		return -EINVAL;

	ret = vfio_pci_core_setup_barmap(vdev, bar);
	if (ret)
		return ret;

	mutex_lock(&vdev->ioeventfds_lock);

	list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
		if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
		    ioeventfd->data == data && ioeventfd->count == count) {
			if (fd == -1) {
				vfio_virqfd_disable(&ioeventfd->virqfd);
				list_del(&ioeventfd->next);
				vdev->ioeventfds_nr--;
				kfree(ioeventfd);
				ret = 0;
			} else
				ret = -EEXIST;

			goto out_unlock;
		}
	}

	if (fd < 0) {
		ret = -ENODEV;
		goto out_unlock;
	}

	if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT);
	if (!ioeventfd) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	ioeventfd->vdev = vdev;
	ioeventfd->addr = vdev->barmap[bar] + pos;
	ioeventfd->data = data;
	ioeventfd->pos = pos;
	ioeventfd->bar = bar;
	ioeventfd->count = count;
	ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;

	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
				 vfio_pci_ioeventfd_thread, NULL,
				 &ioeventfd->virqfd, fd);
	if (ret) {
		kfree(ioeventfd);
		goto out_unlock;
	}

	list_add(&ioeventfd->next, &vdev->ioeventfds_list);
	vdev->ioeventfds_nr++;

out_unlock:
	mutex_unlock(&vdev->ioeventfds_lock);

	return ret;
}