// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO PCI I/O Port & MMIO access
 *
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 * Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/fs.h>
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/vfio.h>
#include <linux/vgaarb.h>

#include "vfio_pci_priv.h"

#ifdef __LITTLE_ENDIAN
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#else
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#endif
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8

#define VFIO_IOWRITE(size)						\
int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev,	\
			bool test_mem, u##size val, void __iomem *io)	\
{									\
	if (test_mem) {							\
		down_read(&vdev->memory_lock);				\
		if (!__vfio_pci_memory_enabled(vdev)) {			\
			up_read(&vdev->memory_lock);			\
			return -EIO;					\
		}							\
	}								\
									\
	vfio_iowrite##size(val, io);					\
									\
	if (test_mem)							\
		up_read(&vdev->memory_lock);				\
									\
	return 0;							\
}									\
EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size);

VFIO_IOWRITE(8)
VFIO_IOWRITE(16)
VFIO_IOWRITE(32)
#ifdef iowrite64
VFIO_IOWRITE(64)
#endif

#define VFIO_IOREAD(size)						\
int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev,	\
			bool test_mem, u##size *val, void __iomem *io)	\
{									\
	if (test_mem) {							\
		down_read(&vdev->memory_lock);				\
		if (!__vfio_pci_memory_enabled(vdev)) {			\
			up_read(&vdev->memory_lock);			\
			return -EIO;					\
		}							\
	}								\
									\
	*val = vfio_ioread##size(io);					\
									\
	if (test_mem)							\
		up_read(&vdev->memory_lock);				\
									\
	return 0;							\
}									\
EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size);

VFIO_IOREAD(8)
VFIO_IOREAD(16)
VFIO_IOREAD(32)
#ifdef ioread64
VFIO_IOREAD(64)
#endif

#define VFIO_IORDWR(size)						\
static int vfio_pci_iordwr##size(struct vfio_pci_core_device *vdev,	\
				 bool iswrite, bool test_mem,		\
				 void __iomem *io, char __user *buf,	\
				 loff_t off, size_t *filled)		\
{									\
	u##size val;							\
	int ret;							\
									\
	if (iswrite) {							\
		if (copy_from_user(&val, buf, sizeof(val)))		\
			return -EFAULT;					\
									\
		ret = vfio_pci_core_iowrite##size(vdev, test_mem,	\
						  val, io + off);	\
		if (ret)						\
			return ret;					\
	} else {							\
		ret = vfio_pci_core_ioread##size(vdev, test_mem,	\
						 &val, io + off);	\
		if (ret)						\
			return ret;					\
									\
		if (copy_to_user(buf, &val, sizeof(val)))		\
			return -EFAULT;					\
	}								\
									\
	*filled = sizeof(val);						\
	return 0;							\
}

VFIO_IORDWR(8)
VFIO_IORDWR(16)
VFIO_IORDWR(32)
#if defined(ioread64) && defined(iowrite64)
VFIO_IORDWR(64)
#endif
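/*
 * Usage sketch (illustrative only, not part of the build): a vfio-pci
 * variant driver could ring a hypothetical 32-bit doorbell register in an
 * already-mapped BAR while respecting the guest-visible memory enable
 * state:
 *
 *	ret = vfio_pci_core_iowrite32(vdev, true, val,
 *				      vdev->barmap[0] + DOORBELL_OFFSET);
 *
 * DOORBELL_OFFSET is a made-up constant.  With test_mem == true the helper
 * takes memory_lock and fails with -EIO if the memory enable bit in the
 * command register is clear, so the access never reaches a disabled BAR.
 */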
/*
 * Read or write from an __iomem region (MMIO or I/O port) with an excluded
 * range which is inaccessible.  The excluded range drops writes and fills
 * reads with -1.  This is intended for handling MSI-X vector tables and
 * leftover space for ROM BARs.
 */
ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
			       void __iomem *io, char __user *buf,
			       loff_t off, size_t count, size_t x_start,
			       size_t x_end, bool iswrite)
{
	ssize_t done = 0;
	int ret;

	while (count) {
		size_t fillable, filled;

		if (off < x_start)
			fillable = min(count, (size_t)(x_start - off));
		else if (off >= x_end)
			fillable = count;
		else
			fillable = 0;

#if defined(ioread64) && defined(iowrite64)
		if (fillable >= 8 && !(off % 8)) {
			ret = vfio_pci_iordwr64(vdev, iswrite, test_mem,
						io, buf, off, &filled);
			if (ret)
				return ret;

		} else
#endif
		if (fillable >= 4 && !(off % 4)) {
			ret = vfio_pci_iordwr32(vdev, iswrite, test_mem,
						io, buf, off, &filled);
			if (ret)
				return ret;

		} else if (fillable >= 2 && !(off % 2)) {
			ret = vfio_pci_iordwr16(vdev, iswrite, test_mem,
						io, buf, off, &filled);
			if (ret)
				return ret;

		} else if (fillable) {
			ret = vfio_pci_iordwr8(vdev, iswrite, test_mem,
					       io, buf, off, &filled);
			if (ret)
				return ret;

		} else {
			/* Fill reads with -1, drop writes */
			filled = min(count, (size_t)(x_end - off));
			if (!iswrite) {
				u8 val = 0xFF;
				size_t i;

				for (i = 0; i < filled; i++)
					if (copy_to_user(buf + i, &val, 1))
						return -EFAULT;
			}
		}

		count -= filled;
		done += filled;
		off += filled;
		buf += filled;
	}

	return done;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);

int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
{
	struct pci_dev *pdev = vdev->pdev;
	int ret;
	void __iomem *io;

	if (vdev->barmap[bar])
		return 0;

	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
	if (ret)
		return ret;

	io = pci_iomap(pdev, bar, 0);
	if (!io) {
		pci_release_selected_regions(pdev, 1 << bar);
		return -ENOMEM;
	}

	vdev->barmap[bar] = io;

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);
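/*
 * Illustrative caller pattern (a sketch, not new functionality): BAR
 * mappings are created lazily on first use, so both the read/write and
 * ioeventfd paths below start with:
 *
 *	ret = vfio_pci_core_setup_barmap(vdev, bar);
 *	if (ret)
 *		return ret;
 *	io = vdev->barmap[bar];
 *
 * A repeat call for an already-mapped BAR returns 0 without remapping; the
 * mapping persists until the core releases the device.
 */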
ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	size_t x_start = 0, x_end = 0;
	resource_size_t end;
	void __iomem *io;
	struct resource *res = &vdev->pdev->resource[bar];
	ssize_t done;

	if (pci_resource_start(pdev, bar))
		end = pci_resource_len(pdev, bar);
	else if (bar == PCI_ROM_RESOURCE &&
		 pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
		end = 0x20000;
	else
		return -EINVAL;

	if (pos >= end)
		return -EINVAL;

	count = min(count, (size_t)(end - pos));

	if (bar == PCI_ROM_RESOURCE) {
		/*
		 * The ROM can fill less space than the BAR, so we start the
		 * excluded range at the end of the actual ROM.  This makes
		 * filling large ROM BARs much faster.
		 */
		io = pci_map_rom(pdev, &x_start);
		if (!io) {
			done = -ENOMEM;
			goto out;
		}
		x_end = end;
	} else {
		int ret = vfio_pci_core_setup_barmap(vdev, bar);
		if (ret) {
			done = ret;
			goto out;
		}

		io = vdev->barmap[bar];
	}

	if (bar == vdev->msix_bar) {
		x_start = vdev->msix_offset;
		x_end = vdev->msix_offset + vdev->msix_size;
	}

	done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io,
				      buf, pos, count, x_start, x_end, iswrite);

	if (done >= 0)
		*ppos += done;

	if (bar == PCI_ROM_RESOURCE)
		pci_unmap_rom(pdev, io);
out:
	return done;
}

#ifdef CONFIG_VFIO_PCI_VGA
ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	int ret;
	loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
	void __iomem *iomem = NULL;
	unsigned int rsrc;
	bool is_ioport;
	ssize_t done;

	if (!vdev->has_vga)
		return -EINVAL;

	if (pos > 0xbfffful)
		return -EINVAL;

	switch ((u32)pos) {
	case 0xa0000 ... 0xbffff:
		count = min(count, (size_t)(0xc0000 - pos));
		iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
		off = pos - 0xa0000;
		rsrc = VGA_RSRC_LEGACY_MEM;
		is_ioport = false;
		break;
	case 0x3b0 ... 0x3bb:
		count = min(count, (size_t)(0x3bc - pos));
		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
		off = pos - 0x3b0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	case 0x3c0 ... 0x3df:
		count = min(count, (size_t)(0x3e0 - pos));
		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
		off = pos - 0x3c0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	default:
		return -EINVAL;
	}

	if (!iomem)
		return -ENOMEM;

	ret = vga_get_interruptible(vdev->pdev, rsrc);
	if (ret) {
		is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
		return ret;
	}

	/*
	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
	 * probing, so we don't currently worry about access in relation
	 * to the memory enable bit in the command register.
	 */
	done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
				      0, 0, iswrite);

	vga_put(vdev->pdev, rsrc);

	is_ioport ? ioport_unmap(iomem) : iounmap(iomem);

	if (done >= 0)
		*ppos += done;

	return done;
}
#endif
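/*
 * Illustrative note (not part of this file): the *ppos values decoded above
 * pack a region index into the upper bits of the file offset.  Userspace
 * conceptually does the inverse, using the offset reported by
 * VFIO_DEVICE_GET_REGION_INFO for the BAR region:
 *
 *	pread(device_fd, &val, sizeof(val), region_info.offset + reg);
 *
 * which vfio_pci_bar_rw() splits back into (bar, pos) with
 * VFIO_PCI_OFFSET_TO_INDEX() and VFIO_PCI_OFFSET_MASK.
 */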
static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
					bool test_mem)
{
	switch (ioeventfd->count) {
	case 1:
		vfio_pci_core_iowrite8(ioeventfd->vdev, test_mem,
				       ioeventfd->data, ioeventfd->addr);
		break;
	case 2:
		vfio_pci_core_iowrite16(ioeventfd->vdev, test_mem,
					ioeventfd->data, ioeventfd->addr);
		break;
	case 4:
		vfio_pci_core_iowrite32(ioeventfd->vdev, test_mem,
					ioeventfd->data, ioeventfd->addr);
		break;
#ifdef iowrite64
	case 8:
		vfio_pci_core_iowrite64(ioeventfd->vdev, test_mem,
					ioeventfd->data, ioeventfd->addr);
		break;
#endif
	}
}

static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;
	struct vfio_pci_core_device *vdev = ioeventfd->vdev;

	if (ioeventfd->test_mem) {
		if (!down_read_trylock(&vdev->memory_lock))
			return 1; /* Lock contended, use thread */
		if (!__vfio_pci_memory_enabled(vdev)) {
			up_read(&vdev->memory_lock);
			return 0;
		}
	}

	vfio_pci_ioeventfd_do_write(ioeventfd, false);

	if (ioeventfd->test_mem)
		up_read(&vdev->memory_lock);

	return 0;
}

static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;

	vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
}
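/*
 * Note on the two callbacks above: vfio_pci_ioeventfd_handler() runs from
 * the eventfd wakeup path, where sleeping is not allowed, so it only
 * try-locks memory_lock and performs the write with the lock already held
 * (hence test_mem = false in the do_write call).  Returning 1 on contention
 * defers the work to vfio_pci_ioeventfd_thread(), which runs in process
 * context and can block on memory_lock via the test_mem path.
 */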
int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
		       uint64_t data, int count, int fd)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
	int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
	struct vfio_pci_ioeventfd *ioeventfd;

	/* Only support ioeventfds into BARs */
	if (bar > VFIO_PCI_BAR5_REGION_INDEX)
		return -EINVAL;

	if (pos + count > pci_resource_len(pdev, bar))
		return -EINVAL;

	/* Disallow ioeventfds working around MSI-X table writes */
	if (bar == vdev->msix_bar &&
	    !(pos + count <= vdev->msix_offset ||
	      pos >= vdev->msix_offset + vdev->msix_size))
		return -EINVAL;

#ifndef iowrite64
	if (count == 8)
		return -EINVAL;
#endif

	ret = vfio_pci_core_setup_barmap(vdev, bar);
	if (ret)
		return ret;

	mutex_lock(&vdev->ioeventfds_lock);

	list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
		if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
		    ioeventfd->data == data && ioeventfd->count == count) {
			if (fd == -1) {
				vfio_virqfd_disable(&ioeventfd->virqfd);
				list_del(&ioeventfd->next);
				vdev->ioeventfds_nr--;
				kfree(ioeventfd);
				ret = 0;
			} else
				ret = -EEXIST;

			goto out_unlock;
		}
	}

	if (fd < 0) {
		ret = -ENODEV;
		goto out_unlock;
	}

	if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT);
	if (!ioeventfd) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	ioeventfd->vdev = vdev;
	ioeventfd->addr = vdev->barmap[bar] + pos;
	ioeventfd->data = data;
	ioeventfd->pos = pos;
	ioeventfd->bar = bar;
	ioeventfd->count = count;
	ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;

	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
				 vfio_pci_ioeventfd_thread, NULL,
				 &ioeventfd->virqfd, fd);
	if (ret) {
		kfree(ioeventfd);
		goto out_unlock;
	}

	list_add(&ioeventfd->next, &vdev->ioeventfds_list);
	vdev->ioeventfds_nr++;

out_unlock:
	mutex_unlock(&vdev->ioeventfds_lock);

	return ret;
}
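/*
 * Illustrative userspace usage (a sketch, not part of this file): arm an
 * ioeventfd so that signaling eventfd `efd` posts a 4-byte write of `data`
 * at offset `reg` into BAR0.  The region offset comes from
 * VFIO_DEVICE_GET_REGION_INFO; `reg`, `efd` and `data` are made up here:
 *
 *	struct vfio_device_ioeventfd ie = {
 *		.argsz	= sizeof(ie),
 *		.flags	= VFIO_DEVICE_IOEVENTFD_32,
 *		.offset	= bar0_region_offset + reg,
 *		.data	= data,
 *		.fd	= efd,
 *	};
 *	ioctl(device_fd, VFIO_DEVICE_IOEVENTFD, &ie);
 *
 * Passing .fd = -1 with a matching offset/data/size tears the ioeventfd
 * down again, as handled by the lookup loop above.
 */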