/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell.  All Rights Reserved.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Author:
 *	Gregory Haskins <ghaskins@novell.com>
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/slab.h>

#include "iodev.h"

/*
 * --------------------------------------------------------------------
 * irqfd: Allows an fd to be used to inject an interrupt to the guest
 *
 * Credit goes to Avi Kivity for the original idea.
 * --------------------------------------------------------------------
 */

struct _irqfd {
	struct kvm *kvm;
	struct eventfd_ctx *eventfd;
	int gsi;
	struct list_head list;
	poll_table pt;
	wait_queue_t wait;
	struct work_struct inject;
	struct work_struct shutdown;
};

static struct workqueue_struct *irqfd_cleanup_wq;

static void
irqfd_inject(struct work_struct *work)
{
	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
	struct kvm *kvm = irqfd->kvm;

	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void
irqfd_shutdown(struct work_struct *work)
{
	struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
	u64 cnt;

	/*
	 * Synchronize with the wait-queue and unhook ourselves to prevent
	 * further events.
	 */
	eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);

	/*
	 * We know no new events will be scheduled at this point, so block
	 * until all previously outstanding events have completed
	 */
	flush_work(&irqfd->inject);

	/*
	 * It is now safe to release the object's resources
	 */
	eventfd_ctx_put(irqfd->eventfd);
	kfree(irqfd);
}

/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct _irqfd *irqfd)
{
	return list_empty(&irqfd->list) ? false : true;
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct _irqfd *irqfd)
{
	BUG_ON(!irqfd_is_active(irqfd));

	list_del_init(&irqfd->list);

	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int
irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
	unsigned long flags = (unsigned long)key;

	if (flags & POLLIN)
		/* An event has been signaled, inject an interrupt */
		schedule_work(&irqfd->inject);

	if (flags & POLLHUP) {
		/* The eventfd is closing, detach from KVM */
		struct kvm *kvm = irqfd->kvm;
		unsigned long flags;

		spin_lock_irqsave(&kvm->irqfds.lock, flags);

		/*
		 * We must check if someone deactivated the irqfd before
		 * we could acquire the irqfds.lock since the item is
		 * deactivated from the KVM side before it is unhooked from
		 * the wait-queue.  If it is already deactivated, we can
		 * simply return knowing the other side will clean up for us.
		 * We cannot race against the irqfd going away since the
		 * other side is required to acquire wqh->lock, which we hold.
		 */
		if (irqfd_is_active(irqfd))
			irqfd_deactivate(irqfd);

		spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
	}

	return 0;
}

static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
			poll_table *pt)
{
	struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
	add_wait_queue(wqh, &irqfd->wait);
}

static int
kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
{
	struct _irqfd *irqfd, *tmp;
	struct file *file = NULL;
	struct eventfd_ctx *eventfd = NULL;
	int ret;
	unsigned int events;

	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
	if (!irqfd)
		return -ENOMEM;

	irqfd->kvm = kvm;
	irqfd->gsi = gsi;
	INIT_LIST_HEAD(&irqfd->list);
	INIT_WORK(&irqfd->inject, irqfd_inject);
	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);

	file = eventfd_fget(fd);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto fail;
	}

	eventfd = eventfd_ctx_fileget(file);
	if (IS_ERR(eventfd)) {
		ret = PTR_ERR(eventfd);
		goto fail;
	}

	irqfd->eventfd = eventfd;

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd
	 */
	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

	spin_lock_irq(&kvm->irqfds.lock);

	ret = 0;
	list_for_each_entry(tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd != tmp->eventfd)
			continue;
		/* This fd is used for another irq already. */
		ret = -EBUSY;
		spin_unlock_irq(&kvm->irqfds.lock);
		goto fail;
	}

	events = file->f_op->poll(file, &irqfd->pt);

	list_add_tail(&irqfd->list, &kvm->irqfds.items);
	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered, and trigger it as if we didn't miss it.
	 */
	if (events & POLLIN)
		schedule_work(&irqfd->inject);

	/*
	 * do not drop the file until the irqfd is fully initialized;
	 * otherwise we might race against the POLLHUP
	 */
	fput(file);

	return 0;

fail:
	if (eventfd && !IS_ERR(eventfd))
		eventfd_ctx_put(eventfd);

	if (!IS_ERR(file))
		fput(file);

	kfree(irqfd);
	return ret;
}

void
kvm_eventfd_init(struct kvm *kvm)
{
	spin_lock_init(&kvm->irqfds.lock);
	INIT_LIST_HEAD(&kvm->irqfds.items);
	INIT_LIST_HEAD(&kvm->ioeventfds);
}

/*
 * Shut down any irqfds that match fd+gsi
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
{
	struct _irqfd *irqfd, *tmp;
	struct eventfd_ctx *eventfd;

	eventfd = eventfd_ctx_fdget(fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi)
			irqfd_deactivate(irqfd);
	}

	spin_unlock_irq(&kvm->irqfds.lock);
	eventfd_ctx_put(eventfd);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * so that we guarantee there will not be any more interrupts on this
	 * gsi once this deassign function returns.
	 */
	flush_workqueue(irqfd_cleanup_wq);

	return 0;
}

int
kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
{
	if (flags & KVM_IRQFD_FLAG_DEASSIGN)
		return kvm_irqfd_deassign(kvm, fd, gsi);

	return kvm_irqfd_assign(kvm, fd, gsi);
}

/*
 * This function is called as the kvm VM fd is being released.  Shut down
 * all irqfds that are still open.
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
	struct _irqfd *irqfd, *tmp;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
		irqfd_deactivate(irqfd);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * since we do not take a kvm* reference.
	 */
	flush_workqueue(irqfd_cleanup_wq);
}

/*
 * create a host-wide workqueue for issuing deferred shutdown requests
 * aggregated from all vm* instances.  We need our own isolated single-thread
 * queue to prevent deadlock against flushing the normal work-queue.
 */
static int __init irqfd_module_init(void)
{
	irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
	if (!irqfd_cleanup_wq)
		return -ENOMEM;

	return 0;
}

static void __exit irqfd_module_exit(void)
{
	destroy_workqueue(irqfd_cleanup_wq);
}

module_init(irqfd_module_init);
module_exit(irqfd_module_exit);
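/*
 * Example (userspace side, not part of this file): a minimal sketch of how a
 * VMM might wire an eventfd to a guest GSI through the KVM_IRQFD ioctl that
 * is dispatched to kvm_irqfd() above.  The struct kvm_irqfd layout and the
 * vmfd/gsi values shown here follow the userspace KVM API headers and are
 * assumptions for illustration only.  Writing any non-zero value to the
 * eventfd raises POLLIN on it, which lands in irqfd_wakeup() and results in
 * the edge-triggered pulse performed by irqfd_inject().
 *
 *	#include <stdint.h>
 *	#include <unistd.h>
 *	#include <sys/eventfd.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int efd = eventfd(0, 0);
 *	struct kvm_irqfd irqfd = {
 *		.fd    = efd,
 *		.gsi   = 5,	// guest interrupt line to pulse (example)
 *		.flags = 0,	// or KVM_IRQFD_FLAG_DEASSIGN to tear down
 *	};
 *	ioctl(vmfd, KVM_IRQFD, &irqfd);	// vmfd: fd from KVM_CREATE_VM
 *
 *	uint64_t one = 1;
 *	write(efd, &one, sizeof(one));	// signals the eventfd -> guest IRQ
 */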
/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */

struct _ioeventfd {
	struct list_head list;
	u64 addr;
	int length;
	struct eventfd_ctx *eventfd;
	u64 datamatch;
	struct kvm_io_device dev;
	bool wildcard;
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
	return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
	eventfd_ctx_put(p->eventfd);
	list_del(&p->list);
	kfree(p);
}

static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
	u64 _val;

	if (!(addr == p->addr && len == p->length))
		/* address-range must be precise for a hit */
		return false;

	if (p->wildcard)
		/* all else equal, wildcard is always a hit */
		return true;

	/* otherwise, we have to actually compare the data */

	BUG_ON(!IS_ALIGNED((unsigned long)val, len));

	switch (len) {
	case 1:
		_val = *(u8 *)val;
		break;
	case 2:
		_val = *(u16 *)val;
		break;
	case 4:
		_val = *(u32 *)val;
		break;
	case 8:
		_val = *(u64 *)val;
		break;
	default:
		return false;
	}

	return _val == p->datamatch ? true : false;
}

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
		const void *val)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	if (!ioeventfd_in_range(p, addr, len, val))
		return -EOPNOTSUPP;

	eventfd_signal(p->eventfd, 1);
	return 0;
}

/*
 * This function is called as KVM is completely shutting down.  We do not
 * need to worry about locking; just nuke anything we have as quickly as
 * possible.
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
	.write      = ioeventfd_write,
	.destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
	struct _ioeventfd *_p;

	list_for_each_entry(_p, &kvm->ioeventfds, list)
		if (_p->addr == p->addr && _p->length == p->length &&
		    (_p->wildcard || p->wildcard ||
		     _p->datamatch == p->datamatch))
			return true;

	return false;
}

static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
	enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
	struct _ioeventfd *p;
	struct eventfd_ctx *eventfd;
	int ret;

	/* must be a natural access width (1, 2, 4, or 8 bytes) */
	switch (args->len) {
	case 1:
	case 2:
	case 4:
	case 8:
		break;
	default:
		return -EINVAL;
	}

	/* check for range overflow */
	if (args->addr + args->len < args->addr)
		return -EINVAL;

	/* check for extra flags that we don't understand */
	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
		return -EINVAL;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p) {
		ret = -ENOMEM;
		goto fail;
	}

	INIT_LIST_HEAD(&p->list);
	p->addr = args->addr;
	p->length = args->len;
	p->eventfd = eventfd;

	/* The datamatch feature is optional; otherwise this is a wildcard */
	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
		p->datamatch = args->datamatch;
	else
		p->wildcard = true;

	mutex_lock(&kvm->slots_lock);

	/* Verify that there isn't a match already */
	if (ioeventfd_check_collision(kvm, p)) {
		ret = -EEXIST;
		goto unlock_fail;
	}

	kvm_iodevice_init(&p->dev, &ioeventfd_ops);

	ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev);
	if (ret < 0)
		goto unlock_fail;

	list_add_tail(&p->list, &kvm->ioeventfds);

	mutex_unlock(&kvm->slots_lock);

	return 0;

unlock_fail:
	mutex_unlock(&kvm->slots_lock);

fail:
	kfree(p);
	eventfd_ctx_put(eventfd);

	return ret;
}

static int
kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
	enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
	struct _ioeventfd *p, *tmp;
	struct eventfd_ctx *eventfd;
	int ret = -ENOENT;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	mutex_lock(&kvm->slots_lock);

	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

		if (p->eventfd != eventfd ||
		    p->addr != args->addr ||
		    p->length != args->len ||
		    p->wildcard != wildcard)
			continue;

		if (!p->wildcard && p->datamatch != args->datamatch)
			continue;

		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
		ioeventfd_release(p);
		ret = 0;
		break;
	}

	mutex_unlock(&kvm->slots_lock);

	eventfd_ctx_put(eventfd);

	return ret;
}

int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
		return kvm_deassign_ioeventfd(kvm, args);

	return kvm_assign_ioeventfd(kvm, args);
}
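/*
 * Example (userspace side, not part of this file): a minimal sketch of how a
 * VMM might register a PIO doorbell through the KVM_IOEVENTFD ioctl that is
 * dispatched to kvm_ioeventfd() above.  The struct kvm_ioeventfd layout and
 * the port/value chosen below follow the userspace KVM API headers and are
 * assumptions for illustration; the matching rules are those implemented by
 * ioeventfd_in_range() (exact addr/len hit, plus the optional datamatch).
 *
 *	#include <stdint.h>
 *	#include <unistd.h>
 *	#include <sys/eventfd.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int efd = eventfd(0, 0);
 *	struct kvm_ioeventfd ioeventfd = {
 *		.addr      = 0xf4,	// guest PIO port (example value)
 *		.len       = 2,
 *		.fd        = efd,
 *		.flags     = KVM_IOEVENTFD_FLAG_PIO |
 *			     KVM_IOEVENTFD_FLAG_DATAMATCH,
 *		.datamatch = 0x1234,	// only a 2-byte write of 0x1234 fires
 *	};
 *	ioctl(vmfd, KVM_IOEVENTFD, &ioeventfd);	// vmfd: fd from KVM_CREATE_VM
 *
 *	// A 2-byte guest write of 0x1234 to port 0xf4 now signals efd in the
 *	// kernel instead of exiting to userspace for emulation; the VMM
 *	// observes it by reading or polling efd.
 *	uint64_t cnt;
 *	read(efd, &cnt, sizeof(cnt));
 */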