/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell.  All Rights Reserved.
 *
 * Author:
 *      Gregory Haskins <ghaskins@novell.com>
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/slab.h>

#include "iodev.h"

/*
 * --------------------------------------------------------------------
 * irqfd: Allows an fd to be used to inject an interrupt to the guest
 *
 * Credit goes to Avi Kivity for the original idea.
 * --------------------------------------------------------------------
 */

struct _irqfd {
        struct kvm *kvm;                /* VM this irqfd is attached to */
        struct eventfd_ctx *eventfd;    /* eventfd we are waiting on */
        int gsi;                        /* guest IRQ line to pulse on signal */
        struct list_head list;          /* entry on kvm->irqfds.items */
        poll_table pt;                  /* hooks us onto the eventfd waitqueue */
        wait_queue_t wait;              /* our waitqueue entry (irqfd_wakeup) */
        struct work_struct inject;      /* deferred interrupt injection */
        struct work_struct shutdown;    /* deferred teardown */
};

static struct workqueue_struct *irqfd_cleanup_wq;

static void
irqfd_inject(struct work_struct *work)
{
        struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
        struct kvm *kvm = irqfd->kvm;

        kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
        kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void
irqfd_shutdown(struct work_struct *work)
{
        struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
        u64 cnt;

        /*
         * Synchronize with the wait-queue and unhook ourselves to prevent
         * further events.
         */
        eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);

        /*
         * We know no new events will be scheduled at this point, so block
         * until all previously outstanding events have completed.
         */
        flush_work(&irqfd->inject);

        /*
         * It is now safe to release the object's resources.
         */
        eventfd_ctx_put(irqfd->eventfd);
        kfree(irqfd);
}

/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct _irqfd *irqfd)
{
        return !list_empty(&irqfd->list);
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct _irqfd *irqfd)
{
        BUG_ON(!irqfd_is_active(irqfd));

        list_del_init(&irqfd->list);

        queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int
irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
        struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
        unsigned long flags = (unsigned long)key;

        if (flags & POLLIN)
                /* An event has been signaled, inject an interrupt */
                schedule_work(&irqfd->inject);

        if (flags & POLLHUP) {
                /* The eventfd is closing, detach from KVM */
                struct kvm *kvm = irqfd->kvm;
                unsigned long flags;

                spin_lock_irqsave(&kvm->irqfds.lock, flags);

                /*
                 * We must check if someone deactivated the irqfd before
                 * we could acquire the irqfds.lock since the item is
                 * deactivated from the KVM side before it is unhooked from
                 * the wait-queue.  If it is already deactivated, we can
                 * simply return knowing the other side will clean up for us.
                 * We cannot race against the irqfd going away since the
                 * other side is required to acquire wqh->lock, which we hold.
                 */
                if (irqfd_is_active(irqfd))
                        irqfd_deactivate(irqfd);

                spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
        }

        return 0;
}

static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
                        poll_table *pt)
{
        struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
        add_wait_queue(wqh, &irqfd->wait);
}

static int
kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
{
        struct _irqfd *irqfd, *tmp;
        struct file *file = NULL;
        struct eventfd_ctx *eventfd = NULL;
        int ret;
        unsigned int events;

        irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
        if (!irqfd)
                return -ENOMEM;

        irqfd->kvm = kvm;
        irqfd->gsi = gsi;
        INIT_LIST_HEAD(&irqfd->list);
        INIT_WORK(&irqfd->inject, irqfd_inject);
        INIT_WORK(&irqfd->shutdown, irqfd_shutdown);

        file = eventfd_fget(fd);
        if (IS_ERR(file)) {
                ret = PTR_ERR(file);
                goto fail;
        }

        eventfd = eventfd_ctx_fileget(file);
        if (IS_ERR(eventfd)) {
                ret = PTR_ERR(eventfd);
                goto fail;
        }

        irqfd->eventfd = eventfd;

        /*
         * Install our own custom wake-up handling so we are notified via
         * a callback whenever someone signals the underlying eventfd
         */
        init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
        init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

        spin_lock_irq(&kvm->irqfds.lock);

        ret = 0;
        list_for_each_entry(tmp, &kvm->irqfds.items, list) {
                if (irqfd->eventfd != tmp->eventfd)
                        continue;
                /* This fd is used for another irq already. */
                ret = -EBUSY;
                spin_unlock_irq(&kvm->irqfds.lock);
                goto fail;
        }

        events = file->f_op->poll(file, &irqfd->pt);

        list_add_tail(&irqfd->list, &kvm->irqfds.items);
        spin_unlock_irq(&kvm->irqfds.lock);

        /*
         * Check if there was an event already pending on the eventfd
         * before we registered, and trigger it as if we didn't miss it.
         */
        if (events & POLLIN)
                schedule_work(&irqfd->inject);

        /*
         * Do not drop the file until the irqfd is fully initialized;
         * otherwise we might race against the POLLHUP.
         */
        fput(file);

        return 0;

fail:
        if (eventfd && !IS_ERR(eventfd))
                eventfd_ctx_put(eventfd);

        if (!IS_ERR(file))
                fput(file);

        kfree(irqfd);
        return ret;
}

void
kvm_eventfd_init(struct kvm *kvm)
{
        spin_lock_init(&kvm->irqfds.lock);
        INIT_LIST_HEAD(&kvm->irqfds.items);
        INIT_LIST_HEAD(&kvm->ioeventfds);
}

/*
 * Shut down any irqfds that match fd+gsi
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
{
        struct _irqfd *irqfd, *tmp;
        struct eventfd_ctx *eventfd;

        eventfd = eventfd_ctx_fdget(fd);
        if (IS_ERR(eventfd))
                return PTR_ERR(eventfd);

        spin_lock_irq(&kvm->irqfds.lock);

        list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
                if (irqfd->eventfd == eventfd && irqfd->gsi == gsi)
                        irqfd_deactivate(irqfd);
        }

        spin_unlock_irq(&kvm->irqfds.lock);
        eventfd_ctx_put(eventfd);

        /*
         * Block until we know all outstanding shutdown jobs have completed
         * so that we guarantee there will not be any more interrupts on this
         * gsi once this deassign function returns.
         */
        flush_workqueue(irqfd_cleanup_wq);

        return 0;
}

int
kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
{
        if (flags & KVM_IRQFD_FLAG_DEASSIGN)
                return kvm_irqfd_deassign(kvm, fd, gsi);

        return kvm_irqfd_assign(kvm, fd, gsi);
}

/*
 * This function is called as the kvm VM fd is being released.  Shut down all
 * irqfds that still remain open.
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
        struct _irqfd *irqfd, *tmp;

        spin_lock_irq(&kvm->irqfds.lock);

        list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
                irqfd_deactivate(irqfd);

        spin_unlock_irq(&kvm->irqfds.lock);

        /*
         * Block until we know all outstanding shutdown jobs have completed
         * since we do not take a kvm* reference.
         */
        flush_workqueue(irqfd_cleanup_wq);
}

/*
 * Create a host-wide workqueue for issuing deferred shutdown requests
 * aggregated from all vm* instances.  We need our own isolated single-thread
 * queue to prevent deadlock against flushing the normal work-queue.
 */
static int __init irqfd_module_init(void)
{
        irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
        if (!irqfd_cleanup_wq)
                return -ENOMEM;

        return 0;
}

static void __exit irqfd_module_exit(void)
{
        destroy_workqueue(irqfd_cleanup_wq);
}

module_init(irqfd_module_init);
module_exit(irqfd_module_exit);
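
/*
 * Illustrative sketch, not part of this file: roughly how userspace might
 * wire an eventfd to a guest GSI with KVM_IRQFD.  The helper name and error
 * handling are made up for illustration; it assumes 'vmfd' was obtained via
 * KVM_CREATE_VM on a VM with an in-kernel irqchip.
 *
 *      #include <sys/eventfd.h>
 *      #include <sys/ioctl.h>
 *      #include <linux/kvm.h>
 *
 *      static int wire_irqfd(int vmfd, unsigned int gsi)
 *      {
 *              struct kvm_irqfd req = { 0 };
 *              int efd = eventfd(0, 0);
 *
 *              if (efd < 0)
 *                      return -1;
 *
 *              req.fd  = efd;
 *              req.gsi = gsi;
 *              if (ioctl(vmfd, KVM_IRQFD, &req) < 0)
 *                      return -1;
 *
 *              return efd;
 *      }
 *
 * Any write(2) to the returned eventfd then pulses 'gsi' via irqfd_inject()
 * above; the same ioctl with KVM_IRQFD_FLAG_DEASSIGN set in req.flags (and
 * the same fd/gsi pair) tears the binding down again.
 */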

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * Userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */

struct _ioeventfd {
        struct list_head list;          /* entry on kvm->ioeventfds */
        u64 addr;                       /* guest address (GPA or port) */
        int length;                     /* length of the matching access */
        struct eventfd_ctx *eventfd;    /* eventfd to signal on a hit */
        u64 datamatch;                  /* value to match unless wildcard */
        struct kvm_io_device dev;       /* device registered on the io bus */
        bool wildcard;                  /* true: any written value matches */
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
        return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
        eventfd_ctx_put(p->eventfd);
        list_del(&p->list);
        kfree(p);
}

static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
        u64 _val;

        if (!(addr == p->addr && len == p->length))
                /* address-range must be precise for a hit */
                return false;

        if (p->wildcard)
                /* all else equal, wildcard is always a hit */
                return true;

        /* otherwise, we have to actually compare the data */

        BUG_ON(!IS_ALIGNED((unsigned long)val, len));

        switch (len) {
        case 1:
                _val = *(u8 *)val;
                break;
        case 2:
                _val = *(u16 *)val;
                break;
        case 4:
                _val = *(u32 *)val;
                break;
        case 8:
                _val = *(u64 *)val;
                break;
        default:
                return false;
        }

        return _val == p->datamatch;
}

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
                const void *val)
{
        struct _ioeventfd *p = to_ioeventfd(this);

        if (!ioeventfd_in_range(p, addr, len, val))
                return -EOPNOTSUPP;

        eventfd_signal(p->eventfd, 1);
        return 0;
}

/*
 * This function is called as KVM is completely shutting down.  We do not
 * need to worry about locking; just nuke anything we have as quickly as
 * possible.
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
        struct _ioeventfd *p = to_ioeventfd(this);

        ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
        .write      = ioeventfd_write,
        .destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
        struct _ioeventfd *_p;

        list_for_each_entry(_p, &kvm->ioeventfds, list)
                if (_p->addr == p->addr && _p->length == p->length &&
                    (_p->wildcard || p->wildcard ||
                     _p->datamatch == p->datamatch))
                        return true;

        return false;
}

static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
        int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
        enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
        struct _ioeventfd *p;
        struct eventfd_ctx *eventfd;
        int ret;

        /* must be natural-word sized */
        switch (args->len) {
        case 1:
        case 2:
        case 4:
        case 8:
                break;
        default:
                return -EINVAL;
        }

        /* check for range overflow */
        if (args->addr + args->len < args->addr)
                return -EINVAL;

        /* check for extra flags that we don't understand */
        if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
                return -EINVAL;

        eventfd = eventfd_ctx_fdget(args->fd);
        if (IS_ERR(eventfd))
                return PTR_ERR(eventfd);

        p = kzalloc(sizeof(*p), GFP_KERNEL);
        if (!p) {
                ret = -ENOMEM;
                goto fail;
        }

        INIT_LIST_HEAD(&p->list);
        p->addr = args->addr;
        p->length = args->len;
        p->eventfd = eventfd;

        /* The datamatch feature is optional, otherwise this is a wildcard */
        if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
                p->datamatch = args->datamatch;
        else
                p->wildcard = true;

        mutex_lock(&kvm->slots_lock);

        /* Verify that there isn't a match already */
        if (ioeventfd_check_collision(kvm, p)) {
                ret = -EEXIST;
                goto unlock_fail;
        }

        kvm_iodevice_init(&p->dev, &ioeventfd_ops);

        ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev);
        if (ret < 0)
                goto unlock_fail;

        list_add_tail(&p->list, &kvm->ioeventfds);

        mutex_unlock(&kvm->slots_lock);

        return 0;

unlock_fail:
        mutex_unlock(&kvm->slots_lock);

fail:
        kfree(p);
        eventfd_ctx_put(eventfd);

        return ret;
}

static int
kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
        int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
        enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
        struct _ioeventfd *p, *tmp;
        struct eventfd_ctx *eventfd;
        int ret = -ENOENT;

        eventfd = eventfd_ctx_fdget(args->fd);
        if (IS_ERR(eventfd))
                return PTR_ERR(eventfd);

        mutex_lock(&kvm->slots_lock);

        list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
                bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

                if (p->eventfd != eventfd ||
                    p->addr != args->addr ||
                    p->length != args->len ||
                    p->wildcard != wildcard)
                        continue;

                if (!p->wildcard && p->datamatch != args->datamatch)
                        continue;

                kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
                ioeventfd_release(p);
                ret = 0;
                break;
        }

        mutex_unlock(&kvm->slots_lock);

        eventfd_ctx_put(eventfd);

        return ret;
}

int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
        if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
                return kvm_deassign_ioeventfd(kvm, args);

        return kvm_assign_ioeventfd(kvm, args);
}
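
/*
 * Illustrative sketch, not part of this file: roughly how userspace might
 * register a 4-byte MMIO "doorbell" with KVM_IOEVENTFD so that a guest write
 * of a specific value signals an eventfd instead of exiting to userspace.
 * The helper name and error handling are made up for illustration; it
 * assumes 'vmfd' was obtained via KVM_CREATE_VM.
 *
 *      #include <sys/eventfd.h>
 *      #include <sys/ioctl.h>
 *      #include <linux/kvm.h>
 *
 *      static int wire_ioeventfd(int vmfd, __u64 doorbell_gpa, __u32 magic)
 *      {
 *              struct kvm_ioeventfd req = { 0 };
 *              int efd = eventfd(0, 0);
 *
 *              if (efd < 0)
 *                      return -1;
 *
 *              req.addr      = doorbell_gpa;
 *              req.len       = 4;
 *              req.fd        = efd;
 *              req.datamatch = magic;
 *              req.flags     = KVM_IOEVENTFD_FLAG_DATAMATCH;
 *              if (ioctl(vmfd, KVM_IOEVENTFD, &req) < 0)
 *                      return -1;
 *
 *              return efd;
 *      }
 *
 * A read(2) on the returned eventfd then blocks until the guest writes
 * 'magic' to doorbell_gpa.  Omitting KVM_IOEVENTFD_FLAG_DATAMATCH registers
 * a wildcard that matches any value, KVM_IOEVENTFD_FLAG_PIO selects the PIO
 * bus instead of MMIO, and KVM_IOEVENTFD_FLAG_DEASSIGN with the same
 * parameters removes the registration again.
 */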