/******************************************************************************
 * evtchn.c
 *
 * Driver for receiving and demuxing event-channel signals.
 *
 * Copyright (c) 2004-2005, K A Fraser
 * Multi-process extensions Copyright (c) 2004, Steven Smith
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/major.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/poll.h>
#include <linux/irq.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

#include <xen/xen.h>
#include <xen/events.h>
#include <xen/evtchn.h>
#include <xen/xen-ops.h>
#include <asm/xen/hypervisor.h>

struct per_user_data {
	struct mutex bind_mutex; /* serialize bind/unbind operations */
	struct rb_root evtchns;
	unsigned int nr_evtchns;

	/* Notification ring, accessed via /dev/xen/evtchn. */
	unsigned int ring_size;
	evtchn_port_t *ring;
	unsigned int ring_cons, ring_prod, ring_overflow;
	struct mutex ring_cons_mutex; /* protect against concurrent readers */
	spinlock_t ring_prod_lock; /* protect against concurrent interrupts */

	/* Processes wait on this queue when ring is empty. */
	wait_queue_head_t evtchn_wait;
	struct fasync_struct *evtchn_async_queue;
	const char *name;

	domid_t restrict_domid;
};

#define UNRESTRICTED_DOMID ((domid_t)-1)
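
/*
 * Typical userspace usage, as a minimal sketch (error handling and the
 * O_NONBLOCK/poll() variants are omitted; the ioctl ABI is declared in
 * the xen/evtchn.h uapi header):
 *
 *	int fd = open("/dev/xen/evtchn", O_RDWR);
 *	struct ioctl_evtchn_bind_unbound_port bind = {
 *		.remote_domain = domid,			(peer domain id)
 *	};
 *	int port = ioctl(fd, IOCTL_EVTCHN_BIND_UNBOUND_PORT, &bind);
 *
 *	evtchn_port_t pending;
 *	read(fd, &pending, sizeof(pending));	(blocks until an event fires)
 *	write(fd, &pending, sizeof(pending));	(unmasks the port again)
 */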

struct user_evtchn {
	struct rb_node node;
	struct per_user_data *user;
	evtchn_port_t port;
	bool enabled;
	bool unbinding;
};

static void evtchn_free_ring(evtchn_port_t *ring)
{
	kvfree(ring);
}

static unsigned int evtchn_ring_offset(struct per_user_data *u,
				       unsigned int idx)
{
	return idx & (u->ring_size - 1);
}

static evtchn_port_t *evtchn_ring_entry(struct per_user_data *u,
					unsigned int idx)
{
	return u->ring + evtchn_ring_offset(u, idx);
}

static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{
	struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;

	u->nr_evtchns++;

	while (*new) {
		struct user_evtchn *this;

		this = rb_entry(*new, struct user_evtchn, node);

		parent = *new;
		if (this->port < evtchn->port)
			new = &((*new)->rb_left);
		else if (this->port > evtchn->port)
			new = &((*new)->rb_right);
		else
			return -EEXIST;
	}

	/* Add new node and rebalance tree. */
	rb_link_node(&evtchn->node, parent, new);
	rb_insert_color(&evtchn->node, &u->evtchns);

	return 0;
}

static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{
	u->nr_evtchns--;
	rb_erase(&evtchn->node, &u->evtchns);
	kfree(evtchn);
}

static struct user_evtchn *find_evtchn(struct per_user_data *u,
				       evtchn_port_t port)
{
	struct rb_node *node = u->evtchns.rb_node;

	while (node) {
		struct user_evtchn *evtchn;

		evtchn = rb_entry(node, struct user_evtchn, node);

		if (evtchn->port < port)
			node = node->rb_left;
		else if (evtchn->port > port)
			node = node->rb_right;
		else
			return evtchn;
	}

	return NULL;
}

static irqreturn_t evtchn_interrupt(int irq, void *data)
{
	struct user_evtchn *evtchn = data;
	struct per_user_data *u = evtchn->user;
	unsigned int prod, cons;

	/* Handler might be called when tearing down the IRQ. */
	if (evtchn->unbinding)
		return IRQ_HANDLED;

	WARN(!evtchn->enabled,
	     "Interrupt for port %u, but apparently not enabled; per-user %p\n",
	     evtchn->port, u);

	evtchn->enabled = false;

	spin_lock(&u->ring_prod_lock);

	prod = READ_ONCE(u->ring_prod);
	cons = READ_ONCE(u->ring_cons);

	if ((prod - cons) < u->ring_size) {
		*evtchn_ring_entry(u, prod) = evtchn->port;
		smp_wmb(); /* Ensure ring contents visible */
		WRITE_ONCE(u->ring_prod, prod + 1);
		if (cons == prod) {
			wake_up_interruptible(&u->evtchn_wait);
			kill_fasync(&u->evtchn_async_queue,
				    SIGIO, POLL_IN);
		}
	} else
		u->ring_overflow = 1;

	spin_unlock(&u->ring_prod_lock);

	return IRQ_HANDLED;
}
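
/*
 * Delivery model: evtchn_interrupt() above queues the pending port in the
 * notification ring and clears ->enabled; the port then stays masked until
 * userspace writes the port number back (see evtchn_write() below), which
 * issues the deferred EOI via xen_irq_lateeoi(). read() hands out whole
 * evtchn_port_t records, in two chunks when the data wraps around the end
 * of the ring.
 */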

static ssize_t evtchn_read(struct file *file, char __user *buf,
			   size_t count, loff_t *ppos)
{
	int rc;
	unsigned int c, p, bytes1 = 0, bytes2 = 0;
	struct per_user_data *u = file->private_data;

	/* Whole number of ports. */
	count &= ~(sizeof(evtchn_port_t)-1);

	if (count == 0)
		return 0;

	if (count > PAGE_SIZE)
		count = PAGE_SIZE;

	for (;;) {
		mutex_lock(&u->ring_cons_mutex);

		rc = -EFBIG;
		if (u->ring_overflow)
			goto unlock_out;

		c = READ_ONCE(u->ring_cons);
		p = READ_ONCE(u->ring_prod);
		if (c != p)
			break;

		mutex_unlock(&u->ring_cons_mutex);

		if (file->f_flags & O_NONBLOCK)
			return -EAGAIN;

		rc = wait_event_interruptible(u->evtchn_wait,
			READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod));
		if (rc)
			return rc;
	}

	/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
	if (((c ^ p) & u->ring_size) != 0) {
		bytes1 = (u->ring_size - evtchn_ring_offset(u, c)) *
			sizeof(evtchn_port_t);
		bytes2 = evtchn_ring_offset(u, p) * sizeof(evtchn_port_t);
	} else {
		bytes1 = (p - c) * sizeof(evtchn_port_t);
		bytes2 = 0;
	}

	/* Truncate chunks according to caller's maximum byte count. */
	if (bytes1 > count) {
		bytes1 = count;
		bytes2 = 0;
	} else if ((bytes1 + bytes2) > count) {
		bytes2 = count - bytes1;
	}

	rc = -EFAULT;
	smp_rmb(); /* Ensure that we see the port before we copy it. */
	if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) ||
	    ((bytes2 != 0) &&
	     copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
		goto unlock_out;

	WRITE_ONCE(u->ring_cons, c + (bytes1 + bytes2) / sizeof(evtchn_port_t));
	rc = bytes1 + bytes2;

unlock_out:
	mutex_unlock(&u->ring_cons_mutex);
	return rc;
}

static ssize_t evtchn_write(struct file *file, const char __user *buf,
			    size_t count, loff_t *ppos)
{
	int rc, i;
	evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
	struct per_user_data *u = file->private_data;

	if (kbuf == NULL)
		return -ENOMEM;

	/* Whole number of ports. */
	count &= ~(sizeof(evtchn_port_t)-1);

	rc = 0;
	if (count == 0)
		goto out;

	if (count > PAGE_SIZE)
		count = PAGE_SIZE;

	rc = -EFAULT;
	if (copy_from_user(kbuf, buf, count) != 0)
		goto out;

	mutex_lock(&u->bind_mutex);

	for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
		evtchn_port_t port = kbuf[i];
		struct user_evtchn *evtchn;

		evtchn = find_evtchn(u, port);
		if (evtchn && !evtchn->enabled) {
			evtchn->enabled = true;
			xen_irq_lateeoi(irq_from_evtchn(port), 0);
		}
	}

	mutex_unlock(&u->bind_mutex);

	rc = count;

out:
	free_page((unsigned long)kbuf);
	return rc;
}
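
/*
 * Note that the ring size is always a power of two: it starts at 64
 * entries and is doubled on each resize, which is what allows
 * evtchn_ring_offset() to reduce the free-running ring_prod/ring_cons
 * counters with a simple mask.
 */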

static int evtchn_resize_ring(struct per_user_data *u)
{
	unsigned int new_size;
	evtchn_port_t *new_ring, *old_ring;

	/*
	 * Ensure the ring is large enough to capture all possible
	 * events. i.e., one free slot for each bound event.
	 */
	if (u->nr_evtchns <= u->ring_size)
		return 0;

	if (u->ring_size == 0)
		new_size = 64;
	else
		new_size = 2 * u->ring_size;

	new_ring = kvmalloc_array(new_size, sizeof(*new_ring), GFP_KERNEL);
	if (!new_ring)
		return -ENOMEM;

	old_ring = u->ring;

	/*
	 * Access to the ring contents is serialized by either the
	 * prod /or/ cons lock so take both when resizing.
	 */
	mutex_lock(&u->ring_cons_mutex);
	spin_lock_irq(&u->ring_prod_lock);

	/*
	 * Copy the old ring contents to the new ring.
	 *
	 * To take care of wrapping, a full ring, and the new index
	 * pointing into the second half, simply copy the old contents
	 * twice.
	 *
	 * +---------+    +------------------+
	 * |34567  12| -> |34567  1234567  12|
	 * +-----p-c-+    +-------c------p---+
	 */
	memcpy(new_ring, old_ring, u->ring_size * sizeof(*u->ring));
	memcpy(new_ring + u->ring_size, old_ring,
	       u->ring_size * sizeof(*u->ring));

	u->ring = new_ring;
	u->ring_size = new_size;

	spin_unlock_irq(&u->ring_prod_lock);
	mutex_unlock(&u->ring_cons_mutex);

	evtchn_free_ring(old_ring);

	return 0;
}

static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port,
			       bool is_static)
{
	struct user_evtchn *evtchn;
	int rc = 0;

	/*
	 * Ports are never reused, so every caller should pass in a
	 * unique port.
	 *
	 * (Locking not necessary because we haven't registered the
	 * interrupt handler yet, and our caller has already
	 * serialized bind operations.)
	 */

	evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL);
	if (!evtchn)
		return -ENOMEM;

	evtchn->user = u;
	evtchn->port = port;
	evtchn->enabled = true; /* start enabled */

	rc = add_evtchn(u, evtchn);
	if (rc < 0)
		goto err;

	rc = evtchn_resize_ring(u);
	if (rc < 0)
		goto err;

	rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0,
					       u->name, evtchn);
	if (rc < 0)
		goto err;

	rc = evtchn_make_refcounted(port, is_static);
	return rc;

err:
	/* bind failed, should close the port now */
	if (!is_static)
		xen_evtchn_close(port);

	del_evtchn(u, evtchn);
	return rc;
}

static void evtchn_unbind_from_user(struct per_user_data *u,
				    struct user_evtchn *evtchn)
{
	int irq = irq_from_evtchn(evtchn->port);

	BUG_ON(irq < 0);

	evtchn->unbinding = true;
	unbind_from_irqhandler(irq, evtchn);

	del_evtchn(u, evtchn);
}
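
/*
 * Teardown ordering: callers disable the IRQ before unbinding, and setting
 * ->unbinding first makes a handler invocation that still slips through
 * return early (see evtchn_interrupt()). unbind_from_irqhandler()
 * synchronizes with any running handler before the evtchn is freed.
 */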

static long evtchn_ioctl(struct file *file,
			 unsigned int cmd, unsigned long arg)
{
	int rc;
	struct per_user_data *u = file->private_data;
	void __user *uarg = (void __user *) arg;

	/* Prevent bind from racing with unbind */
	mutex_lock(&u->bind_mutex);

	switch (cmd) {
	case IOCTL_EVTCHN_BIND_VIRQ: {
		struct ioctl_evtchn_bind_virq bind;
		struct evtchn_bind_virq bind_virq;

		rc = -EACCES;
		if (u->restrict_domid != UNRESTRICTED_DOMID)
			break;

		rc = -EFAULT;
		if (copy_from_user(&bind, uarg, sizeof(bind)))
			break;

		bind_virq.virq = bind.virq;
		bind_virq.vcpu = xen_vcpu_nr(0);
		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
						 &bind_virq);
		if (rc != 0)
			break;

		rc = evtchn_bind_to_user(u, bind_virq.port, false);
		if (rc == 0)
			rc = bind_virq.port;
		break;
	}

	case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
		struct ioctl_evtchn_bind_interdomain bind;
		struct evtchn_bind_interdomain bind_interdomain;

		rc = -EFAULT;
		if (copy_from_user(&bind, uarg, sizeof(bind)))
			break;

		rc = -EACCES;
		if (u->restrict_domid != UNRESTRICTED_DOMID &&
		    u->restrict_domid != bind.remote_domain)
			break;

		bind_interdomain.remote_dom = bind.remote_domain;
		bind_interdomain.remote_port = bind.remote_port;
		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
						 &bind_interdomain);
		if (rc != 0)
			break;

		rc = evtchn_bind_to_user(u, bind_interdomain.local_port,
					 false);
		if (rc == 0)
			rc = bind_interdomain.local_port;
		break;
	}

	case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
		struct ioctl_evtchn_bind_unbound_port bind;
		struct evtchn_alloc_unbound alloc_unbound;

		rc = -EACCES;
		if (u->restrict_domid != UNRESTRICTED_DOMID)
			break;

		rc = -EFAULT;
		if (copy_from_user(&bind, uarg, sizeof(bind)))
			break;

		alloc_unbound.dom = DOMID_SELF;
		alloc_unbound.remote_dom = bind.remote_domain;
		rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
						 &alloc_unbound);
		if (rc != 0)
			break;

		rc = evtchn_bind_to_user(u, alloc_unbound.port, false);
		if (rc == 0)
			rc = alloc_unbound.port;
		break;
	}

	case IOCTL_EVTCHN_UNBIND: {
		struct ioctl_evtchn_unbind unbind;
		struct user_evtchn *evtchn;

		rc = -EFAULT;
		if (copy_from_user(&unbind, uarg, sizeof(unbind)))
			break;

		rc = -EINVAL;
		if (unbind.port >= xen_evtchn_nr_channels())
			break;

		rc = -ENOTCONN;
		evtchn = find_evtchn(u, unbind.port);
		if (!evtchn)
			break;

		disable_irq(irq_from_evtchn(unbind.port));
		evtchn_unbind_from_user(u, evtchn);
		rc = 0;
		break;
	}

	case IOCTL_EVTCHN_BIND_STATIC: {
		struct ioctl_evtchn_bind bind;
		struct user_evtchn *evtchn;

		rc = -EFAULT;
		if (copy_from_user(&bind, uarg, sizeof(bind)))
			break;

		rc = -EISCONN;
		evtchn = find_evtchn(u, bind.port);
		if (evtchn)
			break;

		rc = evtchn_bind_to_user(u, bind.port, true);
		break;
	}

	case IOCTL_EVTCHN_NOTIFY: {
		struct ioctl_evtchn_notify notify;
		struct user_evtchn *evtchn;

		rc = -EFAULT;
		if (copy_from_user(&notify, uarg, sizeof(notify)))
			break;

		rc = -ENOTCONN;
		evtchn = find_evtchn(u, notify.port);
		if (evtchn) {
			notify_remote_via_evtchn(notify.port);
			rc = 0;
		}
		break;
	}

	case IOCTL_EVTCHN_RESET: {
		/* Initialise the ring to empty. Clear errors. */
		mutex_lock(&u->ring_cons_mutex);
		spin_lock_irq(&u->ring_prod_lock);
		WRITE_ONCE(u->ring_cons, 0);
		WRITE_ONCE(u->ring_prod, 0);
		u->ring_overflow = 0;
		spin_unlock_irq(&u->ring_prod_lock);
		mutex_unlock(&u->ring_cons_mutex);
		rc = 0;
		break;
	}

	case IOCTL_EVTCHN_RESTRICT_DOMID: {
		struct ioctl_evtchn_restrict_domid ierd;

		rc = -EACCES;
		if (u->restrict_domid != UNRESTRICTED_DOMID)
			break;

		rc = -EFAULT;
		if (copy_from_user(&ierd, uarg, sizeof(ierd)))
			break;

		rc = -EINVAL;
		if (ierd.domid == 0 || ierd.domid >= DOMID_FIRST_RESERVED)
			break;

		u->restrict_domid = ierd.domid;
		rc = 0;

		break;
	}

	default:
		rc = -ENOSYS;
		break;
	}
	mutex_unlock(&u->bind_mutex);

	return rc;
}
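
/*
 * poll() semantics: the device is always writable; EPOLLIN is reported
 * while the ring is non-empty, and a past ring overflow turns the result
 * into EPOLLERR until userspace clears it with IOCTL_EVTCHN_RESET.
 */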

static __poll_t evtchn_poll(struct file *file, poll_table *wait)
{
	__poll_t mask = EPOLLOUT | EPOLLWRNORM;
	struct per_user_data *u = file->private_data;

	poll_wait(file, &u->evtchn_wait, wait);
	if (READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (u->ring_overflow)
		mask = EPOLLERR;
	return mask;
}

static int evtchn_fasync(int fd, struct file *filp, int on)
{
	struct per_user_data *u = filp->private_data;

	return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
}

static int evtchn_open(struct inode *inode, struct file *filp)
{
	struct per_user_data *u;

	u = kzalloc(sizeof(*u), GFP_KERNEL);
	if (u == NULL)
		return -ENOMEM;

	u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm);
	if (u->name == NULL) {
		kfree(u);
		return -ENOMEM;
	}

	init_waitqueue_head(&u->evtchn_wait);

	mutex_init(&u->bind_mutex);
	mutex_init(&u->ring_cons_mutex);
	spin_lock_init(&u->ring_prod_lock);

	u->restrict_domid = UNRESTRICTED_DOMID;

	filp->private_data = u;

	return stream_open(inode, filp);
}

static int evtchn_release(struct inode *inode, struct file *filp)
{
	struct per_user_data *u = filp->private_data;
	struct rb_node *node;

	while ((node = u->evtchns.rb_node)) {
		struct user_evtchn *evtchn;

		evtchn = rb_entry(node, struct user_evtchn, node);
		disable_irq(irq_from_evtchn(evtchn->port));
		evtchn_unbind_from_user(u, evtchn);
	}

	evtchn_free_ring(u->ring);
	kfree(u->name);
	kfree(u);

	return 0;
}

static const struct file_operations evtchn_fops = {
	.owner   = THIS_MODULE,
	.read    = evtchn_read,
	.write   = evtchn_write,
	.unlocked_ioctl = evtchn_ioctl,
	.poll    = evtchn_poll,
	.fasync  = evtchn_fasync,
	.open    = evtchn_open,
	.release = evtchn_release,
};

static struct miscdevice evtchn_miscdev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name  = "xen/evtchn",
	.fops  = &evtchn_fops,
};

static int __init evtchn_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	/* Create '/dev/xen/evtchn'. */
	err = misc_register(&evtchn_miscdev);
	if (err != 0) {
		pr_err("Could not register /dev/xen/evtchn\n");
		return err;
	}

	pr_info("Event-channel device installed\n");

	return 0;
}

static void __exit evtchn_cleanup(void)
{
	misc_deregister(&evtchn_miscdev);
}

module_init(evtchn_init);
module_exit(evtchn_cleanup);

MODULE_DESCRIPTION("Xen /dev/xen/evtchn device driver");
MODULE_LICENSE("GPL");