1 /****************************************************************************** 2 * evtchn.c 3 * 4 * Driver for receiving and demuxing event-channel signals. 5 * 6 * Copyright (c) 2004-2005, K A Fraser 7 * Multi-process extensions Copyright (c) 2004, Steven Smith 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License version 2 11 * as published by the Free Software Foundation; or, when distributed 12 * separately from the Linux kernel or incorporated into other 13 * software packages, subject to the following license: 14 * 15 * Permission is hereby granted, free of charge, to any person obtaining a copy 16 * of this source file (the "Software"), to deal in the Software without 17 * restriction, including without limitation the rights to use, copy, modify, 18 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 19 * and to permit persons to whom the Software is furnished to do so, subject to 20 * the following conditions: 21 * 22 * The above copyright notice and this permission notice shall be included in 23 * all copies or substantial portions of the Software. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 31 * IN THE SOFTWARE. 32 */ 33 34 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt 35 36 #include <linux/module.h> 37 #include <linux/kernel.h> 38 #include <linux/sched.h> 39 #include <linux/slab.h> 40 #include <linux/string.h> 41 #include <linux/errno.h> 42 #include <linux/fs.h> 43 #include <linux/miscdevice.h> 44 #include <linux/major.h> 45 #include <linux/proc_fs.h> 46 #include <linux/stat.h> 47 #include <linux/poll.h> 48 #include <linux/irq.h> 49 #include <linux/init.h> 50 #include <linux/mutex.h> 51 #include <linux/cpu.h> 52 #include <linux/mm.h> 53 #include <linux/vmalloc.h> 54 55 #include <xen/xen.h> 56 #include <xen/events.h> 57 #include <xen/evtchn.h> 58 #include <xen/xen-ops.h> 59 #include <asm/xen/hypervisor.h> 60 61 struct per_user_data { 62 struct mutex bind_mutex; /* serialize bind/unbind operations */ 63 struct rb_root evtchns; 64 unsigned int nr_evtchns; 65 66 /* Notification ring, accessed via /dev/xen/evtchn. */ 67 unsigned int ring_size; 68 evtchn_port_t *ring; 69 unsigned int ring_cons, ring_prod, ring_overflow; 70 struct mutex ring_cons_mutex; /* protect against concurrent readers */ 71 spinlock_t ring_prod_lock; /* product against concurrent interrupts */ 72 73 /* Processes wait on this queue when ring is empty. */ 74 wait_queue_head_t evtchn_wait; 75 struct fasync_struct *evtchn_async_queue; 76 const char *name; 77 78 domid_t restrict_domid; 79 }; 80 81 #define UNRESTRICTED_DOMID ((domid_t)-1) 82 83 struct user_evtchn { 84 struct rb_node node; 85 struct per_user_data *user; 86 unsigned port; 87 bool enabled; 88 }; 89 90 static evtchn_port_t *evtchn_alloc_ring(unsigned int size) 91 { 92 evtchn_port_t *ring; 93 size_t s = size * sizeof(*ring); 94 95 ring = kmalloc(s, GFP_KERNEL); 96 if (!ring) 97 ring = vmalloc(s); 98 99 return ring; 100 } 101 102 static void evtchn_free_ring(evtchn_port_t *ring) 103 { 104 kvfree(ring); 105 } 106 107 static unsigned int evtchn_ring_offset(struct per_user_data *u, 108 unsigned int idx) 109 { 110 return idx & (u->ring_size - 1); 111 } 112 113 static evtchn_port_t *evtchn_ring_entry(struct per_user_data *u, 114 unsigned int idx) 115 { 116 return u->ring + evtchn_ring_offset(u, idx); 117 } 118 119 static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) 120 { 121 struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL; 122 123 u->nr_evtchns++; 124 125 while (*new) { 126 struct user_evtchn *this; 127 128 this = container_of(*new, struct user_evtchn, node); 129 130 parent = *new; 131 if (this->port < evtchn->port) 132 new = &((*new)->rb_left); 133 else if (this->port > evtchn->port) 134 new = &((*new)->rb_right); 135 else 136 return -EEXIST; 137 } 138 139 /* Add new node and rebalance tree. */ 140 rb_link_node(&evtchn->node, parent, new); 141 rb_insert_color(&evtchn->node, &u->evtchns); 142 143 return 0; 144 } 145 146 static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) 147 { 148 u->nr_evtchns--; 149 rb_erase(&evtchn->node, &u->evtchns); 150 kfree(evtchn); 151 } 152 153 static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port) 154 { 155 struct rb_node *node = u->evtchns.rb_node; 156 157 while (node) { 158 struct user_evtchn *evtchn; 159 160 evtchn = container_of(node, struct user_evtchn, node); 161 162 if (evtchn->port < port) 163 node = node->rb_left; 164 else if (evtchn->port > port) 165 node = node->rb_right; 166 else 167 return evtchn; 168 } 169 return NULL; 170 } 171 172 static irqreturn_t evtchn_interrupt(int irq, void *data) 173 { 174 struct user_evtchn *evtchn = data; 175 struct per_user_data *u = evtchn->user; 176 177 WARN(!evtchn->enabled, 178 "Interrupt for port %d, but apparently not enabled; per-user %p\n", 179 evtchn->port, u); 180 181 disable_irq_nosync(irq); 182 evtchn->enabled = false; 183 184 spin_lock(&u->ring_prod_lock); 185 186 if ((u->ring_prod - u->ring_cons) < u->ring_size) { 187 *evtchn_ring_entry(u, u->ring_prod) = evtchn->port; 188 wmb(); /* Ensure ring contents visible */ 189 if (u->ring_cons == u->ring_prod++) { 190 wake_up_interruptible(&u->evtchn_wait); 191 kill_fasync(&u->evtchn_async_queue, 192 SIGIO, POLL_IN); 193 } 194 } else 195 u->ring_overflow = 1; 196 197 spin_unlock(&u->ring_prod_lock); 198 199 return IRQ_HANDLED; 200 } 201 202 static ssize_t evtchn_read(struct file *file, char __user *buf, 203 size_t count, loff_t *ppos) 204 { 205 int rc; 206 unsigned int c, p, bytes1 = 0, bytes2 = 0; 207 struct per_user_data *u = file->private_data; 208 209 /* Whole number of ports. */ 210 count &= ~(sizeof(evtchn_port_t)-1); 211 212 if (count == 0) 213 return 0; 214 215 if (count > PAGE_SIZE) 216 count = PAGE_SIZE; 217 218 for (;;) { 219 mutex_lock(&u->ring_cons_mutex); 220 221 rc = -EFBIG; 222 if (u->ring_overflow) 223 goto unlock_out; 224 225 c = u->ring_cons; 226 p = u->ring_prod; 227 if (c != p) 228 break; 229 230 mutex_unlock(&u->ring_cons_mutex); 231 232 if (file->f_flags & O_NONBLOCK) 233 return -EAGAIN; 234 235 rc = wait_event_interruptible(u->evtchn_wait, 236 u->ring_cons != u->ring_prod); 237 if (rc) 238 return rc; 239 } 240 241 /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ 242 if (((c ^ p) & u->ring_size) != 0) { 243 bytes1 = (u->ring_size - evtchn_ring_offset(u, c)) * 244 sizeof(evtchn_port_t); 245 bytes2 = evtchn_ring_offset(u, p) * sizeof(evtchn_port_t); 246 } else { 247 bytes1 = (p - c) * sizeof(evtchn_port_t); 248 bytes2 = 0; 249 } 250 251 /* Truncate chunks according to caller's maximum byte count. */ 252 if (bytes1 > count) { 253 bytes1 = count; 254 bytes2 = 0; 255 } else if ((bytes1 + bytes2) > count) { 256 bytes2 = count - bytes1; 257 } 258 259 rc = -EFAULT; 260 rmb(); /* Ensure that we see the port before we copy it. */ 261 if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) || 262 ((bytes2 != 0) && 263 copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) 264 goto unlock_out; 265 266 u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t); 267 rc = bytes1 + bytes2; 268 269 unlock_out: 270 mutex_unlock(&u->ring_cons_mutex); 271 return rc; 272 } 273 274 static ssize_t evtchn_write(struct file *file, const char __user *buf, 275 size_t count, loff_t *ppos) 276 { 277 int rc, i; 278 evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL); 279 struct per_user_data *u = file->private_data; 280 281 if (kbuf == NULL) 282 return -ENOMEM; 283 284 /* Whole number of ports. */ 285 count &= ~(sizeof(evtchn_port_t)-1); 286 287 rc = 0; 288 if (count == 0) 289 goto out; 290 291 if (count > PAGE_SIZE) 292 count = PAGE_SIZE; 293 294 rc = -EFAULT; 295 if (copy_from_user(kbuf, buf, count) != 0) 296 goto out; 297 298 mutex_lock(&u->bind_mutex); 299 300 for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { 301 unsigned port = kbuf[i]; 302 struct user_evtchn *evtchn; 303 304 evtchn = find_evtchn(u, port); 305 if (evtchn && !evtchn->enabled) { 306 evtchn->enabled = true; 307 enable_irq(irq_from_evtchn(port)); 308 } 309 } 310 311 mutex_unlock(&u->bind_mutex); 312 313 rc = count; 314 315 out: 316 free_page((unsigned long)kbuf); 317 return rc; 318 } 319 320 static int evtchn_resize_ring(struct per_user_data *u) 321 { 322 unsigned int new_size; 323 evtchn_port_t *new_ring, *old_ring; 324 325 /* 326 * Ensure the ring is large enough to capture all possible 327 * events. i.e., one free slot for each bound event. 328 */ 329 if (u->nr_evtchns <= u->ring_size) 330 return 0; 331 332 if (u->ring_size == 0) 333 new_size = 64; 334 else 335 new_size = 2 * u->ring_size; 336 337 new_ring = evtchn_alloc_ring(new_size); 338 if (!new_ring) 339 return -ENOMEM; 340 341 old_ring = u->ring; 342 343 /* 344 * Access to the ring contents is serialized by either the 345 * prod /or/ cons lock so take both when resizing. 346 */ 347 mutex_lock(&u->ring_cons_mutex); 348 spin_lock_irq(&u->ring_prod_lock); 349 350 /* 351 * Copy the old ring contents to the new ring. 352 * 353 * To take care of wrapping, a full ring, and the new index 354 * pointing into the second half, simply copy the old contents 355 * twice. 356 * 357 * +---------+ +------------------+ 358 * |34567 12| -> |34567 1234567 12| 359 * +-----p-c-+ +-------c------p---+ 360 */ 361 memcpy(new_ring, old_ring, u->ring_size * sizeof(*u->ring)); 362 memcpy(new_ring + u->ring_size, old_ring, 363 u->ring_size * sizeof(*u->ring)); 364 365 u->ring = new_ring; 366 u->ring_size = new_size; 367 368 spin_unlock_irq(&u->ring_prod_lock); 369 mutex_unlock(&u->ring_cons_mutex); 370 371 evtchn_free_ring(old_ring); 372 373 return 0; 374 } 375 376 static int evtchn_bind_to_user(struct per_user_data *u, int port) 377 { 378 struct user_evtchn *evtchn; 379 struct evtchn_close close; 380 int rc = 0; 381 382 /* 383 * Ports are never reused, so every caller should pass in a 384 * unique port. 385 * 386 * (Locking not necessary because we haven't registered the 387 * interrupt handler yet, and our caller has already 388 * serialized bind operations.) 389 */ 390 391 evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL); 392 if (!evtchn) 393 return -ENOMEM; 394 395 evtchn->user = u; 396 evtchn->port = port; 397 evtchn->enabled = true; /* start enabled */ 398 399 rc = add_evtchn(u, evtchn); 400 if (rc < 0) 401 goto err; 402 403 rc = evtchn_resize_ring(u); 404 if (rc < 0) 405 goto err; 406 407 rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0, 408 u->name, evtchn); 409 if (rc < 0) 410 goto err; 411 412 rc = evtchn_make_refcounted(port); 413 return rc; 414 415 err: 416 /* bind failed, should close the port now */ 417 close.port = port; 418 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) 419 BUG(); 420 del_evtchn(u, evtchn); 421 return rc; 422 } 423 424 static void evtchn_unbind_from_user(struct per_user_data *u, 425 struct user_evtchn *evtchn) 426 { 427 int irq = irq_from_evtchn(evtchn->port); 428 429 BUG_ON(irq < 0); 430 431 unbind_from_irqhandler(irq, evtchn); 432 433 del_evtchn(u, evtchn); 434 } 435 436 static long evtchn_ioctl(struct file *file, 437 unsigned int cmd, unsigned long arg) 438 { 439 int rc; 440 struct per_user_data *u = file->private_data; 441 void __user *uarg = (void __user *) arg; 442 443 /* Prevent bind from racing with unbind */ 444 mutex_lock(&u->bind_mutex); 445 446 switch (cmd) { 447 case IOCTL_EVTCHN_BIND_VIRQ: { 448 struct ioctl_evtchn_bind_virq bind; 449 struct evtchn_bind_virq bind_virq; 450 451 rc = -EACCES; 452 if (u->restrict_domid != UNRESTRICTED_DOMID) 453 break; 454 455 rc = -EFAULT; 456 if (copy_from_user(&bind, uarg, sizeof(bind))) 457 break; 458 459 bind_virq.virq = bind.virq; 460 bind_virq.vcpu = xen_vcpu_nr(0); 461 rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, 462 &bind_virq); 463 if (rc != 0) 464 break; 465 466 rc = evtchn_bind_to_user(u, bind_virq.port); 467 if (rc == 0) 468 rc = bind_virq.port; 469 break; 470 } 471 472 case IOCTL_EVTCHN_BIND_INTERDOMAIN: { 473 struct ioctl_evtchn_bind_interdomain bind; 474 struct evtchn_bind_interdomain bind_interdomain; 475 476 rc = -EFAULT; 477 if (copy_from_user(&bind, uarg, sizeof(bind))) 478 break; 479 480 rc = -EACCES; 481 if (u->restrict_domid != UNRESTRICTED_DOMID && 482 u->restrict_domid != bind.remote_domain) 483 break; 484 485 bind_interdomain.remote_dom = bind.remote_domain; 486 bind_interdomain.remote_port = bind.remote_port; 487 rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, 488 &bind_interdomain); 489 if (rc != 0) 490 break; 491 492 rc = evtchn_bind_to_user(u, bind_interdomain.local_port); 493 if (rc == 0) 494 rc = bind_interdomain.local_port; 495 break; 496 } 497 498 case IOCTL_EVTCHN_BIND_UNBOUND_PORT: { 499 struct ioctl_evtchn_bind_unbound_port bind; 500 struct evtchn_alloc_unbound alloc_unbound; 501 502 rc = -EACCES; 503 if (u->restrict_domid != UNRESTRICTED_DOMID) 504 break; 505 506 rc = -EFAULT; 507 if (copy_from_user(&bind, uarg, sizeof(bind))) 508 break; 509 510 alloc_unbound.dom = DOMID_SELF; 511 alloc_unbound.remote_dom = bind.remote_domain; 512 rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, 513 &alloc_unbound); 514 if (rc != 0) 515 break; 516 517 rc = evtchn_bind_to_user(u, alloc_unbound.port); 518 if (rc == 0) 519 rc = alloc_unbound.port; 520 break; 521 } 522 523 case IOCTL_EVTCHN_UNBIND: { 524 struct ioctl_evtchn_unbind unbind; 525 struct user_evtchn *evtchn; 526 527 rc = -EFAULT; 528 if (copy_from_user(&unbind, uarg, sizeof(unbind))) 529 break; 530 531 rc = -EINVAL; 532 if (unbind.port >= xen_evtchn_nr_channels()) 533 break; 534 535 rc = -ENOTCONN; 536 evtchn = find_evtchn(u, unbind.port); 537 if (!evtchn) 538 break; 539 540 disable_irq(irq_from_evtchn(unbind.port)); 541 evtchn_unbind_from_user(u, evtchn); 542 rc = 0; 543 break; 544 } 545 546 case IOCTL_EVTCHN_NOTIFY: { 547 struct ioctl_evtchn_notify notify; 548 struct user_evtchn *evtchn; 549 550 rc = -EFAULT; 551 if (copy_from_user(¬ify, uarg, sizeof(notify))) 552 break; 553 554 rc = -ENOTCONN; 555 evtchn = find_evtchn(u, notify.port); 556 if (evtchn) { 557 notify_remote_via_evtchn(notify.port); 558 rc = 0; 559 } 560 break; 561 } 562 563 case IOCTL_EVTCHN_RESET: { 564 /* Initialise the ring to empty. Clear errors. */ 565 mutex_lock(&u->ring_cons_mutex); 566 spin_lock_irq(&u->ring_prod_lock); 567 u->ring_cons = u->ring_prod = u->ring_overflow = 0; 568 spin_unlock_irq(&u->ring_prod_lock); 569 mutex_unlock(&u->ring_cons_mutex); 570 rc = 0; 571 break; 572 } 573 574 case IOCTL_EVTCHN_RESTRICT_DOMID: { 575 struct ioctl_evtchn_restrict_domid ierd; 576 577 rc = -EACCES; 578 if (u->restrict_domid != UNRESTRICTED_DOMID) 579 break; 580 581 rc = -EFAULT; 582 if (copy_from_user(&ierd, uarg, sizeof(ierd))) 583 break; 584 585 rc = -EINVAL; 586 if (ierd.domid == 0 || ierd.domid >= DOMID_FIRST_RESERVED) 587 break; 588 589 u->restrict_domid = ierd.domid; 590 rc = 0; 591 592 break; 593 } 594 595 default: 596 rc = -ENOSYS; 597 break; 598 } 599 mutex_unlock(&u->bind_mutex); 600 601 return rc; 602 } 603 604 static unsigned int evtchn_poll(struct file *file, poll_table *wait) 605 { 606 unsigned int mask = POLLOUT | POLLWRNORM; 607 struct per_user_data *u = file->private_data; 608 609 poll_wait(file, &u->evtchn_wait, wait); 610 if (u->ring_cons != u->ring_prod) 611 mask |= POLLIN | POLLRDNORM; 612 if (u->ring_overflow) 613 mask = POLLERR; 614 return mask; 615 } 616 617 static int evtchn_fasync(int fd, struct file *filp, int on) 618 { 619 struct per_user_data *u = filp->private_data; 620 return fasync_helper(fd, filp, on, &u->evtchn_async_queue); 621 } 622 623 static int evtchn_open(struct inode *inode, struct file *filp) 624 { 625 struct per_user_data *u; 626 627 u = kzalloc(sizeof(*u), GFP_KERNEL); 628 if (u == NULL) 629 return -ENOMEM; 630 631 u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm); 632 if (u->name == NULL) { 633 kfree(u); 634 return -ENOMEM; 635 } 636 637 init_waitqueue_head(&u->evtchn_wait); 638 639 mutex_init(&u->bind_mutex); 640 mutex_init(&u->ring_cons_mutex); 641 spin_lock_init(&u->ring_prod_lock); 642 643 u->restrict_domid = UNRESTRICTED_DOMID; 644 645 filp->private_data = u; 646 647 return nonseekable_open(inode, filp); 648 } 649 650 static int evtchn_release(struct inode *inode, struct file *filp) 651 { 652 struct per_user_data *u = filp->private_data; 653 struct rb_node *node; 654 655 while ((node = u->evtchns.rb_node)) { 656 struct user_evtchn *evtchn; 657 658 evtchn = rb_entry(node, struct user_evtchn, node); 659 disable_irq(irq_from_evtchn(evtchn->port)); 660 evtchn_unbind_from_user(u, evtchn); 661 } 662 663 evtchn_free_ring(u->ring); 664 kfree(u->name); 665 kfree(u); 666 667 return 0; 668 } 669 670 static const struct file_operations evtchn_fops = { 671 .owner = THIS_MODULE, 672 .read = evtchn_read, 673 .write = evtchn_write, 674 .unlocked_ioctl = evtchn_ioctl, 675 .poll = evtchn_poll, 676 .fasync = evtchn_fasync, 677 .open = evtchn_open, 678 .release = evtchn_release, 679 .llseek = no_llseek, 680 }; 681 682 static struct miscdevice evtchn_miscdev = { 683 .minor = MISC_DYNAMIC_MINOR, 684 .name = "xen/evtchn", 685 .fops = &evtchn_fops, 686 }; 687 static int __init evtchn_init(void) 688 { 689 int err; 690 691 if (!xen_domain()) 692 return -ENODEV; 693 694 /* Create '/dev/xen/evtchn'. */ 695 err = misc_register(&evtchn_miscdev); 696 if (err != 0) { 697 pr_err("Could not register /dev/xen/evtchn\n"); 698 return err; 699 } 700 701 pr_info("Event-channel device installed\n"); 702 703 return 0; 704 } 705 706 static void __exit evtchn_cleanup(void) 707 { 708 misc_deregister(&evtchn_miscdev); 709 } 710 711 module_init(evtchn_init); 712 module_exit(evtchn_cleanup); 713 714 MODULE_LICENSE("GPL"); 715