1 /****************************************************************************** 2 * gntdev.c 3 * 4 * Device for accessing (in user-space) pages that have been granted by other 5 * domains. 6 * 7 * Copyright (c) 2006-2007, D G Murray. 8 * (c) 2009 Gerd Hoffmann <kraxel@redhat.com> 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 */ 19 20 #undef DEBUG 21 22 #include <linux/module.h> 23 #include <linux/kernel.h> 24 #include <linux/init.h> 25 #include <linux/miscdevice.h> 26 #include <linux/fs.h> 27 #include <linux/mm.h> 28 #include <linux/mman.h> 29 #include <linux/mmu_notifier.h> 30 #include <linux/types.h> 31 #include <linux/uaccess.h> 32 #include <linux/sched.h> 33 #include <linux/spinlock.h> 34 #include <linux/slab.h> 35 #include <linux/highmem.h> 36 37 #include <xen/xen.h> 38 #include <xen/grant_table.h> 39 #include <xen/balloon.h> 40 #include <xen/gntdev.h> 41 #include <xen/events.h> 42 #include <asm/xen/hypervisor.h> 43 #include <asm/xen/hypercall.h> 44 #include <asm/xen/page.h> 45 46 MODULE_LICENSE("GPL"); 47 MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, " 48 "Gerd Hoffmann <kraxel@redhat.com>"); 49 MODULE_DESCRIPTION("User-space granted page access driver"); 50 51 static int limit = 1024*1024; 52 module_param(limit, int, 0644); 53 MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by " 54 "the gntdev device"); 55 56 static atomic_t pages_mapped = ATOMIC_INIT(0); 57 58 static int use_ptemod; 59 60 struct gntdev_priv { 61 struct list_head maps; 62 /* lock protects maps from concurrent changes */ 63 spinlock_t lock; 64 struct mm_struct *mm; 65 struct mmu_notifier mn; 66 }; 67 68 struct unmap_notify { 69 int flags; 70 /* Address relative to the start of the grant_map */ 71 int addr; 72 int event; 73 }; 74 75 struct grant_map { 76 struct list_head next; 77 struct vm_area_struct *vma; 78 int index; 79 int count; 80 int flags; 81 atomic_t users; 82 struct unmap_notify notify; 83 struct ioctl_gntdev_grant_ref *grants; 84 struct gnttab_map_grant_ref *map_ops; 85 struct gnttab_unmap_grant_ref *unmap_ops; 86 struct page **pages; 87 }; 88 89 static int unmap_grant_pages(struct grant_map *map, int offset, int pages); 90 91 /* ------------------------------------------------------------------ */ 92 93 static void gntdev_print_maps(struct gntdev_priv *priv, 94 char *text, int text_index) 95 { 96 #ifdef DEBUG 97 struct grant_map *map; 98 99 pr_debug("%s: maps list (priv %p)\n", __func__, priv); 100 list_for_each_entry(map, &priv->maps, next) 101 pr_debug(" index %2d, count %2d %s\n", 102 map->index, map->count, 103 map->index == text_index && text ? text : ""); 104 #endif 105 } 106 107 static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) 108 { 109 struct grant_map *add; 110 int i; 111 112 add = kzalloc(sizeof(struct grant_map), GFP_KERNEL); 113 if (NULL == add) 114 return NULL; 115 116 add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL); 117 add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL); 118 add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL); 119 add->pages = kzalloc(sizeof(add->pages[0]) * count, GFP_KERNEL); 120 if (NULL == add->grants || 121 NULL == add->map_ops || 122 NULL == add->unmap_ops || 123 NULL == add->pages) 124 goto err; 125 126 if (alloc_xenballooned_pages(count, add->pages)) 127 goto err; 128 129 for (i = 0; i < count; i++) { 130 add->map_ops[i].handle = -1; 131 add->unmap_ops[i].handle = -1; 132 } 133 134 add->index = 0; 135 add->count = count; 136 atomic_set(&add->users, 1); 137 138 return add; 139 140 err: 141 kfree(add->pages); 142 kfree(add->grants); 143 kfree(add->map_ops); 144 kfree(add->unmap_ops); 145 kfree(add); 146 return NULL; 147 } 148 149 static void gntdev_add_map(struct gntdev_priv *priv, struct grant_map *add) 150 { 151 struct grant_map *map; 152 153 list_for_each_entry(map, &priv->maps, next) { 154 if (add->index + add->count < map->index) { 155 list_add_tail(&add->next, &map->next); 156 goto done; 157 } 158 add->index = map->index + map->count; 159 } 160 list_add_tail(&add->next, &priv->maps); 161 162 done: 163 gntdev_print_maps(priv, "[new]", add->index); 164 } 165 166 static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv, 167 int index, int count) 168 { 169 struct grant_map *map; 170 171 list_for_each_entry(map, &priv->maps, next) { 172 if (map->index != index) 173 continue; 174 if (count && map->count != count) 175 continue; 176 return map; 177 } 178 return NULL; 179 } 180 181 static void gntdev_put_map(struct grant_map *map) 182 { 183 if (!map) 184 return; 185 186 if (!atomic_dec_and_test(&map->users)) 187 return; 188 189 atomic_sub(map->count, &pages_mapped); 190 191 if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { 192 notify_remote_via_evtchn(map->notify.event); 193 } 194 195 if (map->pages) { 196 if (!use_ptemod) 197 unmap_grant_pages(map, 0, map->count); 198 199 free_xenballooned_pages(map->count, map->pages); 200 } 201 kfree(map->pages); 202 kfree(map->grants); 203 kfree(map->map_ops); 204 kfree(map->unmap_ops); 205 kfree(map); 206 } 207 208 /* ------------------------------------------------------------------ */ 209 210 static int find_grant_ptes(pte_t *pte, pgtable_t token, 211 unsigned long addr, void *data) 212 { 213 struct grant_map *map = data; 214 unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; 215 int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte; 216 u64 pte_maddr; 217 218 BUG_ON(pgnr >= map->count); 219 pte_maddr = arbitrary_virt_to_machine(pte).maddr; 220 221 gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags, 222 map->grants[pgnr].ref, 223 map->grants[pgnr].domid); 224 gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags, 225 -1 /* handle */); 226 return 0; 227 } 228 229 static int map_grant_pages(struct grant_map *map) 230 { 231 int i, err = 0; 232 233 if (!use_ptemod) { 234 /* Note: it could already be mapped */ 235 if (map->map_ops[0].handle != -1) 236 return 0; 237 for (i = 0; i < map->count; i++) { 238 unsigned long addr = (unsigned long) 239 pfn_to_kaddr(page_to_pfn(map->pages[i])); 240 gnttab_set_map_op(&map->map_ops[i], addr, map->flags, 241 map->grants[i].ref, 242 map->grants[i].domid); 243 gnttab_set_unmap_op(&map->unmap_ops[i], addr, 244 map->flags, -1 /* handle */); 245 } 246 } 247 248 pr_debug("map %d+%d\n", map->index, map->count); 249 err = gnttab_map_refs(map->map_ops, map->pages, map->count); 250 if (err) 251 return err; 252 253 for (i = 0; i < map->count; i++) { 254 if (map->map_ops[i].status) 255 err = -EINVAL; 256 else { 257 BUG_ON(map->map_ops[i].handle == -1); 258 map->unmap_ops[i].handle = map->map_ops[i].handle; 259 pr_debug("map handle=%d\n", map->map_ops[i].handle); 260 } 261 } 262 return err; 263 } 264 265 static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) 266 { 267 int i, err = 0; 268 269 if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { 270 int pgno = (map->notify.addr >> PAGE_SHIFT); 271 if (pgno >= offset && pgno < offset + pages && use_ptemod) { 272 void __user *tmp = (void __user *) 273 map->vma->vm_start + map->notify.addr; 274 err = copy_to_user(tmp, &err, 1); 275 if (err) 276 return err; 277 map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; 278 } else if (pgno >= offset && pgno < offset + pages) { 279 uint8_t *tmp = kmap(map->pages[pgno]); 280 tmp[map->notify.addr & (PAGE_SIZE-1)] = 0; 281 kunmap(map->pages[pgno]); 282 map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; 283 } 284 } 285 286 err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages); 287 if (err) 288 return err; 289 290 for (i = 0; i < pages; i++) { 291 if (map->unmap_ops[offset+i].status) 292 err = -EINVAL; 293 pr_debug("unmap handle=%d st=%d\n", 294 map->unmap_ops[offset+i].handle, 295 map->unmap_ops[offset+i].status); 296 map->unmap_ops[offset+i].handle = -1; 297 } 298 return err; 299 } 300 301 static int unmap_grant_pages(struct grant_map *map, int offset, int pages) 302 { 303 int range, err = 0; 304 305 pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages); 306 307 /* It is possible the requested range will have a "hole" where we 308 * already unmapped some of the grants. Only unmap valid ranges. 309 */ 310 while (pages && !err) { 311 while (pages && map->unmap_ops[offset].handle == -1) { 312 offset++; 313 pages--; 314 } 315 range = 0; 316 while (range < pages) { 317 if (map->unmap_ops[offset+range].handle == -1) { 318 range--; 319 break; 320 } 321 range++; 322 } 323 err = __unmap_grant_pages(map, offset, range); 324 offset += range; 325 pages -= range; 326 } 327 328 return err; 329 } 330 331 /* ------------------------------------------------------------------ */ 332 333 static void gntdev_vma_close(struct vm_area_struct *vma) 334 { 335 struct grant_map *map = vma->vm_private_data; 336 337 pr_debug("close %p\n", vma); 338 map->vma = NULL; 339 vma->vm_private_data = NULL; 340 gntdev_put_map(map); 341 } 342 343 static struct vm_operations_struct gntdev_vmops = { 344 .close = gntdev_vma_close, 345 }; 346 347 /* ------------------------------------------------------------------ */ 348 349 static void mn_invl_range_start(struct mmu_notifier *mn, 350 struct mm_struct *mm, 351 unsigned long start, unsigned long end) 352 { 353 struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); 354 struct grant_map *map; 355 unsigned long mstart, mend; 356 int err; 357 358 spin_lock(&priv->lock); 359 list_for_each_entry(map, &priv->maps, next) { 360 if (!map->vma) 361 continue; 362 if (map->vma->vm_start >= end) 363 continue; 364 if (map->vma->vm_end <= start) 365 continue; 366 mstart = max(start, map->vma->vm_start); 367 mend = min(end, map->vma->vm_end); 368 pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n", 369 map->index, map->count, 370 map->vma->vm_start, map->vma->vm_end, 371 start, end, mstart, mend); 372 err = unmap_grant_pages(map, 373 (mstart - map->vma->vm_start) >> PAGE_SHIFT, 374 (mend - mstart) >> PAGE_SHIFT); 375 WARN_ON(err); 376 } 377 spin_unlock(&priv->lock); 378 } 379 380 static void mn_invl_page(struct mmu_notifier *mn, 381 struct mm_struct *mm, 382 unsigned long address) 383 { 384 mn_invl_range_start(mn, mm, address, address + PAGE_SIZE); 385 } 386 387 static void mn_release(struct mmu_notifier *mn, 388 struct mm_struct *mm) 389 { 390 struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); 391 struct grant_map *map; 392 int err; 393 394 spin_lock(&priv->lock); 395 list_for_each_entry(map, &priv->maps, next) { 396 if (!map->vma) 397 continue; 398 pr_debug("map %d+%d (%lx %lx)\n", 399 map->index, map->count, 400 map->vma->vm_start, map->vma->vm_end); 401 err = unmap_grant_pages(map, /* offset */ 0, map->count); 402 WARN_ON(err); 403 } 404 spin_unlock(&priv->lock); 405 } 406 407 struct mmu_notifier_ops gntdev_mmu_ops = { 408 .release = mn_release, 409 .invalidate_page = mn_invl_page, 410 .invalidate_range_start = mn_invl_range_start, 411 }; 412 413 /* ------------------------------------------------------------------ */ 414 415 static int gntdev_open(struct inode *inode, struct file *flip) 416 { 417 struct gntdev_priv *priv; 418 int ret = 0; 419 420 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 421 if (!priv) 422 return -ENOMEM; 423 424 INIT_LIST_HEAD(&priv->maps); 425 spin_lock_init(&priv->lock); 426 427 if (use_ptemod) { 428 priv->mm = get_task_mm(current); 429 if (!priv->mm) { 430 kfree(priv); 431 return -ENOMEM; 432 } 433 priv->mn.ops = &gntdev_mmu_ops; 434 ret = mmu_notifier_register(&priv->mn, priv->mm); 435 mmput(priv->mm); 436 } 437 438 if (ret) { 439 kfree(priv); 440 return ret; 441 } 442 443 flip->private_data = priv; 444 pr_debug("priv %p\n", priv); 445 446 return 0; 447 } 448 449 static int gntdev_release(struct inode *inode, struct file *flip) 450 { 451 struct gntdev_priv *priv = flip->private_data; 452 struct grant_map *map; 453 454 pr_debug("priv %p\n", priv); 455 456 spin_lock(&priv->lock); 457 while (!list_empty(&priv->maps)) { 458 map = list_entry(priv->maps.next, struct grant_map, next); 459 list_del(&map->next); 460 gntdev_put_map(map); 461 } 462 spin_unlock(&priv->lock); 463 464 if (use_ptemod) 465 mmu_notifier_unregister(&priv->mn, priv->mm); 466 kfree(priv); 467 return 0; 468 } 469 470 static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv, 471 struct ioctl_gntdev_map_grant_ref __user *u) 472 { 473 struct ioctl_gntdev_map_grant_ref op; 474 struct grant_map *map; 475 int err; 476 477 if (copy_from_user(&op, u, sizeof(op)) != 0) 478 return -EFAULT; 479 pr_debug("priv %p, add %d\n", priv, op.count); 480 if (unlikely(op.count <= 0)) 481 return -EINVAL; 482 483 err = -ENOMEM; 484 map = gntdev_alloc_map(priv, op.count); 485 if (!map) 486 return err; 487 488 if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) { 489 pr_debug("can't map: over limit\n"); 490 gntdev_put_map(map); 491 return err; 492 } 493 494 if (copy_from_user(map->grants, &u->refs, 495 sizeof(map->grants[0]) * op.count) != 0) { 496 gntdev_put_map(map); 497 return err; 498 } 499 500 spin_lock(&priv->lock); 501 gntdev_add_map(priv, map); 502 op.index = map->index << PAGE_SHIFT; 503 spin_unlock(&priv->lock); 504 505 if (copy_to_user(u, &op, sizeof(op)) != 0) 506 return -EFAULT; 507 508 return 0; 509 } 510 511 static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv, 512 struct ioctl_gntdev_unmap_grant_ref __user *u) 513 { 514 struct ioctl_gntdev_unmap_grant_ref op; 515 struct grant_map *map; 516 int err = -ENOENT; 517 518 if (copy_from_user(&op, u, sizeof(op)) != 0) 519 return -EFAULT; 520 pr_debug("priv %p, del %d+%d\n", priv, (int)op.index, (int)op.count); 521 522 spin_lock(&priv->lock); 523 map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); 524 if (map) { 525 list_del(&map->next); 526 gntdev_put_map(map); 527 err = 0; 528 } 529 spin_unlock(&priv->lock); 530 return err; 531 } 532 533 static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv, 534 struct ioctl_gntdev_get_offset_for_vaddr __user *u) 535 { 536 struct ioctl_gntdev_get_offset_for_vaddr op; 537 struct vm_area_struct *vma; 538 struct grant_map *map; 539 540 if (copy_from_user(&op, u, sizeof(op)) != 0) 541 return -EFAULT; 542 pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr); 543 544 vma = find_vma(current->mm, op.vaddr); 545 if (!vma || vma->vm_ops != &gntdev_vmops) 546 return -EINVAL; 547 548 map = vma->vm_private_data; 549 if (!map) 550 return -EINVAL; 551 552 op.offset = map->index << PAGE_SHIFT; 553 op.count = map->count; 554 555 if (copy_to_user(u, &op, sizeof(op)) != 0) 556 return -EFAULT; 557 return 0; 558 } 559 560 static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) 561 { 562 struct ioctl_gntdev_unmap_notify op; 563 struct grant_map *map; 564 int rc; 565 566 if (copy_from_user(&op, u, sizeof(op))) 567 return -EFAULT; 568 569 if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) 570 return -EINVAL; 571 572 spin_lock(&priv->lock); 573 574 list_for_each_entry(map, &priv->maps, next) { 575 uint64_t begin = map->index << PAGE_SHIFT; 576 uint64_t end = (map->index + map->count) << PAGE_SHIFT; 577 if (op.index >= begin && op.index < end) 578 goto found; 579 } 580 rc = -ENOENT; 581 goto unlock_out; 582 583 found: 584 if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) && 585 (map->flags & GNTMAP_readonly)) { 586 rc = -EINVAL; 587 goto unlock_out; 588 } 589 590 map->notify.flags = op.action; 591 map->notify.addr = op.index - (map->index << PAGE_SHIFT); 592 map->notify.event = op.event_channel_port; 593 rc = 0; 594 unlock_out: 595 spin_unlock(&priv->lock); 596 return rc; 597 } 598 599 static long gntdev_ioctl(struct file *flip, 600 unsigned int cmd, unsigned long arg) 601 { 602 struct gntdev_priv *priv = flip->private_data; 603 void __user *ptr = (void __user *)arg; 604 605 switch (cmd) { 606 case IOCTL_GNTDEV_MAP_GRANT_REF: 607 return gntdev_ioctl_map_grant_ref(priv, ptr); 608 609 case IOCTL_GNTDEV_UNMAP_GRANT_REF: 610 return gntdev_ioctl_unmap_grant_ref(priv, ptr); 611 612 case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: 613 return gntdev_ioctl_get_offset_for_vaddr(priv, ptr); 614 615 case IOCTL_GNTDEV_SET_UNMAP_NOTIFY: 616 return gntdev_ioctl_notify(priv, ptr); 617 618 default: 619 pr_debug("priv %p, unknown cmd %x\n", priv, cmd); 620 return -ENOIOCTLCMD; 621 } 622 623 return 0; 624 } 625 626 static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) 627 { 628 struct gntdev_priv *priv = flip->private_data; 629 int index = vma->vm_pgoff; 630 int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 631 struct grant_map *map; 632 int i, err = -EINVAL; 633 634 if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) 635 return -EINVAL; 636 637 pr_debug("map %d+%d at %lx (pgoff %lx)\n", 638 index, count, vma->vm_start, vma->vm_pgoff); 639 640 spin_lock(&priv->lock); 641 map = gntdev_find_map_index(priv, index, count); 642 if (!map) 643 goto unlock_out; 644 if (use_ptemod && map->vma) 645 goto unlock_out; 646 if (use_ptemod && priv->mm != vma->vm_mm) { 647 printk(KERN_WARNING "Huh? Other mm?\n"); 648 goto unlock_out; 649 } 650 651 atomic_inc(&map->users); 652 653 vma->vm_ops = &gntdev_vmops; 654 655 vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP; 656 657 vma->vm_private_data = map; 658 659 if (use_ptemod) 660 map->vma = vma; 661 662 if (map->flags) { 663 if ((vma->vm_flags & VM_WRITE) && 664 (map->flags & GNTMAP_readonly)) 665 return -EINVAL; 666 } else { 667 map->flags = GNTMAP_host_map; 668 if (!(vma->vm_flags & VM_WRITE)) 669 map->flags |= GNTMAP_readonly; 670 } 671 672 spin_unlock(&priv->lock); 673 674 if (use_ptemod) { 675 err = apply_to_page_range(vma->vm_mm, vma->vm_start, 676 vma->vm_end - vma->vm_start, 677 find_grant_ptes, map); 678 if (err) { 679 printk(KERN_WARNING "find_grant_ptes() failure.\n"); 680 goto out_put_map; 681 } 682 } 683 684 err = map_grant_pages(map); 685 if (err) 686 goto out_put_map; 687 688 if (!use_ptemod) { 689 for (i = 0; i < count; i++) { 690 err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE, 691 map->pages[i]); 692 if (err) 693 goto out_put_map; 694 } 695 } 696 697 return 0; 698 699 unlock_out: 700 spin_unlock(&priv->lock); 701 return err; 702 703 out_put_map: 704 if (use_ptemod) 705 map->vma = NULL; 706 gntdev_put_map(map); 707 return err; 708 } 709 710 static const struct file_operations gntdev_fops = { 711 .owner = THIS_MODULE, 712 .open = gntdev_open, 713 .release = gntdev_release, 714 .mmap = gntdev_mmap, 715 .unlocked_ioctl = gntdev_ioctl 716 }; 717 718 static struct miscdevice gntdev_miscdev = { 719 .minor = MISC_DYNAMIC_MINOR, 720 .name = "xen/gntdev", 721 .fops = &gntdev_fops, 722 }; 723 724 /* ------------------------------------------------------------------ */ 725 726 static int __init gntdev_init(void) 727 { 728 int err; 729 730 if (!xen_domain()) 731 return -ENODEV; 732 733 use_ptemod = xen_pv_domain(); 734 735 err = misc_register(&gntdev_miscdev); 736 if (err != 0) { 737 printk(KERN_ERR "Could not register gntdev device\n"); 738 return err; 739 } 740 return 0; 741 } 742 743 static void __exit gntdev_exit(void) 744 { 745 misc_deregister(&gntdev_miscdev); 746 } 747 748 module_init(gntdev_init); 749 module_exit(gntdev_exit); 750 751 /* ------------------------------------------------------------------ */ 752