1 /****************************************************************************** 2 * privcmd.c 3 * 4 * Interface to privileged domain-0 commands. 5 * 6 * Copyright (c) 2002-2004, K A Fraser, B Dragovic 7 */ 8 9 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt 10 11 #include <linux/kernel.h> 12 #include <linux/module.h> 13 #include <linux/sched.h> 14 #include <linux/slab.h> 15 #include <linux/string.h> 16 #include <linux/errno.h> 17 #include <linux/mm.h> 18 #include <linux/mman.h> 19 #include <linux/uaccess.h> 20 #include <linux/swap.h> 21 #include <linux/highmem.h> 22 #include <linux/pagemap.h> 23 #include <linux/seq_file.h> 24 #include <linux/miscdevice.h> 25 #include <linux/moduleparam.h> 26 27 #include <asm/pgalloc.h> 28 #include <asm/pgtable.h> 29 #include <asm/tlb.h> 30 #include <asm/xen/hypervisor.h> 31 #include <asm/xen/hypercall.h> 32 33 #include <xen/xen.h> 34 #include <xen/privcmd.h> 35 #include <xen/interface/xen.h> 36 #include <xen/interface/memory.h> 37 #include <xen/interface/hvm/dm_op.h> 38 #include <xen/features.h> 39 #include <xen/page.h> 40 #include <xen/xen-ops.h> 41 #include <xen/balloon.h> 42 43 #include "privcmd.h" 44 45 MODULE_LICENSE("GPL"); 46 47 #define PRIV_VMA_LOCKED ((void *)1) 48 49 static unsigned int privcmd_dm_op_max_num = 16; 50 module_param_named(dm_op_max_nr_bufs, privcmd_dm_op_max_num, uint, 0644); 51 MODULE_PARM_DESC(dm_op_max_nr_bufs, 52 "Maximum number of buffers per dm_op hypercall"); 53 54 static unsigned int privcmd_dm_op_buf_max_size = 4096; 55 module_param_named(dm_op_buf_max_size, privcmd_dm_op_buf_max_size, uint, 56 0644); 57 MODULE_PARM_DESC(dm_op_buf_max_size, 58 "Maximum size of a dm_op hypercall buffer"); 59 60 struct privcmd_data { 61 domid_t domid; 62 }; 63 64 static int privcmd_vma_range_is_mapped( 65 struct vm_area_struct *vma, 66 unsigned long addr, 67 unsigned long nr_pages); 68 69 static long privcmd_ioctl_hypercall(struct file *file, void __user *udata) 70 { 71 struct privcmd_data *data = file->private_data; 72 struct privcmd_hypercall hypercall; 73 long ret; 74 75 /* Disallow arbitrary hypercalls if restricted */ 76 if (data->domid != DOMID_INVALID) 77 return -EPERM; 78 79 if (copy_from_user(&hypercall, udata, sizeof(hypercall))) 80 return -EFAULT; 81 82 xen_preemptible_hcall_begin(); 83 ret = privcmd_call(hypercall.op, 84 hypercall.arg[0], hypercall.arg[1], 85 hypercall.arg[2], hypercall.arg[3], 86 hypercall.arg[4]); 87 xen_preemptible_hcall_end(); 88 89 return ret; 90 } 91 92 static void free_page_list(struct list_head *pages) 93 { 94 struct page *p, *n; 95 96 list_for_each_entry_safe(p, n, pages, lru) 97 __free_page(p); 98 99 INIT_LIST_HEAD(pages); 100 } 101 102 /* 103 * Given an array of items in userspace, return a list of pages 104 * containing the data. If copying fails, either because of memory 105 * allocation failure or a problem reading user memory, return an 106 * error code; its up to the caller to dispose of any partial list. 107 */ 108 static int gather_array(struct list_head *pagelist, 109 unsigned nelem, size_t size, 110 const void __user *data) 111 { 112 unsigned pageidx; 113 void *pagedata; 114 int ret; 115 116 if (size > PAGE_SIZE) 117 return 0; 118 119 pageidx = PAGE_SIZE; 120 pagedata = NULL; /* quiet, gcc */ 121 while (nelem--) { 122 if (pageidx > PAGE_SIZE-size) { 123 struct page *page = alloc_page(GFP_KERNEL); 124 125 ret = -ENOMEM; 126 if (page == NULL) 127 goto fail; 128 129 pagedata = page_address(page); 130 131 list_add_tail(&page->lru, pagelist); 132 pageidx = 0; 133 } 134 135 ret = -EFAULT; 136 if (copy_from_user(pagedata + pageidx, data, size)) 137 goto fail; 138 139 data += size; 140 pageidx += size; 141 } 142 143 ret = 0; 144 145 fail: 146 return ret; 147 } 148 149 /* 150 * Call function "fn" on each element of the array fragmented 151 * over a list of pages. 152 */ 153 static int traverse_pages(unsigned nelem, size_t size, 154 struct list_head *pos, 155 int (*fn)(void *data, void *state), 156 void *state) 157 { 158 void *pagedata; 159 unsigned pageidx; 160 int ret = 0; 161 162 BUG_ON(size > PAGE_SIZE); 163 164 pageidx = PAGE_SIZE; 165 pagedata = NULL; /* hush, gcc */ 166 167 while (nelem--) { 168 if (pageidx > PAGE_SIZE-size) { 169 struct page *page; 170 pos = pos->next; 171 page = list_entry(pos, struct page, lru); 172 pagedata = page_address(page); 173 pageidx = 0; 174 } 175 176 ret = (*fn)(pagedata + pageidx, state); 177 if (ret) 178 break; 179 pageidx += size; 180 } 181 182 return ret; 183 } 184 185 /* 186 * Similar to traverse_pages, but use each page as a "block" of 187 * data to be processed as one unit. 188 */ 189 static int traverse_pages_block(unsigned nelem, size_t size, 190 struct list_head *pos, 191 int (*fn)(void *data, int nr, void *state), 192 void *state) 193 { 194 void *pagedata; 195 int ret = 0; 196 197 BUG_ON(size > PAGE_SIZE); 198 199 while (nelem) { 200 int nr = (PAGE_SIZE/size); 201 struct page *page; 202 if (nr > nelem) 203 nr = nelem; 204 pos = pos->next; 205 page = list_entry(pos, struct page, lru); 206 pagedata = page_address(page); 207 ret = (*fn)(pagedata, nr, state); 208 if (ret) 209 break; 210 nelem -= nr; 211 } 212 213 return ret; 214 } 215 216 struct mmap_gfn_state { 217 unsigned long va; 218 struct vm_area_struct *vma; 219 domid_t domain; 220 }; 221 222 static int mmap_gfn_range(void *data, void *state) 223 { 224 struct privcmd_mmap_entry *msg = data; 225 struct mmap_gfn_state *st = state; 226 struct vm_area_struct *vma = st->vma; 227 int rc; 228 229 /* Do not allow range to wrap the address space. */ 230 if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) || 231 ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va)) 232 return -EINVAL; 233 234 /* Range chunks must be contiguous in va space. */ 235 if ((msg->va != st->va) || 236 ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end)) 237 return -EINVAL; 238 239 rc = xen_remap_domain_gfn_range(vma, 240 msg->va & PAGE_MASK, 241 msg->mfn, msg->npages, 242 vma->vm_page_prot, 243 st->domain, NULL); 244 if (rc < 0) 245 return rc; 246 247 st->va += msg->npages << PAGE_SHIFT; 248 249 return 0; 250 } 251 252 static long privcmd_ioctl_mmap(struct file *file, void __user *udata) 253 { 254 struct privcmd_data *data = file->private_data; 255 struct privcmd_mmap mmapcmd; 256 struct mm_struct *mm = current->mm; 257 struct vm_area_struct *vma; 258 int rc; 259 LIST_HEAD(pagelist); 260 struct mmap_gfn_state state; 261 262 /* We only support privcmd_ioctl_mmap_batch for auto translated. */ 263 if (xen_feature(XENFEAT_auto_translated_physmap)) 264 return -ENOSYS; 265 266 if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) 267 return -EFAULT; 268 269 /* If restriction is in place, check the domid matches */ 270 if (data->domid != DOMID_INVALID && data->domid != mmapcmd.dom) 271 return -EPERM; 272 273 rc = gather_array(&pagelist, 274 mmapcmd.num, sizeof(struct privcmd_mmap_entry), 275 mmapcmd.entry); 276 277 if (rc || list_empty(&pagelist)) 278 goto out; 279 280 down_write(&mm->mmap_sem); 281 282 { 283 struct page *page = list_first_entry(&pagelist, 284 struct page, lru); 285 struct privcmd_mmap_entry *msg = page_address(page); 286 287 vma = find_vma(mm, msg->va); 288 rc = -EINVAL; 289 290 if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data) 291 goto out_up; 292 vma->vm_private_data = PRIV_VMA_LOCKED; 293 } 294 295 state.va = vma->vm_start; 296 state.vma = vma; 297 state.domain = mmapcmd.dom; 298 299 rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry), 300 &pagelist, 301 mmap_gfn_range, &state); 302 303 304 out_up: 305 up_write(&mm->mmap_sem); 306 307 out: 308 free_page_list(&pagelist); 309 310 return rc; 311 } 312 313 struct mmap_batch_state { 314 domid_t domain; 315 unsigned long va; 316 struct vm_area_struct *vma; 317 int index; 318 /* A tristate: 319 * 0 for no errors 320 * 1 if at least one error has happened (and no 321 * -ENOENT errors have happened) 322 * -ENOENT if at least 1 -ENOENT has happened. 323 */ 324 int global_error; 325 int version; 326 327 /* User-space gfn array to store errors in the second pass for V1. */ 328 xen_pfn_t __user *user_gfn; 329 /* User-space int array to store errors in the second pass for V2. */ 330 int __user *user_err; 331 }; 332 333 /* auto translated dom0 note: if domU being created is PV, then gfn is 334 * mfn(addr on bus). If it's auto xlated, then gfn is pfn (input to HAP). 335 */ 336 static int mmap_batch_fn(void *data, int nr, void *state) 337 { 338 xen_pfn_t *gfnp = data; 339 struct mmap_batch_state *st = state; 340 struct vm_area_struct *vma = st->vma; 341 struct page **pages = vma->vm_private_data; 342 struct page **cur_pages = NULL; 343 int ret; 344 345 if (xen_feature(XENFEAT_auto_translated_physmap)) 346 cur_pages = &pages[st->index]; 347 348 BUG_ON(nr < 0); 349 ret = xen_remap_domain_gfn_array(st->vma, st->va & PAGE_MASK, gfnp, nr, 350 (int *)gfnp, st->vma->vm_page_prot, 351 st->domain, cur_pages); 352 353 /* Adjust the global_error? */ 354 if (ret != nr) { 355 if (ret == -ENOENT) 356 st->global_error = -ENOENT; 357 else { 358 /* Record that at least one error has happened. */ 359 if (st->global_error == 0) 360 st->global_error = 1; 361 } 362 } 363 st->va += XEN_PAGE_SIZE * nr; 364 st->index += nr / XEN_PFN_PER_PAGE; 365 366 return 0; 367 } 368 369 static int mmap_return_error(int err, struct mmap_batch_state *st) 370 { 371 int ret; 372 373 if (st->version == 1) { 374 if (err) { 375 xen_pfn_t gfn; 376 377 ret = get_user(gfn, st->user_gfn); 378 if (ret < 0) 379 return ret; 380 /* 381 * V1 encodes the error codes in the 32bit top 382 * nibble of the gfn (with its known 383 * limitations vis-a-vis 64 bit callers). 384 */ 385 gfn |= (err == -ENOENT) ? 386 PRIVCMD_MMAPBATCH_PAGED_ERROR : 387 PRIVCMD_MMAPBATCH_MFN_ERROR; 388 return __put_user(gfn, st->user_gfn++); 389 } else 390 st->user_gfn++; 391 } else { /* st->version == 2 */ 392 if (err) 393 return __put_user(err, st->user_err++); 394 else 395 st->user_err++; 396 } 397 398 return 0; 399 } 400 401 static int mmap_return_errors(void *data, int nr, void *state) 402 { 403 struct mmap_batch_state *st = state; 404 int *errs = data; 405 int i; 406 int ret; 407 408 for (i = 0; i < nr; i++) { 409 ret = mmap_return_error(errs[i], st); 410 if (ret < 0) 411 return ret; 412 } 413 return 0; 414 } 415 416 /* Allocate pfns that are then mapped with gfns from foreign domid. Update 417 * the vma with the page info to use later. 418 * Returns: 0 if success, otherwise -errno 419 */ 420 static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) 421 { 422 int rc; 423 struct page **pages; 424 425 pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL); 426 if (pages == NULL) 427 return -ENOMEM; 428 429 rc = alloc_xenballooned_pages(numpgs, pages); 430 if (rc != 0) { 431 pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__, 432 numpgs, rc); 433 kfree(pages); 434 return -ENOMEM; 435 } 436 BUG_ON(vma->vm_private_data != NULL); 437 vma->vm_private_data = pages; 438 439 return 0; 440 } 441 442 static const struct vm_operations_struct privcmd_vm_ops; 443 444 static long privcmd_ioctl_mmap_batch( 445 struct file *file, void __user *udata, int version) 446 { 447 struct privcmd_data *data = file->private_data; 448 int ret; 449 struct privcmd_mmapbatch_v2 m; 450 struct mm_struct *mm = current->mm; 451 struct vm_area_struct *vma; 452 unsigned long nr_pages; 453 LIST_HEAD(pagelist); 454 struct mmap_batch_state state; 455 456 switch (version) { 457 case 1: 458 if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch))) 459 return -EFAULT; 460 /* Returns per-frame error in m.arr. */ 461 m.err = NULL; 462 if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr))) 463 return -EFAULT; 464 break; 465 case 2: 466 if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2))) 467 return -EFAULT; 468 /* Returns per-frame error code in m.err. */ 469 if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err)))) 470 return -EFAULT; 471 break; 472 default: 473 return -EINVAL; 474 } 475 476 /* If restriction is in place, check the domid matches */ 477 if (data->domid != DOMID_INVALID && data->domid != m.dom) 478 return -EPERM; 479 480 nr_pages = DIV_ROUND_UP(m.num, XEN_PFN_PER_PAGE); 481 if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) 482 return -EINVAL; 483 484 ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr); 485 486 if (ret) 487 goto out; 488 if (list_empty(&pagelist)) { 489 ret = -EINVAL; 490 goto out; 491 } 492 493 if (version == 2) { 494 /* Zero error array now to only copy back actual errors. */ 495 if (clear_user(m.err, sizeof(int) * m.num)) { 496 ret = -EFAULT; 497 goto out; 498 } 499 } 500 501 down_write(&mm->mmap_sem); 502 503 vma = find_vma(mm, m.addr); 504 if (!vma || 505 vma->vm_ops != &privcmd_vm_ops) { 506 ret = -EINVAL; 507 goto out_unlock; 508 } 509 510 /* 511 * Caller must either: 512 * 513 * Map the whole VMA range, which will also allocate all the 514 * pages required for the auto_translated_physmap case. 515 * 516 * Or 517 * 518 * Map unmapped holes left from a previous map attempt (e.g., 519 * because those foreign frames were previously paged out). 520 */ 521 if (vma->vm_private_data == NULL) { 522 if (m.addr != vma->vm_start || 523 m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) { 524 ret = -EINVAL; 525 goto out_unlock; 526 } 527 if (xen_feature(XENFEAT_auto_translated_physmap)) { 528 ret = alloc_empty_pages(vma, nr_pages); 529 if (ret < 0) 530 goto out_unlock; 531 } else 532 vma->vm_private_data = PRIV_VMA_LOCKED; 533 } else { 534 if (m.addr < vma->vm_start || 535 m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) { 536 ret = -EINVAL; 537 goto out_unlock; 538 } 539 if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) { 540 ret = -EINVAL; 541 goto out_unlock; 542 } 543 } 544 545 state.domain = m.dom; 546 state.vma = vma; 547 state.va = m.addr; 548 state.index = 0; 549 state.global_error = 0; 550 state.version = version; 551 552 BUILD_BUG_ON(((PAGE_SIZE / sizeof(xen_pfn_t)) % XEN_PFN_PER_PAGE) != 0); 553 /* mmap_batch_fn guarantees ret == 0 */ 554 BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t), 555 &pagelist, mmap_batch_fn, &state)); 556 557 up_write(&mm->mmap_sem); 558 559 if (state.global_error) { 560 /* Write back errors in second pass. */ 561 state.user_gfn = (xen_pfn_t *)m.arr; 562 state.user_err = m.err; 563 ret = traverse_pages_block(m.num, sizeof(xen_pfn_t), 564 &pagelist, mmap_return_errors, &state); 565 } else 566 ret = 0; 567 568 /* If we have not had any EFAULT-like global errors then set the global 569 * error to -ENOENT if necessary. */ 570 if ((ret == 0) && (state.global_error == -ENOENT)) 571 ret = -ENOENT; 572 573 out: 574 free_page_list(&pagelist); 575 return ret; 576 577 out_unlock: 578 up_write(&mm->mmap_sem); 579 goto out; 580 } 581 582 static int lock_pages( 583 struct privcmd_dm_op_buf kbufs[], unsigned int num, 584 struct page *pages[], unsigned int nr_pages) 585 { 586 unsigned int i; 587 588 for (i = 0; i < num; i++) { 589 unsigned int requested; 590 int pinned; 591 592 requested = DIV_ROUND_UP( 593 offset_in_page(kbufs[i].uptr) + kbufs[i].size, 594 PAGE_SIZE); 595 if (requested > nr_pages) 596 return -ENOSPC; 597 598 pinned = get_user_pages_fast( 599 (unsigned long) kbufs[i].uptr, 600 requested, FOLL_WRITE, pages); 601 if (pinned < 0) 602 return pinned; 603 604 nr_pages -= pinned; 605 pages += pinned; 606 } 607 608 return 0; 609 } 610 611 static void unlock_pages(struct page *pages[], unsigned int nr_pages) 612 { 613 unsigned int i; 614 615 if (!pages) 616 return; 617 618 for (i = 0; i < nr_pages; i++) { 619 if (pages[i]) 620 put_page(pages[i]); 621 } 622 } 623 624 static long privcmd_ioctl_dm_op(struct file *file, void __user *udata) 625 { 626 struct privcmd_data *data = file->private_data; 627 struct privcmd_dm_op kdata; 628 struct privcmd_dm_op_buf *kbufs; 629 unsigned int nr_pages = 0; 630 struct page **pages = NULL; 631 struct xen_dm_op_buf *xbufs = NULL; 632 unsigned int i; 633 long rc; 634 635 if (copy_from_user(&kdata, udata, sizeof(kdata))) 636 return -EFAULT; 637 638 /* If restriction is in place, check the domid matches */ 639 if (data->domid != DOMID_INVALID && data->domid != kdata.dom) 640 return -EPERM; 641 642 if (kdata.num == 0) 643 return 0; 644 645 if (kdata.num > privcmd_dm_op_max_num) 646 return -E2BIG; 647 648 kbufs = kcalloc(kdata.num, sizeof(*kbufs), GFP_KERNEL); 649 if (!kbufs) 650 return -ENOMEM; 651 652 if (copy_from_user(kbufs, kdata.ubufs, 653 sizeof(*kbufs) * kdata.num)) { 654 rc = -EFAULT; 655 goto out; 656 } 657 658 for (i = 0; i < kdata.num; i++) { 659 if (kbufs[i].size > privcmd_dm_op_buf_max_size) { 660 rc = -E2BIG; 661 goto out; 662 } 663 664 if (!access_ok(VERIFY_WRITE, kbufs[i].uptr, 665 kbufs[i].size)) { 666 rc = -EFAULT; 667 goto out; 668 } 669 670 nr_pages += DIV_ROUND_UP( 671 offset_in_page(kbufs[i].uptr) + kbufs[i].size, 672 PAGE_SIZE); 673 } 674 675 pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL); 676 if (!pages) { 677 rc = -ENOMEM; 678 goto out; 679 } 680 681 xbufs = kcalloc(kdata.num, sizeof(*xbufs), GFP_KERNEL); 682 if (!xbufs) { 683 rc = -ENOMEM; 684 goto out; 685 } 686 687 rc = lock_pages(kbufs, kdata.num, pages, nr_pages); 688 if (rc) 689 goto out; 690 691 for (i = 0; i < kdata.num; i++) { 692 set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr); 693 xbufs[i].size = kbufs[i].size; 694 } 695 696 xen_preemptible_hcall_begin(); 697 rc = HYPERVISOR_dm_op(kdata.dom, kdata.num, xbufs); 698 xen_preemptible_hcall_end(); 699 700 out: 701 unlock_pages(pages, nr_pages); 702 kfree(xbufs); 703 kfree(pages); 704 kfree(kbufs); 705 706 return rc; 707 } 708 709 static long privcmd_ioctl_restrict(struct file *file, void __user *udata) 710 { 711 struct privcmd_data *data = file->private_data; 712 domid_t dom; 713 714 if (copy_from_user(&dom, udata, sizeof(dom))) 715 return -EFAULT; 716 717 /* Set restriction to the specified domain, or check it matches */ 718 if (data->domid == DOMID_INVALID) 719 data->domid = dom; 720 else if (data->domid != dom) 721 return -EINVAL; 722 723 return 0; 724 } 725 726 struct remap_pfn { 727 struct mm_struct *mm; 728 struct page **pages; 729 pgprot_t prot; 730 unsigned long i; 731 }; 732 733 static int remap_pfn_fn(pte_t *ptep, pgtable_t token, unsigned long addr, 734 void *data) 735 { 736 struct remap_pfn *r = data; 737 struct page *page = r->pages[r->i]; 738 pte_t pte = pte_mkspecial(pfn_pte(page_to_pfn(page), r->prot)); 739 740 set_pte_at(r->mm, addr, ptep, pte); 741 r->i++; 742 743 return 0; 744 } 745 746 static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) 747 { 748 struct privcmd_data *data = file->private_data; 749 struct mm_struct *mm = current->mm; 750 struct vm_area_struct *vma; 751 struct privcmd_mmap_resource kdata; 752 xen_pfn_t *pfns = NULL; 753 struct xen_mem_acquire_resource xdata; 754 int rc; 755 756 if (copy_from_user(&kdata, udata, sizeof(kdata))) 757 return -EFAULT; 758 759 /* If restriction is in place, check the domid matches */ 760 if (data->domid != DOMID_INVALID && data->domid != kdata.dom) 761 return -EPERM; 762 763 down_write(&mm->mmap_sem); 764 765 vma = find_vma(mm, kdata.addr); 766 if (!vma || vma->vm_ops != &privcmd_vm_ops) { 767 rc = -EINVAL; 768 goto out; 769 } 770 771 pfns = kcalloc(kdata.num, sizeof(*pfns), GFP_KERNEL); 772 if (!pfns) { 773 rc = -ENOMEM; 774 goto out; 775 } 776 777 if (xen_feature(XENFEAT_auto_translated_physmap)) { 778 unsigned int nr = DIV_ROUND_UP(kdata.num, XEN_PFN_PER_PAGE); 779 struct page **pages; 780 unsigned int i; 781 782 rc = alloc_empty_pages(vma, nr); 783 if (rc < 0) 784 goto out; 785 786 pages = vma->vm_private_data; 787 for (i = 0; i < kdata.num; i++) { 788 xen_pfn_t pfn = 789 page_to_xen_pfn(pages[i / XEN_PFN_PER_PAGE]); 790 791 pfns[i] = pfn + (i % XEN_PFN_PER_PAGE); 792 } 793 } else 794 vma->vm_private_data = PRIV_VMA_LOCKED; 795 796 memset(&xdata, 0, sizeof(xdata)); 797 xdata.domid = kdata.dom; 798 xdata.type = kdata.type; 799 xdata.id = kdata.id; 800 xdata.frame = kdata.idx; 801 xdata.nr_frames = kdata.num; 802 set_xen_guest_handle(xdata.frame_list, pfns); 803 804 xen_preemptible_hcall_begin(); 805 rc = HYPERVISOR_memory_op(XENMEM_acquire_resource, &xdata); 806 xen_preemptible_hcall_end(); 807 808 if (rc) 809 goto out; 810 811 if (xen_feature(XENFEAT_auto_translated_physmap)) { 812 struct remap_pfn r = { 813 .mm = vma->vm_mm, 814 .pages = vma->vm_private_data, 815 .prot = vma->vm_page_prot, 816 }; 817 818 rc = apply_to_page_range(r.mm, kdata.addr, 819 kdata.num << PAGE_SHIFT, 820 remap_pfn_fn, &r); 821 } else { 822 unsigned int domid = 823 (xdata.flags & XENMEM_rsrc_acq_caller_owned) ? 824 DOMID_SELF : kdata.dom; 825 int num; 826 827 num = xen_remap_domain_mfn_array(vma, 828 kdata.addr & PAGE_MASK, 829 pfns, kdata.num, (int *)pfns, 830 vma->vm_page_prot, 831 domid, 832 vma->vm_private_data); 833 if (num < 0) 834 rc = num; 835 else if (num != kdata.num) { 836 unsigned int i; 837 838 for (i = 0; i < num; i++) { 839 rc = pfns[i]; 840 if (rc < 0) 841 break; 842 } 843 } else 844 rc = 0; 845 } 846 847 out: 848 up_write(&mm->mmap_sem); 849 kfree(pfns); 850 851 return rc; 852 } 853 854 static long privcmd_ioctl(struct file *file, 855 unsigned int cmd, unsigned long data) 856 { 857 int ret = -ENOTTY; 858 void __user *udata = (void __user *) data; 859 860 switch (cmd) { 861 case IOCTL_PRIVCMD_HYPERCALL: 862 ret = privcmd_ioctl_hypercall(file, udata); 863 break; 864 865 case IOCTL_PRIVCMD_MMAP: 866 ret = privcmd_ioctl_mmap(file, udata); 867 break; 868 869 case IOCTL_PRIVCMD_MMAPBATCH: 870 ret = privcmd_ioctl_mmap_batch(file, udata, 1); 871 break; 872 873 case IOCTL_PRIVCMD_MMAPBATCH_V2: 874 ret = privcmd_ioctl_mmap_batch(file, udata, 2); 875 break; 876 877 case IOCTL_PRIVCMD_DM_OP: 878 ret = privcmd_ioctl_dm_op(file, udata); 879 break; 880 881 case IOCTL_PRIVCMD_RESTRICT: 882 ret = privcmd_ioctl_restrict(file, udata); 883 break; 884 885 case IOCTL_PRIVCMD_MMAP_RESOURCE: 886 ret = privcmd_ioctl_mmap_resource(file, udata); 887 break; 888 889 default: 890 break; 891 } 892 893 return ret; 894 } 895 896 static int privcmd_open(struct inode *ino, struct file *file) 897 { 898 struct privcmd_data *data = kzalloc(sizeof(*data), GFP_KERNEL); 899 900 if (!data) 901 return -ENOMEM; 902 903 /* DOMID_INVALID implies no restriction */ 904 data->domid = DOMID_INVALID; 905 906 file->private_data = data; 907 return 0; 908 } 909 910 static int privcmd_release(struct inode *ino, struct file *file) 911 { 912 struct privcmd_data *data = file->private_data; 913 914 kfree(data); 915 return 0; 916 } 917 918 static void privcmd_close(struct vm_area_struct *vma) 919 { 920 struct page **pages = vma->vm_private_data; 921 int numpgs = vma_pages(vma); 922 int numgfns = (vma->vm_end - vma->vm_start) >> XEN_PAGE_SHIFT; 923 int rc; 924 925 if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages) 926 return; 927 928 rc = xen_unmap_domain_gfn_range(vma, numgfns, pages); 929 if (rc == 0) 930 free_xenballooned_pages(numpgs, pages); 931 else 932 pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n", 933 numpgs, rc); 934 kfree(pages); 935 } 936 937 static vm_fault_t privcmd_fault(struct vm_fault *vmf) 938 { 939 printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", 940 vmf->vma, vmf->vma->vm_start, vmf->vma->vm_end, 941 vmf->pgoff, (void *)vmf->address); 942 943 return VM_FAULT_SIGBUS; 944 } 945 946 static const struct vm_operations_struct privcmd_vm_ops = { 947 .close = privcmd_close, 948 .fault = privcmd_fault 949 }; 950 951 static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) 952 { 953 /* DONTCOPY is essential for Xen because copy_page_range doesn't know 954 * how to recreate these mappings */ 955 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY | 956 VM_DONTEXPAND | VM_DONTDUMP; 957 vma->vm_ops = &privcmd_vm_ops; 958 vma->vm_private_data = NULL; 959 960 return 0; 961 } 962 963 /* 964 * For MMAPBATCH*. This allows asserting the singleshot mapping 965 * on a per pfn/pte basis. Mapping calls that fail with ENOENT 966 * can be then retried until success. 967 */ 968 static int is_mapped_fn(pte_t *pte, struct page *pmd_page, 969 unsigned long addr, void *data) 970 { 971 return pte_none(*pte) ? 0 : -EBUSY; 972 } 973 974 static int privcmd_vma_range_is_mapped( 975 struct vm_area_struct *vma, 976 unsigned long addr, 977 unsigned long nr_pages) 978 { 979 return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT, 980 is_mapped_fn, NULL) != 0; 981 } 982 983 const struct file_operations xen_privcmd_fops = { 984 .owner = THIS_MODULE, 985 .unlocked_ioctl = privcmd_ioctl, 986 .open = privcmd_open, 987 .release = privcmd_release, 988 .mmap = privcmd_mmap, 989 }; 990 EXPORT_SYMBOL_GPL(xen_privcmd_fops); 991 992 static struct miscdevice privcmd_dev = { 993 .minor = MISC_DYNAMIC_MINOR, 994 .name = "xen/privcmd", 995 .fops = &xen_privcmd_fops, 996 }; 997 998 static int __init privcmd_init(void) 999 { 1000 int err; 1001 1002 if (!xen_domain()) 1003 return -ENODEV; 1004 1005 err = misc_register(&privcmd_dev); 1006 if (err != 0) { 1007 pr_err("Could not register Xen privcmd device\n"); 1008 return err; 1009 } 1010 1011 err = misc_register(&xen_privcmdbuf_dev); 1012 if (err != 0) { 1013 pr_err("Could not register Xen hypercall-buf device\n"); 1014 misc_deregister(&privcmd_dev); 1015 return err; 1016 } 1017 1018 return 0; 1019 } 1020 1021 static void __exit privcmd_exit(void) 1022 { 1023 misc_deregister(&privcmd_dev); 1024 misc_deregister(&xen_privcmdbuf_dev); 1025 } 1026 1027 module_init(privcmd_init); 1028 module_exit(privcmd_exit); 1029