/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/miscdevice.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/balloon.h>

#include "privcmd.h"

MODULE_LICENSE("GPL");

#define PRIV_VMA_LOCKED ((void *)1)

#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif

static long privcmd_ioctl_hypercall(void __user *udata)
{
	struct privcmd_hypercall hypercall;
	long ret;

	if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
		return -EFAULT;

	ret = privcmd_call(hypercall.op,
			   hypercall.arg[0], hypercall.arg[1],
			   hypercall.arg[2], hypercall.arg[3],
			   hypercall.arg[4]);

	return ret;
}

static void free_page_list(struct list_head *pages)
{
	struct page *p, *n;

	list_for_each_entry_safe(p, n, pages, lru)
		__free_page(p);

	INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
			unsigned nelem, size_t size,
			const void __user *data)
{
	unsigned pageidx;
	void *pagedata;
	int ret;

	if (size > PAGE_SIZE)
		return 0;

	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* quiet, gcc */
	while (nelem--) {
		if (pageidx > PAGE_SIZE-size) {
			struct page *page = alloc_page(GFP_KERNEL);

			ret = -ENOMEM;
			if (page == NULL)
				goto fail;

			pagedata = page_address(page);

			list_add_tail(&page->lru, pagelist);
			pageidx = 0;
		}

		ret = -EFAULT;
		if (copy_from_user(pagedata + pageidx, data, size))
			goto fail;

		data += size;
		pageidx += size;
	}

	ret = 0;

fail:
	return ret;
}
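/*
 * Illustrative sketch (not from the original source): gather_array(),
 * traverse_pages() and free_page_list() are meant to be used together.
 * A caller copies a userspace array into a list of pages, walks it with a
 * per-element callback, and frees the list whether or not an error
 * occurred.  The handler do_one(), the element type struct foo and the
 * state variable below are hypothetical:
 *
 *	LIST_HEAD(pagelist);
 *	int rc;
 *
 *	rc = gather_array(&pagelist, nelem, sizeof(struct foo), user_ptr);
 *	if (!rc && !list_empty(&pagelist))
 *		rc = traverse_pages(nelem, sizeof(struct foo),
 *				    &pagelist, do_one, &state);
 *	free_page_list(&pagelist);
 */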
/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 */
static int traverse_pages(unsigned nelem, size_t size,
			  struct list_head *pos,
			  int (*fn)(void *data, void *state),
			  void *state)
{
	void *pagedata;
	unsigned pageidx;
	int ret = 0;

	BUG_ON(size > PAGE_SIZE);

	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* hush, gcc */

	while (nelem--) {
		if (pageidx > PAGE_SIZE-size) {
			struct page *page;
			pos = pos->next;
			page = list_entry(pos, struct page, lru);
			pagedata = page_address(page);
			pageidx = 0;
		}

		ret = (*fn)(pagedata + pageidx, state);
		if (ret)
			break;
		pageidx += size;
	}

	return ret;
}

struct mmap_mfn_state {
	unsigned long va;
	struct vm_area_struct *vma;
	domid_t domain;
};

static int mmap_mfn_range(void *data, void *state)
{
	struct privcmd_mmap_entry *msg = data;
	struct mmap_mfn_state *st = state;
	struct vm_area_struct *vma = st->vma;
	int rc;

	/* Do not allow range to wrap the address space. */
	if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
	    ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
		return -EINVAL;

	/* Range chunks must be contiguous in va space. */
	if ((msg->va != st->va) ||
	    ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
		return -EINVAL;

	rc = xen_remap_domain_mfn_range(vma,
					msg->va & PAGE_MASK,
					msg->mfn, msg->npages,
					vma->vm_page_prot,
					st->domain, NULL);
	if (rc < 0)
		return rc;

	st->va += msg->npages << PAGE_SHIFT;

	return 0;
}

static long privcmd_ioctl_mmap(void __user *udata)
{
	struct privcmd_mmap mmapcmd;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc;
	LIST_HEAD(pagelist);
	struct mmap_mfn_state state;

	if (!xen_initial_domain())
		return -EPERM;

	/* We only support privcmd_ioctl_mmap_batch for auto translated. */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return -ENOSYS;

	if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
		return -EFAULT;

	rc = gather_array(&pagelist,
			  mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			  mmapcmd.entry);

	if (rc || list_empty(&pagelist))
		goto out;

	down_write(&mm->mmap_sem);

	{
		struct page *page = list_first_entry(&pagelist,
						     struct page, lru);
		struct privcmd_mmap_entry *msg = page_address(page);

		vma = find_vma(mm, msg->va);
		rc = -EINVAL;

		if (!vma || (msg->va != vma->vm_start) ||
		    !privcmd_enforce_singleshot_mapping(vma))
			goto out_up;
	}

	state.va = vma->vm_start;
	state.vma = vma;
	state.domain = mmapcmd.dom;

	rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			    &pagelist,
			    mmap_mfn_range, &state);

out_up:
	up_write(&mm->mmap_sem);

out:
	free_page_list(&pagelist);

	return rc;
}
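/*
 * Illustrative userspace sketch (not part of this file): the
 * IOCTL_PRIVCMD_MMAP path above expects the caller to mmap() a window of
 * /dev/xen/privcmd first and then describe it with privcmd_mmap_entry
 * records.  Structure layouts and the header path are assumed to match
 * xen/privcmd.h; domid, mfn and npages are placeholders.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/mman.h>
 *	#include <xen/privcmd.h>
 *
 *	long psz = sysconf(_SC_PAGESIZE);
 *	int fd = open("/dev/xen/privcmd", O_RDWR);
 *	void *addr = mmap(NULL, npages * psz, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 *	struct privcmd_mmap_entry entry = {
 *		.va = (unsigned long)addr,
 *		.mfn = mfn,		// foreign frame to map
 *		.npages = npages,
 *	};
 *	struct privcmd_mmap cmd = {
 *		.num = 1,
 *		.dom = domid,		// foreign domain
 *		.entry = &entry,
 *	};
 *	if (ioctl(fd, IOCTL_PRIVCMD_MMAP, &cmd))
 *		perror("IOCTL_PRIVCMD_MMAP");
 */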
struct mmap_batch_state {
	domid_t domain;
	unsigned long va;
	struct vm_area_struct *vma;
	int index;
	/* A tristate:
	 *      0 for no errors
	 *      1 if at least one error has happened (and no
	 *          -ENOENT errors have happened)
	 *      -ENOENT if at least 1 -ENOENT has happened.
	 */
	int global_error;
	/* An array for individual errors */
	int *err;

	/* User-space mfn array to store errors in the second pass for V1. */
	xen_pfn_t __user *user_mfn;
};

/* auto translated dom0 note: if domU being created is PV, then mfn is
 * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
 */
static int mmap_batch_fn(void *data, void *state)
{
	xen_pfn_t *mfnp = data;
	struct mmap_batch_state *st = state;
	struct vm_area_struct *vma = st->vma;
	struct page **pages = vma->vm_private_data;
	struct page *cur_page = NULL;
	int ret;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		cur_page = pages[st->index++];

	ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
					 st->vma->vm_page_prot, st->domain,
					 &cur_page);

	/* Store error code for second pass. */
	*(st->err++) = ret;

	/* And see if it affects the global_error. */
	if (ret < 0) {
		if (ret == -ENOENT)
			st->global_error = -ENOENT;
		else {
			/* Record that at least one error has happened. */
			if (st->global_error == 0)
				st->global_error = 1;
		}
	}
	st->va += PAGE_SIZE;

	return 0;
}

static int mmap_return_errors_v1(void *data, void *state)
{
	xen_pfn_t *mfnp = data;
	struct mmap_batch_state *st = state;
	int err = *(st->err++);

	/*
	 * V1 encodes the error codes in the 32bit top nibble of the
	 * mfn (with its known limitations vis-a-vis 64 bit callers).
	 */
	*mfnp |= (err == -ENOENT) ?
				PRIVCMD_MMAPBATCH_PAGED_ERROR :
				PRIVCMD_MMAPBATCH_MFN_ERROR;
	return __put_user(*mfnp, st->user_mfn++);
}
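/*
 * Illustrative userspace sketch (not part of this file): after a V1
 * IOCTL_PRIVCMD_MMAPBATCH call, per-frame failures are reported by OR-ing
 * error bits into the caller's mfn array, as done by mmap_return_errors_v1()
 * above.  The mask names are assumed to come from xen/privcmd.h:
 *
 *	for (i = 0; i < nr_frames; i++) {
 *		xen_pfn_t mfn = arr[i];
 *
 *		if ((mfn & PRIVCMD_MMAPBATCH_MFN_ERROR) ==
 *		    PRIVCMD_MMAPBATCH_PAGED_ERROR)
 *			;	// frame is paged out; caller may retry
 *		else if (mfn & PRIVCMD_MMAPBATCH_MFN_ERROR)
 *			;	// mapping this frame failed
 *	}
 */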
/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
 * the vma with the page info to use later.
 * Returns: 0 if success, otherwise -errno
 */
static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
{
	int rc;
	struct page **pages;

	pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
	if (pages == NULL)
		return -ENOMEM;

	rc = alloc_xenballooned_pages(numpgs, pages, 0);
	if (rc != 0) {
		pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
			numpgs, rc);
		kfree(pages);
		return -ENOMEM;
	}
	BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
	vma->vm_private_data = pages;

	return 0;
}

static struct vm_operations_struct privcmd_vm_ops;

static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
{
	int ret;
	struct privcmd_mmapbatch_v2 m;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long nr_pages;
	LIST_HEAD(pagelist);
	int *err_array = NULL;
	struct mmap_batch_state state;

	if (!xen_initial_domain())
		return -EPERM;

	switch (version) {
	case 1:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
			return -EFAULT;
		/* Returns per-frame error in m.arr. */
		m.err = NULL;
		if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
			return -EFAULT;
		break;
	case 2:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
			return -EFAULT;
		/* Returns per-frame error code in m.err. */
		if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
			return -EFAULT;
		break;
	default:
		return -EINVAL;
	}

	nr_pages = m.num;
	if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
		return -EINVAL;

	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

	if (ret)
		goto out;
	if (list_empty(&pagelist)) {
		ret = -EINVAL;
		goto out;
	}

	err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL);
	if (err_array == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	down_write(&mm->mmap_sem);

	vma = find_vma(mm, m.addr);
	if (!vma ||
	    vma->vm_ops != &privcmd_vm_ops ||
	    (m.addr != vma->vm_start) ||
	    ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
	    !privcmd_enforce_singleshot_mapping(vma)) {
		up_write(&mm->mmap_sem);
		ret = -EINVAL;
		goto out;
	}
	if (xen_feature(XENFEAT_auto_translated_physmap)) {
		ret = alloc_empty_pages(vma, m.num);
		if (ret < 0) {
			up_write(&mm->mmap_sem);
			goto out;
		}
	}

	state.domain = m.dom;
	state.vma = vma;
	state.va = m.addr;
	state.index = 0;
	state.global_error = 0;
	state.err = err_array;

	/* mmap_batch_fn guarantees ret == 0 */
	BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
			      &pagelist, mmap_batch_fn, &state));

	up_write(&mm->mmap_sem);

	if (version == 1) {
		if (state.global_error) {
			/* Write back errors in second pass. */
			state.user_mfn = (xen_pfn_t *)m.arr;
			state.err = err_array;
			ret = traverse_pages(m.num, sizeof(xen_pfn_t),
					     &pagelist, mmap_return_errors_v1,
					     &state);
		} else
			ret = 0;

	} else if (version == 2) {
		ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
		if (ret)
			ret = -EFAULT;
	}

	/* If we have not had any EFAULT-like global errors then set the global
	 * error to -ENOENT if necessary.
	 */
	if ((ret == 0) && (state.global_error == -ENOENT))
		ret = -ENOENT;

out:
	kfree(err_array);
	free_page_list(&pagelist);

	return ret;
}
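/*
 * Illustrative userspace sketch (not part of this file): mapping foreign
 * frames through IOCTL_PRIVCMD_MMAPBATCH_V2.  The caller mmap()s a window
 * of /dev/xen/privcmd, passes the frame array, and receives a per-frame
 * errno in err[].  Structure layouts and the header path are assumed to
 * match xen/privcmd.h; domid, nr and the gmfns array are placeholders.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/mman.h>
 *	#include <xen/privcmd.h>
 *
 *	long psz = sysconf(_SC_PAGESIZE);
 *	int fd = open("/dev/xen/privcmd", O_RDWR);
 *	void *addr = mmap(NULL, nr * psz, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 *	int err[nr];
 *	struct privcmd_mmapbatch_v2 batch = {
 *		.num  = nr,
 *		.dom  = domid,		// foreign domain
 *		.addr = (unsigned long)addr,
 *		.arr  = gmfns,		// array of nr frame numbers
 *		.err  = err,
 *	};
 *	if (ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &batch))
 *		perror("IOCTL_PRIVCMD_MMAPBATCH_V2");
 *	// err[i] == 0 on success, -ENOENT if the frame was paged out, etc.
 */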
static long privcmd_ioctl(struct file *file,
			  unsigned int cmd, unsigned long data)
{
	int ret = -ENOSYS;
	void __user *udata = (void __user *) data;

	switch (cmd) {
	case IOCTL_PRIVCMD_HYPERCALL:
		ret = privcmd_ioctl_hypercall(udata);
		break;

	case IOCTL_PRIVCMD_MMAP:
		ret = privcmd_ioctl_mmap(udata);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH:
		ret = privcmd_ioctl_mmap_batch(udata, 1);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH_V2:
		ret = privcmd_ioctl_mmap_batch(udata, 2);
		break;

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

static void privcmd_close(struct vm_area_struct *vma)
{
	struct page **pages = vma->vm_private_data;
	int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;

	if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
		return;

	xen_unmap_domain_mfn_range(vma, numpgs, pages);
	free_xenballooned_pages(numpgs, pages);
	kfree(pages);
}

static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
	       vma, vma->vm_start, vma->vm_end,
	       vmf->pgoff, vmf->virtual_address);

	return VM_FAULT_SIGBUS;
}

static struct vm_operations_struct privcmd_vm_ops = {
	.close = privcmd_close,
	.fault = privcmd_fault
};

static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
	 * how to recreate these mappings */
	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
			 VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &privcmd_vm_ops;
	vma->vm_private_data = NULL;

	return 0;
}

static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
	return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
}

const struct file_operations xen_privcmd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = privcmd_ioctl,
	.mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);

static struct miscdevice privcmd_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "xen/privcmd",
	.fops = &xen_privcmd_fops,
};

static int __init privcmd_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	err = misc_register(&privcmd_dev);
	if (err != 0) {
		printk(KERN_ERR "Could not register Xen privcmd device\n");
		return err;
	}
	return 0;
}

static void __exit privcmd_exit(void)
{
	misc_deregister(&privcmd_dev);
}

module_init(privcmd_init);
module_exit(privcmd_exit);
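/*
 * Illustrative userspace sketch (not part of this file): issuing a
 * hypercall through the IOCTL_PRIVCMD_HYPERCALL path handled by
 * privcmd_ioctl_hypercall() above.  __HYPERVISOR_xen_version and
 * XENVER_version come from the Xen public headers; the privcmd header
 * path is an assumption.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <sys/ioctl.h>
 *	#include <xen/privcmd.h>
 *	#include <xen/xen.h>		// __HYPERVISOR_xen_version
 *	#include <xen/version.h>	// XENVER_version
 *
 *	int fd = open("/dev/xen/privcmd", O_RDWR);
 *	struct privcmd_hypercall hc = {
 *		.op  = __HYPERVISOR_xen_version,
 *		.arg = { XENVER_version, 0 },
 *	};
 *	long ver = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &hc);
 *	if (ver >= 0)
 *		printf("Xen %ld.%ld\n", ver >> 16, ver & 0xffff);
 */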