/*-
 * Copyright (c) 2016 Akshay Jaggi <jaggi@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * gntdev.c
 *
 * Interface to /dev/xen/gntdev.
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/bus.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/selinfo.h>
#include <sys/poll.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/rman.h>
#include <sys/tree.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/bitset.h>
#include <sys/queue.h>
#include <sys/mman.h>
#include <sys/syslog.h>
#include <sys/taskqueue.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>

#include <machine/md_var.h>

#include <xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/error.h>
#include <xen/xen_intr.h>
#include <xen/gnttab.h>
#include <xen/gntdev.h>

MALLOC_DEFINE(M_GNTDEV, "gntdev", "Xen grant-table user-space device");

#define MAX_OFFSET_COUNT ((0xffffffffffffffffull >> PAGE_SHIFT) + 1)

static d_open_t gntdev_open;
static d_ioctl_t gntdev_ioctl;
static d_mmap_single_t gntdev_mmap_single;

static struct cdevsw gntdev_devsw = {
        .d_version = D_VERSION,
        .d_open = gntdev_open,
        .d_ioctl = gntdev_ioctl,
        .d_mmap_single = gntdev_mmap_single,
        .d_name = "gntdev",
};

static device_t gntdev_dev = NULL;

struct gntdev_gref;
struct gntdev_gmap;
STAILQ_HEAD(gref_list_head, gntdev_gref);
STAILQ_HEAD(gmap_list_head, gntdev_gmap);
RB_HEAD(gref_tree_head, gntdev_gref);
RB_HEAD(gmap_tree_head, gntdev_gmap);

struct file_offset_struct {
        RB_ENTRY(file_offset_struct) next;
        uint64_t file_offset;
        uint64_t count;
};

static int
offset_cmp(struct file_offset_struct *f1, struct file_offset_struct *f2)
{
        return (f1->file_offset - f2->file_offset);
}

RB_HEAD(file_offset_head, file_offset_struct);
RB_GENERATE_STATIC(file_offset_head, file_offset_struct, next, offset_cmp);

struct per_user_data {
        struct mtx user_data_lock;
        struct gref_tree_head gref_tree;
        struct gmap_tree_head gmap_tree;
        struct file_offset_head file_offset;
};

/*
 * Get an offset into the device file that the userspace program can later
 * use to mmap the corresponding pages.  Free offset ranges are tracked in
 * the per-user file_offset tree; the request is carved out of the first
 * free range large enough to hold it.
 */
static int
get_file_offset(struct per_user_data *priv_user, uint32_t count,
    uint64_t *file_offset)
{
        struct file_offset_struct *offset, *offset_tmp;

        if (count == 0)
                return (EINVAL);
        mtx_lock(&priv_user->user_data_lock);
        RB_FOREACH_SAFE(offset, file_offset_head, &priv_user->file_offset,
            offset_tmp) {
                if (offset->count >= count) {
                        offset->count -= count;
                        *file_offset = offset->file_offset + offset->count *
                            PAGE_SIZE;
                        if (offset->count == 0) {
                                RB_REMOVE(file_offset_head,
                                    &priv_user->file_offset, offset);
                                free(offset, M_GNTDEV);
                        }
                        mtx_unlock(&priv_user->user_data_lock);
                        return (0);
                }
        }
        mtx_unlock(&priv_user->user_data_lock);

        return (ENOSPC);
}

/*
 * Return a range of file offsets to the per-user free tree, coalescing it
 * with adjacent free ranges where possible.
 */
static void
put_file_offset(struct per_user_data *priv_user, uint32_t count,
    uint64_t file_offset)
{
        struct file_offset_struct *offset, *offset_nxt, *offset_prv;

        offset = malloc(sizeof(*offset), M_GNTDEV, M_WAITOK | M_ZERO);
        offset->file_offset = file_offset;
        offset->count = count;

        mtx_lock(&priv_user->user_data_lock);
        RB_INSERT(file_offset_head, &priv_user->file_offset, offset);
        offset_nxt = RB_NEXT(file_offset_head, &priv_user->file_offset, offset);
        offset_prv = RB_PREV(file_offset_head, &priv_user->file_offset, offset);
        if (offset_nxt != NULL &&
            offset_nxt->file_offset == offset->file_offset + offset->count *
            PAGE_SIZE) {
                offset->count += offset_nxt->count;
                RB_REMOVE(file_offset_head, &priv_user->file_offset,
                    offset_nxt);
                free(offset_nxt, M_GNTDEV);
        }
        if (offset_prv != NULL &&
            offset->file_offset == offset_prv->file_offset + offset_prv->count *
            PAGE_SIZE) {
                offset_prv->count += offset->count;
                RB_REMOVE(file_offset_head, &priv_user->file_offset, offset);
                free(offset, M_GNTDEV);
        }
        mtx_unlock(&priv_user->user_data_lock);
}

static int gntdev_gmap_pg_ctor(void *handle, vm_ooffset_t size,
    vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color);
static void gntdev_gmap_pg_dtor(void *handle);
static int gntdev_gmap_pg_fault(vm_object_t object, vm_ooffset_t offset,
    int prot, vm_page_t *mres);

static struct cdev_pager_ops gntdev_gmap_pg_ops = {
        .cdev_pg_fault = gntdev_gmap_pg_fault,
        .cdev_pg_ctor = gntdev_gmap_pg_ctor,
        .cdev_pg_dtor = gntdev_gmap_pg_dtor,
};

struct cleanup_data_struct {
        struct mtx to_kill_grefs_mtx;
        struct mtx to_kill_gmaps_mtx;
        struct gref_list_head to_kill_grefs;
        struct gmap_list_head to_kill_gmaps;
};

static struct cleanup_data_struct cleanup_data = {
        .to_kill_grefs = STAILQ_HEAD_INITIALIZER(cleanup_data.to_kill_grefs),
        .to_kill_gmaps = STAILQ_HEAD_INITIALIZER(cleanup_data.to_kill_gmaps),
};
MTX_SYSINIT(to_kill_grefs_mtx, &cleanup_data.to_kill_grefs_mtx,
    "gntdev to_kill_grefs mutex", MTX_DEF);
MTX_SYSINIT(to_kill_gmaps_mtx, &cleanup_data.to_kill_gmaps_mtx,
    "gntdev to_kill_gmaps mutex", MTX_DEF);

static void cleanup_function(void *arg, __unused int pending);
static struct task cleanup_task = TASK_INITIALIZER(0, cleanup_function,
    &cleanup_data);
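
/*
 * Deferred cleanup: grants and grant mappings are never destroyed in the
 * ioctl paths themselves.  Instead they are moved onto the global
 * to_kill_grefs/to_kill_gmaps lists above, and cleanup_task (run from
 * taskqueue_thread) later frees whatever is no longer referenced, via
 * gref_list_dtor() and gmap_list_dtor().  Entries whose pages a foreign
 * domain may still have mapped are simply re-queued for a later pass.
 */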

struct notify_data {
        uint64_t index;
        uint32_t action;
        uint32_t event_channel_port;
        xen_intr_handle_t notify_evtchn_handle;
};

static void notify(struct notify_data *notify, vm_page_t page);

/*-------------------- Grant Allocation Methods -----------------------------*/

struct gntdev_gref {
        union gref_next_union {
                STAILQ_ENTRY(gntdev_gref) list;
                RB_ENTRY(gntdev_gref) tree;
        } gref_next;
        uint64_t file_index;
        grant_ref_t gref_id;
        vm_page_t page;
        struct notify_data *notify;
};

static int
gref_cmp(struct gntdev_gref *g1, struct gntdev_gref *g2)
{
        return (g1->file_index - g2->file_index);
}

RB_GENERATE_STATIC(gref_tree_head, gntdev_gref, gref_next.tree, gref_cmp);

/*
 * Walk the device-wide list of to-be-deleted allocated grants and destroy
 * those to which all accesses, both local mmaps and foreign mappings, have
 * ended.  Grants that are still accessed are re-queued for a later pass.
 */
static void
gref_list_dtor(struct cleanup_data_struct *cleanup_data)
{
        struct gref_list_head tmp_grefs;
        struct gntdev_gref *gref, *gref_tmp, *gref_previous;

        STAILQ_INIT(&tmp_grefs);
        mtx_lock(&cleanup_data->to_kill_grefs_mtx);
        STAILQ_SWAP(&cleanup_data->to_kill_grefs, &tmp_grefs, gntdev_gref);
        mtx_unlock(&cleanup_data->to_kill_grefs_mtx);

        gref_previous = NULL;
        STAILQ_FOREACH_SAFE(gref, &tmp_grefs, gref_next.list, gref_tmp) {
                if (gref->page && gref->page->object == NULL) {
                        if (gref->notify) {
                                notify(gref->notify, gref->page);
                        }
                        if (gref->gref_id != GRANT_REF_INVALID) {
                                if (gnttab_query_foreign_access(gref->gref_id))
                                        continue;
                                if (gnttab_end_foreign_access_ref(gref->gref_id)
                                    == 0)
                                        continue;
                                gnttab_free_grant_reference(gref->gref_id);
                        }
                        vm_page_unwire_noq(gref->page);
                        vm_page_free(gref->page);
                        gref->page = NULL;
                }
                if (gref->page == NULL) {
                        if (gref_previous == NULL)
                                STAILQ_REMOVE_HEAD(&tmp_grefs, gref_next.list);
                        else
                                STAILQ_REMOVE_AFTER(&tmp_grefs, gref_previous,
                                    gref_next.list);
                        if (gref->notify)
                                free(gref->notify, M_GNTDEV);
                        free(gref, M_GNTDEV);
                } else
                        gref_previous = gref;
        }

        if (!STAILQ_EMPTY(&tmp_grefs)) {
                mtx_lock(&cleanup_data->to_kill_grefs_mtx);
                STAILQ_CONCAT(&cleanup_data->to_kill_grefs, &tmp_grefs);
                mtx_unlock(&cleanup_data->to_kill_grefs_mtx);
        }
}

/*
 * Find "count" contiguous allocated grants for a given userspace program,
 * starting at the file offset given by "index".
 */
static struct gntdev_gref*
gntdev_find_grefs(struct per_user_data *priv_user,
    uint64_t index, uint32_t count)
{
        struct gntdev_gref find_gref, *gref, *gref_start = NULL;

        find_gref.file_index = index;

        mtx_lock(&priv_user->user_data_lock);
        gref_start = RB_FIND(gref_tree_head, &priv_user->gref_tree, &find_gref);
        for (gref = gref_start; gref != NULL && count > 0; gref =
            RB_NEXT(gref_tree_head, &priv_user->gref_tree, gref)) {
                if (index != gref->file_index)
                        break;
                index += PAGE_SIZE;
                count--;
        }
        mtx_unlock(&priv_user->user_data_lock);

        if (count)
                return (NULL);
        return (gref_start);
}

/*
 * IOCTL_GNTDEV_ALLOC_GREF
 * Allocate the required number of wired pages for the request, grant
 * foreign access to the physical frames for these pages, and add details
 * about this allocation to the per user private data, so that these pages
 * can be mmapped by the userspace program.
 */
static int
gntdev_alloc_gref(struct ioctl_gntdev_alloc_gref *arg)
{
        uint32_t i;
        int error, readonly;
        uint64_t file_offset;
        struct gntdev_gref *grefs;
        struct per_user_data *priv_user;

        readonly = !(arg->flags & GNTDEV_ALLOC_FLAG_WRITABLE);

        error = devfs_get_cdevpriv((void**) &priv_user);
        if (error != 0)
                return (EINVAL);

        /* Cleanup grefs and free pages. */
        taskqueue_enqueue(taskqueue_thread, &cleanup_task);

        /* Get file offset for this request. */
        error = get_file_offset(priv_user, arg->count, &file_offset);
        if (error != 0)
                return (error);

        /* Allocate grefs. */
        grefs = malloc(sizeof(*grefs) * arg->count, M_GNTDEV, M_WAITOK);

        for (i = 0; i < arg->count; i++) {
                grefs[i].file_index = file_offset + i * PAGE_SIZE;
                grefs[i].gref_id = GRANT_REF_INVALID;
                grefs[i].notify = NULL;
                grefs[i].page = vm_page_alloc_noobj(VM_ALLOC_WIRED |
                    VM_ALLOC_ZERO);
                if (grefs[i].page == NULL) {
                        log(LOG_ERR, "Page allocation failed.");
                        error = ENOMEM;
                        break;
                }
                grefs[i].page->valid = VM_PAGE_BITS_ALL;

                error = gnttab_grant_foreign_access(arg->domid,
                    (VM_PAGE_TO_PHYS(grefs[i].page) >> PAGE_SHIFT),
                    readonly, &grefs[i].gref_id);
                if (error != 0) {
                        log(LOG_ERR, "Grant Table Hypercall failed.");
                        break;
                }
        }

        /* Copy the output values. */
        arg->index = file_offset;
        for (i = 0; error == 0 && i < arg->count; i++) {
                if (suword32(&arg->gref_ids[i], grefs[i].gref_id) != 0)
                        error = EFAULT;
        }

        if (error != 0) {
                /*
                 * If the target domain maps the gref (by guessing the
                 * gref-id), then we can't clean it up yet and we have to
                 * leave the page in place so as not to leak our memory to
                 * that domain.  Add it to a global list to be cleaned up
                 * later.
                 */
                mtx_lock(&cleanup_data.to_kill_grefs_mtx);
                for (i = 0; i < arg->count; i++)
                        STAILQ_INSERT_TAIL(&cleanup_data.to_kill_grefs,
                            &grefs[i], gref_next.list);
                mtx_unlock(&cleanup_data.to_kill_grefs_mtx);

                taskqueue_enqueue(taskqueue_thread, &cleanup_task);

                return (error);
        }

        /* Modify the per user private data. */
        mtx_lock(&priv_user->user_data_lock);
        for (i = 0; i < arg->count; i++)
                RB_INSERT(gref_tree_head, &priv_user->gref_tree, &grefs[i]);
        mtx_unlock(&priv_user->user_data_lock);

        return (error);
}
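
/*
 * Illustrative userspace flow for the allocation path (a sketch only, not
 * part of the driver; error handling and userspace boilerplate are omitted,
 * and remote_domid is a placeholder for the peer domain id).  That gref_ids
 * points into user memory follows from the suword32() usage above; see
 * xen/gntdev.h for the exact structure layout.
 *
 *        int fd = open("/dev/xen/gntdev", O_RDWR);
 *        uint32_t gref_id;
 *        struct ioctl_gntdev_alloc_gref alloc = {
 *                .domid = remote_domid,
 *                .flags = GNTDEV_ALLOC_FLAG_WRITABLE,
 *                .count = 1,
 *                .gref_ids = &gref_id,
 *        };
 *        ioctl(fd, IOCTL_GNTDEV_ALLOC_GREF, &alloc);
 *
 * gref_id is then handed to the peer out of band, and alloc.index is the
 * file offset at which to mmap the shared page locally:
 *
 *        void *shared = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
 *            MAP_SHARED, fd, alloc.index);
 */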

/*
 * IOCTL_GNTDEV_DEALLOC_GREF
 * Remove the grant-allocation information from the per user private data,
 * so that the userspace program can no longer mmap it, and add it to the
 * device-wide list of to-be-deleted grants.
 */
static int
gntdev_dealloc_gref(struct ioctl_gntdev_dealloc_gref *arg)
{
        int error;
        uint32_t count;
        struct gntdev_gref *gref, *gref_tmp;
        struct per_user_data *priv_user;

        error = devfs_get_cdevpriv((void**) &priv_user);
        if (error != 0)
                return (EINVAL);

        gref = gntdev_find_grefs(priv_user, arg->index, arg->count);
        if (gref == NULL) {
                log(LOG_ERR, "Can't find requested grant-refs.");
                return (EINVAL);
        }

        /* Remove the grefs from user private data. */
        count = arg->count;
        mtx_lock(&priv_user->user_data_lock);
        mtx_lock(&cleanup_data.to_kill_grefs_mtx);
        for (; gref != NULL && count > 0; gref = gref_tmp) {
                gref_tmp = RB_NEXT(gref_tree_head, &priv_user->gref_tree, gref);
                RB_REMOVE(gref_tree_head, &priv_user->gref_tree, gref);
                STAILQ_INSERT_TAIL(&cleanup_data.to_kill_grefs, gref,
                    gref_next.list);
                count--;
        }
        mtx_unlock(&cleanup_data.to_kill_grefs_mtx);
        mtx_unlock(&priv_user->user_data_lock);

        taskqueue_enqueue(taskqueue_thread, &cleanup_task);
        put_file_offset(priv_user, arg->count, arg->index);

        return (0);
}

/*-------------------- Grant Mapping Methods --------------------------------*/

struct gntdev_gmap_map {
        vm_object_t mem;
        struct resource *pseudo_phys_res;
        int pseudo_phys_res_id;
        vm_paddr_t phys_base_addr;
};

struct gntdev_gmap {
        union gmap_next_union {
                STAILQ_ENTRY(gntdev_gmap) list;
                RB_ENTRY(gntdev_gmap) tree;
        } gmap_next;
        uint64_t file_index;
        uint32_t count;
        struct gnttab_map_grant_ref *grant_map_ops;
        struct gntdev_gmap_map *map;
        struct notify_data *notify;
};

static int
gmap_cmp(struct gntdev_gmap *g1, struct gntdev_gmap *g2)
{
        return (g1->file_index - g2->file_index);
}

RB_GENERATE_STATIC(gmap_tree_head, gntdev_gmap, gmap_next.tree, gmap_cmp);

/*
 * Walk the device-wide list of to-be-deleted grant mappings, and free the
 * memory used to store information about each mapping whose region is no
 * longer mmapped by anyone.
 */
static void
gmap_list_dtor(struct cleanup_data_struct *cleanup_data)
{
        struct gmap_list_head tmp_gmaps;
        struct gntdev_gmap *gmap, *gmap_tmp, *gmap_previous;

        STAILQ_INIT(&tmp_gmaps);
        mtx_lock(&cleanup_data->to_kill_gmaps_mtx);
        STAILQ_SWAP(&cleanup_data->to_kill_gmaps, &tmp_gmaps, gntdev_gmap);
        mtx_unlock(&cleanup_data->to_kill_gmaps_mtx);

        gmap_previous = NULL;
        STAILQ_FOREACH_SAFE(gmap, &tmp_gmaps, gmap_next.list, gmap_tmp) {
                if (gmap->map == NULL) {
                        if (gmap_previous == NULL)
                                STAILQ_REMOVE_HEAD(&tmp_gmaps, gmap_next.list);
                        else
                                STAILQ_REMOVE_AFTER(&tmp_gmaps, gmap_previous,
                                    gmap_next.list);

                        if (gmap->notify)
                                free(gmap->notify, M_GNTDEV);
                        free(gmap->grant_map_ops, M_GNTDEV);
                        free(gmap, M_GNTDEV);
                } else
                        gmap_previous = gmap;
        }

        if (!STAILQ_EMPTY(&tmp_gmaps)) {
                mtx_lock(&cleanup_data->to_kill_gmaps_mtx);
                STAILQ_CONCAT(&cleanup_data->to_kill_gmaps, &tmp_gmaps);
                mtx_unlock(&cleanup_data->to_kill_gmaps_mtx);
        }
}

/*
 * Find mapped grants for a given userspace program, by file-offset (index)
 * and count, as supplied during the map-ioctl.
 */
static struct gntdev_gmap*
gntdev_find_gmap(struct per_user_data *priv_user,
    uint64_t index, uint32_t count)
{
        struct gntdev_gmap find_gmap, *gmap;

        find_gmap.file_index = index;

        mtx_lock(&priv_user->user_data_lock);
        gmap = RB_FIND(gmap_tree_head, &priv_user->gmap_tree, &find_gmap);
        mtx_unlock(&priv_user->user_data_lock);

        if (gmap != NULL && gmap->count == count)
                return (gmap);
        return (NULL);
}

/*
 * Remove the pages from the mgtdevice pager, issue the unmap hypercall, and
 * free the xenmem resource.  This function is called during the destruction
 * of the mgtdevice pager, which happens when all mmaps to it have been
 * removed and the unmap-ioctl has been performed.
 */
static int
notify_unmap_cleanup(struct gntdev_gmap *gmap)
{
        uint32_t i;
        int error, count;
        struct gnttab_unmap_grant_ref *unmap_ops;

        unmap_ops = malloc(sizeof(struct gnttab_unmap_grant_ref) * gmap->count,
            M_GNTDEV, M_WAITOK);

        /* Enumerate freeable maps. */
        count = 0;
        for (i = 0; i < gmap->count; i++) {
                if (gmap->grant_map_ops[i].handle != -1) {
                        unmap_ops[count].handle = gmap->grant_map_ops[i].handle;
                        unmap_ops[count].host_addr =
                            gmap->grant_map_ops[i].host_addr;
                        unmap_ops[count].dev_bus_addr = 0;
                        count++;
                }
        }

        /* Perform notification. */
        if (count > 0 && gmap->notify) {
                vm_page_t page;
                uint64_t page_offset;

                page_offset = gmap->notify->index - gmap->file_index;
                page = PHYS_TO_VM_PAGE(gmap->map->phys_base_addr + page_offset);
                notify(gmap->notify, page);
        }

        /* Free the pages. */
        cdev_mgtdev_pager_free_pages(gmap->map->mem);

        /* Perform unmap hypercall. */
        error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
            unmap_ops, count);

        for (i = 0; i < gmap->count; i++) {
                gmap->grant_map_ops[i].handle = -1;
                gmap->grant_map_ops[i].host_addr = 0;
        }

        if (gmap->map) {
                error = xenmem_free(gntdev_dev, gmap->map->pseudo_phys_res_id,
                    gmap->map->pseudo_phys_res);
                KASSERT(error == 0,
                    ("Unable to release memory resource: %d", error));

                free(gmap->map, M_GNTDEV);
                gmap->map = NULL;
        }

        free(unmap_ops, M_GNTDEV);

        return (error);
}

/*
 * IOCTL_GNTDEV_MAP_GRANT_REF
 * Populate the structures for mapping the grant reference in the per user
 * private data.  The actual resource allocation and map hypercall are
 * performed during the mmap.
 */
static int
gntdev_map_grant_ref(struct ioctl_gntdev_map_grant_ref *arg)
{
        uint32_t i;
        int error;
        struct gntdev_gmap *gmap;
        struct per_user_data *priv_user;

        error = devfs_get_cdevpriv((void**) &priv_user);
        if (error != 0)
                return (EINVAL);

        gmap = malloc(sizeof(*gmap), M_GNTDEV, M_WAITOK | M_ZERO);
        gmap->count = arg->count;
        gmap->grant_map_ops =
            malloc(sizeof(struct gnttab_map_grant_ref) * arg->count,
            M_GNTDEV, M_WAITOK | M_ZERO);

        for (i = 0; i < arg->count; i++) {
                struct ioctl_gntdev_grant_ref ref;

                error = copyin(&arg->refs[i], &ref, sizeof(ref));
                if (error != 0) {
                        free(gmap->grant_map_ops, M_GNTDEV);
                        free(gmap, M_GNTDEV);
                        return (error);
                }
                gmap->grant_map_ops[i].dom = ref.domid;
                gmap->grant_map_ops[i].ref = ref.ref;
                gmap->grant_map_ops[i].handle = -1;
                gmap->grant_map_ops[i].flags = GNTMAP_host_map;
        }

        error = get_file_offset(priv_user, arg->count, &gmap->file_index);
        if (error != 0) {
                free(gmap->grant_map_ops, M_GNTDEV);
                free(gmap, M_GNTDEV);
                return (error);
        }

        mtx_lock(&priv_user->user_data_lock);
        RB_INSERT(gmap_tree_head, &priv_user->gmap_tree, gmap);
        mtx_unlock(&priv_user->user_data_lock);

        arg->index = gmap->file_index;

        return (error);
}
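
/*
 * Illustrative userspace flow for the mapping path (a sketch only; fd is an
 * open gntdev descriptor, and peer_domid/peer_gref_id are placeholders for
 * values obtained out of band from the granting domain):
 *
 *        struct ioctl_gntdev_grant_ref ref = {
 *                .domid = peer_domid,
 *                .ref = peer_gref_id,
 *        };
 *        struct ioctl_gntdev_map_grant_ref map = {
 *                .count = 1,
 *                .refs = &ref,
 *        };
 *        ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &map);
 *        void *mapped = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
 *            MAP_SHARED, fd, map.index);
 *
 * The map hypercall itself only happens inside that mmap() (see mmap_gmap()
 * below).  Tearing the mapping down takes both a munmap() and the unmap
 * ioctl:
 *
 *        munmap(mapped, PAGE_SIZE);
 *        struct ioctl_gntdev_unmap_grant_ref unmap = {
 *                .index = map.index,
 *                .count = 1,
 *        };
 *        ioctl(fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap);
 */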

/*
 * IOCTL_GNTDEV_UNMAP_GRANT_REF
 * Remove the map information from the per user private data and add it
 * to the global device-list of mappings to be deleted.  A reference to
 * the mgtdevice pager is also dropped, the reason for which is explained
 * in mmap_gmap().
 */
static int
gntdev_unmap_grant_ref(struct ioctl_gntdev_unmap_grant_ref *arg)
{
        int error;
        struct gntdev_gmap *gmap;
        struct per_user_data *priv_user;

        error = devfs_get_cdevpriv((void**) &priv_user);
        if (error != 0)
                return (EINVAL);

        gmap = gntdev_find_gmap(priv_user, arg->index, arg->count);
        if (gmap == NULL) {
                log(LOG_ERR, "Can't find requested grant-map.");
                return (EINVAL);
        }

        mtx_lock(&priv_user->user_data_lock);
        mtx_lock(&cleanup_data.to_kill_gmaps_mtx);
        RB_REMOVE(gmap_tree_head, &priv_user->gmap_tree, gmap);
        STAILQ_INSERT_TAIL(&cleanup_data.to_kill_gmaps, gmap, gmap_next.list);
        mtx_unlock(&cleanup_data.to_kill_gmaps_mtx);
        mtx_unlock(&priv_user->user_data_lock);

        if (gmap->map)
                vm_object_deallocate(gmap->map->mem);

        taskqueue_enqueue(taskqueue_thread, &cleanup_task);
        put_file_offset(priv_user, arg->count, arg->index);

        return (0);
}

/*
 * IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR
 * Get the file offset and count for a given mapping, from the virtual
 * address at which the mapping is mmapped.
 * Note that this only works for grants mapped by this domain, not for
 * grants allocated by it; count does not make much sense for allocated
 * grants.  Also, because this function is present in the Linux gntdev
 * device but not in the Linux gntalloc one, most userspace code only
 * uses it for mapped grants.
 */
static int
gntdev_get_offset_for_vaddr(struct ioctl_gntdev_get_offset_for_vaddr *arg,
    struct thread *td)
{
        int error;
        vm_map_t map;
        vm_map_entry_t entry;
        vm_object_t mem;
        vm_pindex_t pindex;
        vm_prot_t prot;
        boolean_t wired;
        struct gntdev_gmap *gmap;
        int rc;

        map = &td->td_proc->p_vmspace->vm_map;
        error = vm_map_lookup(&map, arg->vaddr, VM_PROT_NONE, &entry,
            &mem, &pindex, &prot, &wired);
        if (error != KERN_SUCCESS)
                return (EINVAL);

        if ((mem->type != OBJT_MGTDEVICE) ||
            (mem->un_pager.devp.ops != &gntdev_gmap_pg_ops)) {
                rc = EINVAL;
                goto out;
        }

        gmap = mem->handle;
        if (gmap == NULL ||
            (entry->end - entry->start) != (gmap->count * PAGE_SIZE)) {
                rc = EINVAL;
                goto out;
        }

        arg->count = gmap->count;
        arg->offset = gmap->file_index;
        rc = 0;

out:
        vm_map_lookup_done(map, entry);
        return (rc);
}

/*-------------------- Grant Mapping Pager ----------------------------------*/

static int
gntdev_gmap_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color)
{

        return (0);
}

static void
gntdev_gmap_pg_dtor(void *handle)
{

        notify_unmap_cleanup((struct gntdev_gmap *)handle);
}

static int
gntdev_gmap_pg_fault(vm_object_t object, vm_ooffset_t offset, int prot,
    vm_page_t *mres)
{
        struct gntdev_gmap *gmap = object->handle;
        vm_pindex_t pidx, ridx;
        vm_page_t page;
        vm_ooffset_t relative_offset;

        if (gmap->map == NULL)
                return (VM_PAGER_FAIL);

        relative_offset = offset - gmap->file_index;

        pidx = OFF_TO_IDX(offset);
        ridx = OFF_TO_IDX(relative_offset);
        if (ridx >= gmap->count ||
            gmap->grant_map_ops[ridx].status != GNTST_okay)
                return (VM_PAGER_FAIL);

        page = PHYS_TO_VM_PAGE(gmap->map->phys_base_addr + relative_offset);
        if (page == NULL)
                return (VM_PAGER_FAIL);

        KASSERT((page->flags & PG_FICTITIOUS) != 0,
            ("not fictitious %p", page));
        KASSERT(vm_page_wired(page), ("page %p is not wired", page));
        KASSERT(!vm_page_busied(page), ("page %p is busy", page));

        vm_page_busy_acquire(page, 0);
        vm_page_valid(page);
        if (*mres != NULL)
                vm_page_replace(page, object, pidx, *mres);
        else
                vm_page_insert(page, object, pidx);
        *mres = page;
        return (VM_PAGER_OK);
}

/*------------------ Grant Table Methods ------------------------------------*/

static void
notify(struct notify_data *notify, vm_page_t page)
{
        if (notify->action & UNMAP_NOTIFY_CLEAR_BYTE) {
                uint8_t *mem;
                uint64_t offset;

                offset = notify->index & PAGE_MASK;
                mem = (uint8_t *)pmap_quick_enter_page(page);
                mem[offset] = 0;
                pmap_quick_remove_page((vm_offset_t)mem);
        }
        if (notify->action & UNMAP_NOTIFY_SEND_EVENT) {
                xen_intr_signal(notify->notify_evtchn_handle);
                xen_intr_unbind(&notify->notify_evtchn_handle);
        }
        notify->action = 0;
}

/*
 * Helper to copy new arguments from the notify ioctl into
 * the existing notify data.
 */
static int
copy_notify_helper(struct notify_data *destination,
    struct ioctl_gntdev_unmap_notify *source)
{
        xen_intr_handle_t handlep = NULL;

        /*
         * "Get" the new reference before "put"ting the previous one, as we
         * might be holding the last reference to the event channel port.
         */
        if (source->action & UNMAP_NOTIFY_SEND_EVENT)
                if (xen_intr_get_evtchn_from_port(source->event_channel_port,
                    &handlep) != 0)
                        return (EINVAL);

        if (destination->action & UNMAP_NOTIFY_SEND_EVENT)
                xen_intr_unbind(&destination->notify_evtchn_handle);

        destination->action = source->action;
        destination->event_channel_port = source->event_channel_port;
        destination->index = source->index;
        destination->notify_evtchn_handle = handlep;

        return (0);
}

/*
 * IOCTL_GNTDEV_SET_UNMAP_NOTIFY
 * Set an unmap notification inside the appropriate grant.  A notification
 * is sent when the grant has been completely munmapped by this domain and
 * is ready for destruction.
 */
static int
gntdev_set_unmap_notify(struct ioctl_gntdev_unmap_notify *arg)
{
        int error;
        uint64_t index;
        struct per_user_data *priv_user;
        struct gntdev_gref *gref = NULL;
        struct gntdev_gmap *gmap;

        error = devfs_get_cdevpriv((void**) &priv_user);
        if (error != 0)
                return (EINVAL);

        if (arg->action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
                return (EINVAL);

        index = arg->index & ~PAGE_MASK;
        gref = gntdev_find_grefs(priv_user, index, 1);
        if (gref) {
                if (gref->notify == NULL)
                        gref->notify = malloc(sizeof(*gref->notify), M_GNTDEV,
                            M_WAITOK | M_ZERO);
                return (copy_notify_helper(gref->notify, arg));
        }

        error = EINVAL;
        mtx_lock(&priv_user->user_data_lock);
        RB_FOREACH(gmap, gmap_tree_head, &priv_user->gmap_tree) {
                if (arg->index >= gmap->file_index &&
                    arg->index < gmap->file_index + gmap->count * PAGE_SIZE) {
                        if (gmap->notify == NULL)
                                gmap->notify = malloc(sizeof(*gmap->notify),
                                    M_GNTDEV, M_WAITOK | M_ZERO);
                        error = copy_notify_helper(gmap->notify, arg);
                        break;
                }
        }
        mtx_unlock(&priv_user->user_data_lock);

        return (error);
}
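
/*
 * Illustrative use of the notify ioctl (a sketch only, reusing fd and
 * map.index from the mapping sketch above; port is a placeholder for a
 * locally bound event channel port): request that the last byte of the
 * mapped page be cleared and the event channel signalled once the mapping
 * has been fully torn down.
 *
 *        struct ioctl_gntdev_unmap_notify notify = {
 *                .index = map.index + PAGE_SIZE - 1,
 *                .action = UNMAP_NOTIFY_CLEAR_BYTE | UNMAP_NOTIFY_SEND_EVENT,
 *                .event_channel_port = port,
 *        };
 *        ioctl(fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &notify);
 */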

/*------------------ Gntdev Char Device Methods -----------------------------*/

static void
cleanup_function(void *arg, __unused int pending)
{

        gref_list_dtor((struct cleanup_data_struct *) arg);
        gmap_list_dtor((struct cleanup_data_struct *) arg);
}

static void
per_user_data_dtor(void *arg)
{
        struct gntdev_gref *gref, *gref_tmp;
        struct gntdev_gmap *gmap, *gmap_tmp;
        struct file_offset_struct *offset, *offset_tmp;
        struct per_user_data *priv_user;

        priv_user = (struct per_user_data *) arg;

        mtx_lock(&priv_user->user_data_lock);

        mtx_lock(&cleanup_data.to_kill_grefs_mtx);
        RB_FOREACH_SAFE(gref, gref_tree_head, &priv_user->gref_tree, gref_tmp) {
                RB_REMOVE(gref_tree_head, &priv_user->gref_tree, gref);
                STAILQ_INSERT_TAIL(&cleanup_data.to_kill_grefs, gref,
                    gref_next.list);
        }
        mtx_unlock(&cleanup_data.to_kill_grefs_mtx);

        mtx_lock(&cleanup_data.to_kill_gmaps_mtx);
        RB_FOREACH_SAFE(gmap, gmap_tree_head, &priv_user->gmap_tree, gmap_tmp) {
                RB_REMOVE(gmap_tree_head, &priv_user->gmap_tree, gmap);
                STAILQ_INSERT_TAIL(&cleanup_data.to_kill_gmaps, gmap,
                    gmap_next.list);
                if (gmap->map)
                        vm_object_deallocate(gmap->map->mem);
        }
        mtx_unlock(&cleanup_data.to_kill_gmaps_mtx);

        RB_FOREACH_SAFE(offset, file_offset_head, &priv_user->file_offset,
            offset_tmp) {
                RB_REMOVE(file_offset_head, &priv_user->file_offset, offset);
                free(offset, M_GNTDEV);
        }

        mtx_unlock(&priv_user->user_data_lock);

        taskqueue_enqueue(taskqueue_thread, &cleanup_task);

        mtx_destroy(&priv_user->user_data_lock);
        free(priv_user, M_GNTDEV);
}

static int
gntdev_open(struct cdev *dev, int flag, int otyp, struct thread *td)
{
        int error;
        struct per_user_data *priv_user;
        struct file_offset_struct *offset;

        priv_user = malloc(sizeof(*priv_user), M_GNTDEV, M_WAITOK | M_ZERO);
        RB_INIT(&priv_user->gref_tree);
        RB_INIT(&priv_user->gmap_tree);
        RB_INIT(&priv_user->file_offset);
        offset = malloc(sizeof(*offset), M_GNTDEV, M_WAITOK | M_ZERO);
        offset->file_offset = 0;
        offset->count = MAX_OFFSET_COUNT;
        RB_INSERT(file_offset_head, &priv_user->file_offset, offset);
        mtx_init(&priv_user->user_data_lock,
            "per user data mutex", NULL, MTX_DEF);

        error = devfs_set_cdevpriv(priv_user, per_user_data_dtor);
        if (error != 0)
                per_user_data_dtor(priv_user);

        return (error);
}

static int
gntdev_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
    int fflag, struct thread *td)
{
        int error;

        switch (cmd) {
        case IOCTL_GNTDEV_SET_UNMAP_NOTIFY:
                error = gntdev_set_unmap_notify(
                    (struct ioctl_gntdev_unmap_notify*) data);
                break;
        case IOCTL_GNTDEV_ALLOC_GREF:
                error = gntdev_alloc_gref(
                    (struct ioctl_gntdev_alloc_gref*) data);
                break;
        case IOCTL_GNTDEV_DEALLOC_GREF:
                error = gntdev_dealloc_gref(
                    (struct ioctl_gntdev_dealloc_gref*) data);
                break;
        case IOCTL_GNTDEV_MAP_GRANT_REF:
                error = gntdev_map_grant_ref(
                    (struct ioctl_gntdev_map_grant_ref*) data);
                break;
        case IOCTL_GNTDEV_UNMAP_GRANT_REF:
                error = gntdev_unmap_grant_ref(
                    (struct ioctl_gntdev_unmap_grant_ref*) data);
                break;
        case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
                error = gntdev_get_offset_for_vaddr(
                    (struct ioctl_gntdev_get_offset_for_vaddr*) data, td);
                break;
        default:
                error = ENOSYS;
                break;
        }

        return (error);
}

/*
 * MMAP an allocated grant into user memory.
 * Note that the grants must not already be mmapped; otherwise this
 * function will fail.
 */
static int
mmap_gref(struct per_user_data *priv_user, struct gntdev_gref *gref_start,
    uint32_t count, vm_size_t size, struct vm_object **object)
{
        vm_object_t mem_obj;
        struct gntdev_gref *gref;

        mem_obj = vm_pager_allocate(OBJT_PHYS, NULL, size, VM_PROT_ALL, 0,
            curthread->td_ucred);
        if (mem_obj == NULL)
                return (ENOMEM);

        mtx_lock(&priv_user->user_data_lock);
        VM_OBJECT_WLOCK(mem_obj);
        for (gref = gref_start; gref != NULL && count > 0; gref =
            RB_NEXT(gref_tree_head, &priv_user->gref_tree, gref)) {
                if (gref->page->object)
                        break;

                vm_page_insert(gref->page, mem_obj,
                    OFF_TO_IDX(gref->file_index));

                count--;
        }
        VM_OBJECT_WUNLOCK(mem_obj);
        mtx_unlock(&priv_user->user_data_lock);

        if (count) {
                vm_object_deallocate(mem_obj);
                return (EINVAL);
        }

        *object = mem_obj;

        return (0);
}
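
/*
 * Note the asymmetry between the two mmap paths: allocated grants already
 * have wired kernel pages, so mmap_gref() above simply inserts those pages
 * into a plain OBJT_PHYS object, whereas mapped grants have no local pages
 * at all, so mmap_gmap() below backs the region with xenmem pseudo-physical
 * memory and an OBJT_MGTDEVICE pager, and only then issues the map
 * hypercall.
 */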

/*
 * MMAP a mapped grant into user memory.
 */
static int
mmap_gmap(struct per_user_data *priv_user, struct gntdev_gmap *gmap_start,
    vm_ooffset_t *offset, vm_size_t size, struct vm_object **object, int nprot)
{
        uint32_t i;
        int error;

        /*
         * The grant map hypercall might already be done.
         * If that is the case, add a reference to the vm object
         * and return the already allocated object.
         */
        if (gmap_start->map) {
                vm_object_reference(gmap_start->map->mem);
                *object = gmap_start->map->mem;
                return (0);
        }

        gmap_start->map = malloc(sizeof(*(gmap_start->map)), M_GNTDEV,
            M_WAITOK | M_ZERO);

        /* Allocate the xen pseudo physical memory resource. */
        gmap_start->map->pseudo_phys_res_id = 0;
        gmap_start->map->pseudo_phys_res = xenmem_alloc(gntdev_dev,
            &gmap_start->map->pseudo_phys_res_id, size);
        if (gmap_start->map->pseudo_phys_res == NULL) {
                free(gmap_start->map, M_GNTDEV);
                gmap_start->map = NULL;
                return (ENOMEM);
        }
        gmap_start->map->phys_base_addr =
            rman_get_start(gmap_start->map->pseudo_phys_res);

        /* Allocate the mgtdevice pager. */
        gmap_start->map->mem = cdev_pager_allocate(gmap_start, OBJT_MGTDEVICE,
            &gntdev_gmap_pg_ops, size, nprot, *offset, NULL);
        if (gmap_start->map->mem == NULL) {
                xenmem_free(gntdev_dev, gmap_start->map->pseudo_phys_res_id,
                    gmap_start->map->pseudo_phys_res);
                free(gmap_start->map, M_GNTDEV);
                gmap_start->map = NULL;
                return (ENOMEM);
        }

        for (i = 0; i < gmap_start->count; i++) {
                gmap_start->grant_map_ops[i].host_addr =
                    gmap_start->map->phys_base_addr + i * PAGE_SIZE;

                if ((nprot & PROT_WRITE) == 0)
                        gmap_start->grant_map_ops[i].flags |= GNTMAP_readonly;
        }
        /* Make the MAP hypercall. */
        error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
            gmap_start->grant_map_ops, gmap_start->count);
        if (error != 0) {
                /*
                 * Deallocate pager.
                 * Pager deallocation will automatically take care of
                 * xenmem deallocation, etc.
                 */
                vm_object_deallocate(gmap_start->map->mem);

                return (EINVAL);
        }

        /* Retry EAGAIN maps. */
        for (i = 0; i < gmap_start->count; i++) {
                int delay = 1;
                while (delay < 256 &&
                    gmap_start->grant_map_ops[i].status == GNTST_eagain) {
                        HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
                            &gmap_start->grant_map_ops[i], 1);
                        pause(("gntmap"), delay * SBT_1MS);
                        delay++;
                }
                if (gmap_start->grant_map_ops[i].status == GNTST_eagain)
                        gmap_start->grant_map_ops[i].status = GNTST_bad_page;

                if (gmap_start->grant_map_ops[i].status != GNTST_okay) {
                        /*
                         * Deallocate pager.
                         * Pager deallocation will automatically take care of
                         * xenmem deallocation, notification, unmap hypercall,
                         * etc.
                         */
                        vm_object_deallocate(gmap_start->map->mem);

                        return (EINVAL);
                }
        }

        /*
         * Add a reference to the vm object.  We do not want the vm object
         * to be deleted as soon as all the mmaps are removed, because the
         * region may be re-mmapped.  Instead, the object should only go
         * away once all munmaps have happened and the unmap-ioctl has also
         * been processed.
         */
        vm_object_reference(gmap_start->map->mem);

        *object = gmap_start->map->mem;

        return (0);
}

static int
gntdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
    struct vm_object **object, int nprot)
{
        int error;
        uint32_t count;
        struct gntdev_gref *gref_start;
        struct gntdev_gmap *gmap_start;
        struct per_user_data *priv_user;

        error = devfs_get_cdevpriv((void**) &priv_user);
        if (error != 0)
                return (EINVAL);

        count = OFF_TO_IDX(size);

        gref_start = gntdev_find_grefs(priv_user, *offset, count);
        if (gref_start) {
                error = mmap_gref(priv_user, gref_start, count, size, object);
                return (error);
        }

        gmap_start = gntdev_find_gmap(priv_user, *offset, count);
        if (gmap_start) {
                error = mmap_gmap(priv_user, gmap_start, offset, size, object,
                    nprot);
                return (error);
        }

        return (EINVAL);
}

/*------------------ Private Device Attachment Functions --------------------*/
static void
gntdev_identify(driver_t *driver, device_t parent)
{

        KASSERT((xen_domain()),
            ("Trying to attach gntdev device on non Xen domain"));

        if (BUS_ADD_CHILD(parent, 0, "gntdev", 0) == NULL)
                panic("unable to attach gntdev user-space device");
}

static int
gntdev_probe(device_t dev)
{

        gntdev_dev = dev;
        device_set_desc(dev, "Xen grant-table user-space device");
        return (BUS_PROBE_NOWILDCARD);
}

static int
gntdev_attach(device_t dev)
{

        make_dev_credf(MAKEDEV_ETERNAL, &gntdev_devsw, 0, NULL, UID_ROOT,
            GID_WHEEL, 0600, "xen/gntdev");
        return (0);
}

/*-------------------- Private Device Attachment Data -----------------------*/
static device_method_t gntdev_methods[] = {
        DEVMETHOD(device_identify, gntdev_identify),
        DEVMETHOD(device_probe, gntdev_probe),
        DEVMETHOD(device_attach, gntdev_attach),
        DEVMETHOD_END
};

static driver_t gntdev_driver = {
        "gntdev",
        gntdev_methods,
        0,
};

DRIVER_MODULE(gntdev, xenpv, gntdev_driver, 0, 0);
MODULE_DEPEND(gntdev, xenpv, 1, 1, 1);