1 /****************************************************************************** 2 * gnttab.c 3 * 4 * Two sets of functionality: 5 * 1. Granting foreign access to our memory reservation. 6 * 2. Accessing others' memory reservations via grant references. 7 * (i.e., mechanisms for both sender and recipient of grant references) 8 * 9 * Copyright (c) 2005, Christopher Clark 10 * Copyright (c) 2004, K A Fraser 11 */ 12 13 #include <sys/cdefs.h> 14 __FBSDID("$FreeBSD$"); 15 16 #include "opt_pmap.h" 17 18 #include <sys/param.h> 19 #include <sys/systm.h> 20 #include <sys/bus.h> 21 #include <sys/conf.h> 22 #include <sys/module.h> 23 #include <sys/kernel.h> 24 #include <sys/lock.h> 25 #include <sys/malloc.h> 26 #include <sys/mman.h> 27 #include <sys/limits.h> 28 #include <sys/rman.h> 29 #include <machine/resource.h> 30 31 #include <xen/xen-os.h> 32 #include <xen/hypervisor.h> 33 #include <machine/xen/synch_bitops.h> 34 35 #include <xen/hypervisor.h> 36 #include <xen/gnttab.h> 37 38 #include <vm/vm.h> 39 #include <vm/vm_kern.h> 40 #include <vm/vm_extern.h> 41 #include <vm/pmap.h> 42 43 #define cmpxchg(a, b, c) atomic_cmpset_int((volatile u_int *)(a),(b),(c)) 44 45 /* External tools reserve first few grant table entries. */ 46 #define NR_RESERVED_ENTRIES 8 47 #define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t)) 48 49 static grant_ref_t **gnttab_list; 50 static unsigned int nr_grant_frames; 51 static unsigned int boot_max_nr_grant_frames; 52 static int gnttab_free_count; 53 static grant_ref_t gnttab_free_head; 54 static struct mtx gnttab_list_lock; 55 56 #ifdef XENHVM 57 /* 58 * Resource representing allocated physical address space 59 * for the grant table metainfo 60 */ 61 static struct resource *gnttab_pseudo_phys_res; 62 63 /* Resource id for allocated physical address space. */ 64 static int gnttab_pseudo_phys_res_id; 65 #endif 66 67 static grant_entry_t *shared; 68 69 static struct gnttab_free_callback *gnttab_free_callback_list = NULL; 70 71 static int gnttab_expand(unsigned int req_entries); 72 73 #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) 74 #define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP]) 75 76 static int 77 get_free_entries(int count, int *entries) 78 { 79 int ref, error; 80 grant_ref_t head; 81 82 mtx_lock(&gnttab_list_lock); 83 if ((gnttab_free_count < count) && 84 ((error = gnttab_expand(count - gnttab_free_count)) != 0)) { 85 mtx_unlock(&gnttab_list_lock); 86 return (error); 87 } 88 ref = head = gnttab_free_head; 89 gnttab_free_count -= count; 90 while (count-- > 1) 91 head = gnttab_entry(head); 92 gnttab_free_head = gnttab_entry(head); 93 gnttab_entry(head) = GNTTAB_LIST_END; 94 mtx_unlock(&gnttab_list_lock); 95 96 *entries = ref; 97 return (0); 98 } 99 100 static void 101 do_free_callbacks(void) 102 { 103 struct gnttab_free_callback *callback, *next; 104 105 callback = gnttab_free_callback_list; 106 gnttab_free_callback_list = NULL; 107 108 while (callback != NULL) { 109 next = callback->next; 110 if (gnttab_free_count >= callback->count) { 111 callback->next = NULL; 112 callback->fn(callback->arg); 113 } else { 114 callback->next = gnttab_free_callback_list; 115 gnttab_free_callback_list = callback; 116 } 117 callback = next; 118 } 119 } 120 121 static inline void 122 check_free_callbacks(void) 123 { 124 if (__predict_false(gnttab_free_callback_list != NULL)) 125 do_free_callbacks(); 126 } 127 128 static void 129 put_free_entry(grant_ref_t ref) 130 { 131 132 mtx_lock(&gnttab_list_lock); 133 gnttab_entry(ref) = gnttab_free_head; 134 gnttab_free_head = ref; 135 gnttab_free_count++; 136 check_free_callbacks(); 137 mtx_unlock(&gnttab_list_lock); 138 } 139 140 /* 141 * Public grant-issuing interface functions 142 */ 143 144 int 145 gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly, 146 grant_ref_t *result) 147 { 148 int error, ref; 149 150 error = get_free_entries(1, &ref); 151 152 if (__predict_false(error)) 153 return (error); 154 155 shared[ref].frame = frame; 156 shared[ref].domid = domid; 157 wmb(); 158 shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); 159 160 if (result) 161 *result = ref; 162 163 return (0); 164 } 165 166 void 167 gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, 168 unsigned long frame, int readonly) 169 { 170 171 shared[ref].frame = frame; 172 shared[ref].domid = domid; 173 wmb(); 174 shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); 175 } 176 177 int 178 gnttab_query_foreign_access(grant_ref_t ref) 179 { 180 uint16_t nflags; 181 182 nflags = shared[ref].flags; 183 184 return (nflags & (GTF_reading|GTF_writing)); 185 } 186 187 int 188 gnttab_end_foreign_access_ref(grant_ref_t ref) 189 { 190 uint16_t flags, nflags; 191 192 nflags = shared[ref].flags; 193 do { 194 if ( (flags = nflags) & (GTF_reading|GTF_writing) ) { 195 printf("%s: WARNING: g.e. still in use!\n", __func__); 196 return (0); 197 } 198 } while ((nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) != 199 flags); 200 201 return (1); 202 } 203 204 void 205 gnttab_end_foreign_access(grant_ref_t ref, void *page) 206 { 207 if (gnttab_end_foreign_access_ref(ref)) { 208 put_free_entry(ref); 209 if (page != NULL) { 210 free(page, M_DEVBUF); 211 } 212 } 213 else { 214 /* XXX This needs to be fixed so that the ref and page are 215 placed on a list to be freed up later. */ 216 printf("%s: WARNING: leaking g.e. and page still in use!\n", 217 __func__); 218 } 219 } 220 221 void 222 gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs) 223 { 224 grant_ref_t *last_ref; 225 grant_ref_t head; 226 grant_ref_t tail; 227 228 head = GNTTAB_LIST_END; 229 tail = *refs; 230 last_ref = refs + count; 231 while (refs != last_ref) { 232 233 if (gnttab_end_foreign_access_ref(*refs)) { 234 gnttab_entry(*refs) = head; 235 head = *refs; 236 } else { 237 /* 238 * XXX This needs to be fixed so that the ref 239 * is placed on a list to be freed up later. 240 */ 241 printf("%s: WARNING: leaking g.e. still in use!\n", 242 __func__); 243 count--; 244 } 245 refs++; 246 } 247 248 if (count != 0) { 249 mtx_lock(&gnttab_list_lock); 250 gnttab_free_count += count; 251 gnttab_entry(tail) = gnttab_free_head; 252 gnttab_free_head = head; 253 mtx_unlock(&gnttab_list_lock); 254 } 255 } 256 257 int 258 gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, 259 grant_ref_t *result) 260 { 261 int error, ref; 262 263 error = get_free_entries(1, &ref); 264 if (__predict_false(error)) 265 return (error); 266 267 gnttab_grant_foreign_transfer_ref(ref, domid, pfn); 268 269 *result = ref; 270 return (0); 271 } 272 273 void 274 gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, 275 unsigned long pfn) 276 { 277 shared[ref].frame = pfn; 278 shared[ref].domid = domid; 279 wmb(); 280 shared[ref].flags = GTF_accept_transfer; 281 } 282 283 unsigned long 284 gnttab_end_foreign_transfer_ref(grant_ref_t ref) 285 { 286 unsigned long frame; 287 uint16_t flags; 288 289 /* 290 * If a transfer is not even yet started, try to reclaim the grant 291 * reference and return failure (== 0). 292 */ 293 while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { 294 if ( synch_cmpxchg(&shared[ref].flags, flags, 0) == flags ) 295 return (0); 296 cpu_relax(); 297 } 298 299 /* If a transfer is in progress then wait until it is completed. */ 300 while (!(flags & GTF_transfer_completed)) { 301 flags = shared[ref].flags; 302 cpu_relax(); 303 } 304 305 /* Read the frame number /after/ reading completion status. */ 306 rmb(); 307 frame = shared[ref].frame; 308 KASSERT(frame != 0, ("grant table inconsistent")); 309 310 return (frame); 311 } 312 313 unsigned long 314 gnttab_end_foreign_transfer(grant_ref_t ref) 315 { 316 unsigned long frame = gnttab_end_foreign_transfer_ref(ref); 317 318 put_free_entry(ref); 319 return (frame); 320 } 321 322 void 323 gnttab_free_grant_reference(grant_ref_t ref) 324 { 325 326 put_free_entry(ref); 327 } 328 329 void 330 gnttab_free_grant_references(grant_ref_t head) 331 { 332 grant_ref_t ref; 333 int count = 1; 334 335 if (head == GNTTAB_LIST_END) 336 return; 337 338 ref = head; 339 while (gnttab_entry(ref) != GNTTAB_LIST_END) { 340 ref = gnttab_entry(ref); 341 count++; 342 } 343 mtx_lock(&gnttab_list_lock); 344 gnttab_entry(ref) = gnttab_free_head; 345 gnttab_free_head = head; 346 gnttab_free_count += count; 347 check_free_callbacks(); 348 mtx_unlock(&gnttab_list_lock); 349 } 350 351 int 352 gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head) 353 { 354 int ref, error; 355 356 error = get_free_entries(count, &ref); 357 if (__predict_false(error)) 358 return (error); 359 360 *head = ref; 361 return (0); 362 } 363 364 int 365 gnttab_empty_grant_references(const grant_ref_t *private_head) 366 { 367 368 return (*private_head == GNTTAB_LIST_END); 369 } 370 371 int 372 gnttab_claim_grant_reference(grant_ref_t *private_head) 373 { 374 grant_ref_t g = *private_head; 375 376 if (__predict_false(g == GNTTAB_LIST_END)) 377 return (g); 378 *private_head = gnttab_entry(g); 379 return (g); 380 } 381 382 void 383 gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release) 384 { 385 386 gnttab_entry(release) = *private_head; 387 *private_head = release; 388 } 389 390 void 391 gnttab_request_free_callback(struct gnttab_free_callback *callback, 392 void (*fn)(void *), void *arg, uint16_t count) 393 { 394 395 mtx_lock(&gnttab_list_lock); 396 if (callback->next) 397 goto out; 398 callback->fn = fn; 399 callback->arg = arg; 400 callback->count = count; 401 callback->next = gnttab_free_callback_list; 402 gnttab_free_callback_list = callback; 403 check_free_callbacks(); 404 out: 405 mtx_unlock(&gnttab_list_lock); 406 407 } 408 409 void 410 gnttab_cancel_free_callback(struct gnttab_free_callback *callback) 411 { 412 struct gnttab_free_callback **pcb; 413 414 mtx_lock(&gnttab_list_lock); 415 for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { 416 if (*pcb == callback) { 417 *pcb = callback->next; 418 break; 419 } 420 } 421 mtx_unlock(&gnttab_list_lock); 422 } 423 424 425 static int 426 grow_gnttab_list(unsigned int more_frames) 427 { 428 unsigned int new_nr_grant_frames, extra_entries, i; 429 430 new_nr_grant_frames = nr_grant_frames + more_frames; 431 extra_entries = more_frames * GREFS_PER_GRANT_FRAME; 432 433 for (i = nr_grant_frames; i < new_nr_grant_frames; i++) 434 { 435 gnttab_list[i] = (grant_ref_t *) 436 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); 437 438 if (!gnttab_list[i]) 439 goto grow_nomem; 440 } 441 442 for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; 443 i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) 444 gnttab_entry(i) = i + 1; 445 446 gnttab_entry(i) = gnttab_free_head; 447 gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; 448 gnttab_free_count += extra_entries; 449 450 nr_grant_frames = new_nr_grant_frames; 451 452 check_free_callbacks(); 453 454 return (0); 455 456 grow_nomem: 457 for ( ; i >= nr_grant_frames; i--) 458 free(gnttab_list[i], M_DEVBUF); 459 return (ENOMEM); 460 } 461 462 static unsigned int 463 __max_nr_grant_frames(void) 464 { 465 struct gnttab_query_size query; 466 int rc; 467 468 query.dom = DOMID_SELF; 469 470 rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); 471 if ((rc < 0) || (query.status != GNTST_okay)) 472 return (4); /* Legacy max supported number of frames */ 473 474 return (query.max_nr_frames); 475 } 476 477 static inline 478 unsigned int max_nr_grant_frames(void) 479 { 480 unsigned int xen_max = __max_nr_grant_frames(); 481 482 if (xen_max > boot_max_nr_grant_frames) 483 return (boot_max_nr_grant_frames); 484 return (xen_max); 485 } 486 487 #ifdef notyet 488 /* 489 * XXX needed for backend support 490 * 491 */ 492 static int 493 map_pte_fn(pte_t *pte, struct page *pmd_page, 494 unsigned long addr, void *data) 495 { 496 unsigned long **frames = (unsigned long **)data; 497 498 set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL)); 499 (*frames)++; 500 return 0; 501 } 502 503 static int 504 unmap_pte_fn(pte_t *pte, struct page *pmd_page, 505 unsigned long addr, void *data) 506 { 507 508 set_pte_at(&init_mm, addr, pte, __pte(0)); 509 return 0; 510 } 511 #endif 512 513 #ifndef XENHVM 514 515 static int 516 gnttab_map(unsigned int start_idx, unsigned int end_idx) 517 { 518 struct gnttab_setup_table setup; 519 u_long *frames; 520 521 unsigned int nr_gframes = end_idx + 1; 522 int i, rc; 523 524 frames = malloc(nr_gframes * sizeof(unsigned long), M_DEVBUF, M_NOWAIT); 525 if (!frames) 526 return (ENOMEM); 527 528 setup.dom = DOMID_SELF; 529 setup.nr_frames = nr_gframes; 530 set_xen_guest_handle(setup.frame_list, frames); 531 532 rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); 533 if (rc == -ENOSYS) { 534 free(frames, M_DEVBUF); 535 return (ENOSYS); 536 } 537 KASSERT(!(rc || setup.status), 538 ("unexpected result from grant_table_op")); 539 540 if (shared == NULL) { 541 vm_offset_t area; 542 543 area = kva_alloc(PAGE_SIZE * max_nr_grant_frames()); 544 KASSERT(area, ("can't allocate VM space for grant table")); 545 shared = (grant_entry_t *)area; 546 } 547 548 for (i = 0; i < nr_gframes; i++) 549 PT_SET_MA(((caddr_t)shared) + i*PAGE_SIZE, 550 ((vm_paddr_t)frames[i]) << PAGE_SHIFT | PG_RW | PG_V); 551 552 free(frames, M_DEVBUF); 553 554 return (0); 555 } 556 557 int 558 gnttab_resume(device_t dev) 559 { 560 561 if (max_nr_grant_frames() < nr_grant_frames) 562 return (ENOSYS); 563 return (gnttab_map(0, nr_grant_frames - 1)); 564 } 565 566 int 567 gnttab_suspend(void) 568 { 569 int i; 570 571 for (i = 0; i < nr_grant_frames; i++) 572 pmap_kremove((vm_offset_t) shared + i * PAGE_SIZE); 573 574 return (0); 575 } 576 577 #else /* XENHVM */ 578 579 static vm_paddr_t resume_frames; 580 581 static int 582 gnttab_map(unsigned int start_idx, unsigned int end_idx) 583 { 584 struct xen_add_to_physmap xatp; 585 unsigned int i = end_idx; 586 587 /* 588 * Loop backwards, so that the first hypercall has the largest index, 589 * ensuring that the table will grow only once. 590 */ 591 do { 592 xatp.domid = DOMID_SELF; 593 xatp.idx = i; 594 xatp.space = XENMAPSPACE_grant_table; 595 xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i; 596 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) 597 panic("HYPERVISOR_memory_op failed to map gnttab"); 598 } while (i-- > start_idx); 599 600 if (shared == NULL) { 601 vm_offset_t area; 602 603 area = kva_alloc(PAGE_SIZE * max_nr_grant_frames()); 604 KASSERT(area, ("can't allocate VM space for grant table")); 605 shared = (grant_entry_t *)area; 606 } 607 608 for (i = start_idx; i <= end_idx; i++) { 609 pmap_kenter((vm_offset_t) shared + i * PAGE_SIZE, 610 resume_frames + i * PAGE_SIZE); 611 } 612 613 return (0); 614 } 615 616 int 617 gnttab_resume(device_t dev) 618 { 619 unsigned int max_nr_gframes, nr_gframes; 620 621 nr_gframes = nr_grant_frames; 622 max_nr_gframes = max_nr_grant_frames(); 623 if (max_nr_gframes < nr_gframes) 624 return (ENOSYS); 625 626 if (!resume_frames) { 627 KASSERT(dev != NULL, 628 ("No resume frames and no device provided")); 629 630 gnttab_pseudo_phys_res = bus_alloc_resource(dev, 631 SYS_RES_MEMORY, &gnttab_pseudo_phys_res_id, 0, ~0, 632 PAGE_SIZE * max_nr_gframes, RF_ACTIVE); 633 if (gnttab_pseudo_phys_res == NULL) 634 panic("Unable to reserve physical memory for gnttab"); 635 resume_frames = rman_get_start(gnttab_pseudo_phys_res); 636 } 637 638 return (gnttab_map(0, nr_gframes - 1)); 639 } 640 641 #endif 642 643 static int 644 gnttab_expand(unsigned int req_entries) 645 { 646 int error; 647 unsigned int cur, extra; 648 649 cur = nr_grant_frames; 650 extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) / 651 GREFS_PER_GRANT_FRAME); 652 if (cur + extra > max_nr_grant_frames()) 653 return (ENOSPC); 654 655 error = gnttab_map(cur, cur + extra - 1); 656 if (!error) 657 error = grow_gnttab_list(extra); 658 659 return (error); 660 } 661 662 MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF); 663 664 /*------------------ Private Device Attachment Functions --------------------*/ 665 /** 666 * \brief Identify instances of this device type in the system. 667 * 668 * \param driver The driver performing this identify action. 669 * \param parent The NewBus parent device for any devices this method adds. 670 */ 671 static void 672 granttable_identify(driver_t *driver __unused, device_t parent) 673 { 674 675 KASSERT(xen_domain(), 676 ("Trying to attach grant-table device on non Xen domain")); 677 /* 678 * A single device instance for our driver is always present 679 * in a system operating under Xen. 680 */ 681 if (BUS_ADD_CHILD(parent, 0, driver->name, 0) == NULL) 682 panic("unable to attach Xen Grant-table device"); 683 } 684 685 /** 686 * \brief Probe for the existence of the Xen Grant-table device 687 * 688 * \param dev NewBus device_t for this instance. 689 * 690 * \return Always returns 0 indicating success. 691 */ 692 static int 693 granttable_probe(device_t dev) 694 { 695 696 device_set_desc(dev, "Xen Grant-table Device"); 697 return (BUS_PROBE_NOWILDCARD); 698 } 699 700 /** 701 * \brief Attach the Xen Grant-table device. 702 * 703 * \param dev NewBus device_t for this instance. 704 * 705 * \return On success, 0. Otherwise an errno value indicating the 706 * type of failure. 707 */ 708 static int 709 granttable_attach(device_t dev) 710 { 711 int i; 712 unsigned int max_nr_glist_frames; 713 unsigned int nr_init_grefs; 714 715 nr_grant_frames = 1; 716 boot_max_nr_grant_frames = __max_nr_grant_frames(); 717 718 /* Determine the maximum number of frames required for the 719 * grant reference free list on the current hypervisor. 720 */ 721 max_nr_glist_frames = (boot_max_nr_grant_frames * 722 GREFS_PER_GRANT_FRAME / 723 (PAGE_SIZE / sizeof(grant_ref_t))); 724 725 gnttab_list = malloc(max_nr_glist_frames * sizeof(grant_ref_t *), 726 M_DEVBUF, M_NOWAIT); 727 728 if (gnttab_list == NULL) 729 return (ENOMEM); 730 731 for (i = 0; i < nr_grant_frames; i++) { 732 gnttab_list[i] = (grant_ref_t *) 733 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); 734 if (gnttab_list[i] == NULL) 735 goto ini_nomem; 736 } 737 738 if (gnttab_resume(dev)) 739 return (ENODEV); 740 741 nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; 742 743 for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) 744 gnttab_entry(i) = i + 1; 745 746 gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; 747 gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; 748 gnttab_free_head = NR_RESERVED_ENTRIES; 749 750 if (bootverbose) 751 printf("Grant table initialized\n"); 752 753 return (0); 754 755 ini_nomem: 756 for (i--; i >= 0; i--) 757 free(gnttab_list[i], M_DEVBUF); 758 free(gnttab_list, M_DEVBUF); 759 return (ENOMEM); 760 } 761 762 /*-------------------- Private Device Attachment Data -----------------------*/ 763 static device_method_t granttable_methods[] = { 764 /* Device interface */ 765 DEVMETHOD(device_identify, granttable_identify), 766 DEVMETHOD(device_probe, granttable_probe), 767 DEVMETHOD(device_attach, granttable_attach), 768 769 DEVMETHOD_END 770 }; 771 772 DEFINE_CLASS_0(granttable, granttable_driver, granttable_methods, 0); 773 devclass_t granttable_devclass; 774 775 DRIVER_MODULE_ORDERED(granttable, xenpv, granttable_driver, granttable_devclass, 776 NULL, NULL, SI_ORDER_FIRST); 777