1 /****************************************************************************** 2 * gnttab.c 3 * 4 * Two sets of functionality: 5 * 1. Granting foreign access to our memory reservation. 6 * 2. Accessing others' memory reservations via grant references. 7 * (i.e., mechanisms for both sender and recipient of grant references) 8 * 9 * Copyright (c) 2005, Christopher Clark 10 * Copyright (c) 2004, K A Fraser 11 */ 12 13 #include <sys/cdefs.h> 14 __FBSDID("$FreeBSD$"); 15 16 #include <sys/param.h> 17 #include <sys/systm.h> 18 #include <sys/bus.h> 19 #include <sys/conf.h> 20 #include <sys/module.h> 21 #include <sys/kernel.h> 22 #include <sys/lock.h> 23 #include <sys/malloc.h> 24 #include <sys/mman.h> 25 #include <sys/limits.h> 26 #include <sys/rman.h> 27 #include <machine/resource.h> 28 #include <machine/cpu.h> 29 30 #include <xen/xen-os.h> 31 #include <xen/hypervisor.h> 32 #include <xen/gnttab.h> 33 34 #include <vm/vm.h> 35 #include <vm/vm_kern.h> 36 #include <vm/vm_extern.h> 37 #include <vm/pmap.h> 38 39 /* External tools reserve first few grant table entries. */ 40 #define NR_RESERVED_ENTRIES 8 41 #define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_v1_t)) 42 43 static grant_ref_t **gnttab_list; 44 static unsigned int nr_grant_frames; 45 static unsigned int boot_max_nr_grant_frames; 46 static int gnttab_free_count; 47 static grant_ref_t gnttab_free_head; 48 static struct mtx gnttab_list_lock; 49 50 /* 51 * Resource representing allocated physical address space 52 * for the grant table metainfo 53 */ 54 static struct resource *gnttab_pseudo_phys_res; 55 56 /* Resource id for allocated physical address space. */ 57 static int gnttab_pseudo_phys_res_id; 58 59 static grant_entry_v1_t *shared; 60 61 static struct gnttab_free_callback *gnttab_free_callback_list = NULL; 62 63 static int gnttab_expand(unsigned int req_entries); 64 65 #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) 66 #define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP]) 67 68 static int 69 get_free_entries(int count, int *entries) 70 { 71 int ref, error; 72 grant_ref_t head; 73 74 mtx_lock(&gnttab_list_lock); 75 if ((gnttab_free_count < count) && 76 ((error = gnttab_expand(count - gnttab_free_count)) != 0)) { 77 mtx_unlock(&gnttab_list_lock); 78 return (error); 79 } 80 ref = head = gnttab_free_head; 81 gnttab_free_count -= count; 82 while (count-- > 1) 83 head = gnttab_entry(head); 84 gnttab_free_head = gnttab_entry(head); 85 gnttab_entry(head) = GNTTAB_LIST_END; 86 mtx_unlock(&gnttab_list_lock); 87 88 *entries = ref; 89 return (0); 90 } 91 92 static void 93 do_free_callbacks(void) 94 { 95 struct gnttab_free_callback *callback, *next; 96 97 callback = gnttab_free_callback_list; 98 gnttab_free_callback_list = NULL; 99 100 while (callback != NULL) { 101 next = callback->next; 102 if (gnttab_free_count >= callback->count) { 103 callback->next = NULL; 104 callback->fn(callback->arg); 105 } else { 106 callback->next = gnttab_free_callback_list; 107 gnttab_free_callback_list = callback; 108 } 109 callback = next; 110 } 111 } 112 113 static inline void 114 check_free_callbacks(void) 115 { 116 if (__predict_false(gnttab_free_callback_list != NULL)) 117 do_free_callbacks(); 118 } 119 120 static void 121 put_free_entry(grant_ref_t ref) 122 { 123 124 mtx_lock(&gnttab_list_lock); 125 gnttab_entry(ref) = gnttab_free_head; 126 gnttab_free_head = ref; 127 gnttab_free_count++; 128 check_free_callbacks(); 129 mtx_unlock(&gnttab_list_lock); 130 } 131 132 /* 133 * Public grant-issuing interface functions 134 */ 135 136 int 137 gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly, 138 grant_ref_t *result) 139 { 140 int error, ref; 141 142 error = get_free_entries(1, &ref); 143 144 if (__predict_false(error)) 145 return (error); 146 147 shared[ref].frame = frame; 148 shared[ref].domid = domid; 149 wmb(); 150 shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); 151 152 if (result) 153 *result = ref; 154 155 return (0); 156 } 157 158 void 159 gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, 160 unsigned long frame, int readonly) 161 { 162 163 shared[ref].frame = frame; 164 shared[ref].domid = domid; 165 wmb(); 166 shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); 167 } 168 169 int 170 gnttab_query_foreign_access(grant_ref_t ref) 171 { 172 uint16_t nflags; 173 174 nflags = shared[ref].flags; 175 176 return (nflags & (GTF_reading|GTF_writing)); 177 } 178 179 int 180 gnttab_end_foreign_access_ref(grant_ref_t ref) 181 { 182 uint16_t flags; 183 184 while (!((flags = atomic_load_16(&shared[ref].flags)) & 185 (GTF_reading|GTF_writing))) 186 if (atomic_cmpset_16(&shared[ref].flags, flags, 0)) 187 return (1); 188 189 printf("%s: WARNING: g.e. still in use!\n", __func__); 190 return (0); 191 } 192 193 void 194 gnttab_end_foreign_access(grant_ref_t ref, void *page) 195 { 196 if (gnttab_end_foreign_access_ref(ref)) { 197 put_free_entry(ref); 198 if (page != NULL) { 199 free(page, M_DEVBUF); 200 } 201 } 202 else { 203 /* XXX This needs to be fixed so that the ref and page are 204 placed on a list to be freed up later. */ 205 printf("%s: WARNING: leaking g.e. and page still in use!\n", 206 __func__); 207 } 208 } 209 210 void 211 gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs) 212 { 213 grant_ref_t *last_ref; 214 grant_ref_t head; 215 grant_ref_t tail; 216 217 head = GNTTAB_LIST_END; 218 tail = *refs; 219 last_ref = refs + count; 220 while (refs != last_ref) { 221 if (gnttab_end_foreign_access_ref(*refs)) { 222 gnttab_entry(*refs) = head; 223 head = *refs; 224 } else { 225 /* 226 * XXX This needs to be fixed so that the ref 227 * is placed on a list to be freed up later. 228 */ 229 printf("%s: WARNING: leaking g.e. still in use!\n", 230 __func__); 231 count--; 232 } 233 refs++; 234 } 235 236 if (count != 0) { 237 mtx_lock(&gnttab_list_lock); 238 gnttab_free_count += count; 239 gnttab_entry(tail) = gnttab_free_head; 240 gnttab_free_head = head; 241 check_free_callbacks(); 242 mtx_unlock(&gnttab_list_lock); 243 } 244 } 245 246 int 247 gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, 248 grant_ref_t *result) 249 { 250 int error, ref; 251 252 error = get_free_entries(1, &ref); 253 if (__predict_false(error)) 254 return (error); 255 256 gnttab_grant_foreign_transfer_ref(ref, domid, pfn); 257 258 *result = ref; 259 return (0); 260 } 261 262 void 263 gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, 264 unsigned long pfn) 265 { 266 shared[ref].frame = pfn; 267 shared[ref].domid = domid; 268 wmb(); 269 shared[ref].flags = GTF_accept_transfer; 270 } 271 272 unsigned long 273 gnttab_end_foreign_transfer_ref(grant_ref_t ref) 274 { 275 unsigned long frame; 276 uint16_t flags; 277 278 /* 279 * If a transfer is not even yet started, try to reclaim the grant 280 * reference and return failure (== 0). 281 * 282 * NOTE: This is a loop since the atomic cmpset can fail multiple 283 * times. In normal operation it will be rare to execute more than 284 * twice. Attempting an attack would consume a great deal of 285 * attacker resources and be unlikely to prolong the loop very much. 286 */ 287 while (!((flags = atomic_load_16(&shared[ref].flags)) & 288 GTF_transfer_committed)) 289 if (atomic_cmpset_16(&shared[ref].flags, flags, 0)) 290 return (0); 291 292 /* If a transfer is in progress then wait until it is completed. */ 293 while (!(flags & GTF_transfer_completed)) { 294 cpu_spinwait(); 295 flags = atomic_load_16(&shared[ref].flags); 296 } 297 298 /* Read the frame number /after/ reading completion status. */ 299 rmb(); 300 frame = shared[ref].frame; 301 KASSERT(frame != 0, ("grant table inconsistent")); 302 303 return (frame); 304 } 305 306 unsigned long 307 gnttab_end_foreign_transfer(grant_ref_t ref) 308 { 309 unsigned long frame = gnttab_end_foreign_transfer_ref(ref); 310 311 put_free_entry(ref); 312 return (frame); 313 } 314 315 void 316 gnttab_free_grant_reference(grant_ref_t ref) 317 { 318 319 put_free_entry(ref); 320 } 321 322 void 323 gnttab_free_grant_references(grant_ref_t head) 324 { 325 grant_ref_t ref; 326 int count = 1; 327 328 if (head == GNTTAB_LIST_END) 329 return; 330 331 ref = head; 332 while (gnttab_entry(ref) != GNTTAB_LIST_END) { 333 ref = gnttab_entry(ref); 334 count++; 335 } 336 mtx_lock(&gnttab_list_lock); 337 gnttab_entry(ref) = gnttab_free_head; 338 gnttab_free_head = head; 339 gnttab_free_count += count; 340 check_free_callbacks(); 341 mtx_unlock(&gnttab_list_lock); 342 } 343 344 int 345 gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head) 346 { 347 int ref, error; 348 349 error = get_free_entries(count, &ref); 350 if (__predict_false(error)) 351 return (error); 352 353 *head = ref; 354 return (0); 355 } 356 357 int 358 gnttab_empty_grant_references(const grant_ref_t *private_head) 359 { 360 361 return (*private_head == GNTTAB_LIST_END); 362 } 363 364 int 365 gnttab_claim_grant_reference(grant_ref_t *private_head) 366 { 367 grant_ref_t g = *private_head; 368 369 if (__predict_false(g == GNTTAB_LIST_END)) 370 return (g); 371 *private_head = gnttab_entry(g); 372 return (g); 373 } 374 375 void 376 gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release) 377 { 378 379 gnttab_entry(release) = *private_head; 380 *private_head = release; 381 } 382 383 void 384 gnttab_request_free_callback(struct gnttab_free_callback *callback, 385 void (*fn)(void *), void *arg, uint16_t count) 386 { 387 388 mtx_lock(&gnttab_list_lock); 389 if (callback->next) 390 goto out; 391 callback->fn = fn; 392 callback->arg = arg; 393 callback->count = count; 394 callback->next = gnttab_free_callback_list; 395 gnttab_free_callback_list = callback; 396 check_free_callbacks(); 397 out: 398 mtx_unlock(&gnttab_list_lock); 399 400 } 401 402 void 403 gnttab_cancel_free_callback(struct gnttab_free_callback *callback) 404 { 405 struct gnttab_free_callback **pcb; 406 407 mtx_lock(&gnttab_list_lock); 408 for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { 409 if (*pcb == callback) { 410 *pcb = callback->next; 411 break; 412 } 413 } 414 mtx_unlock(&gnttab_list_lock); 415 } 416 417 static int 418 grow_gnttab_list(unsigned int more_frames) 419 { 420 unsigned int new_nr_grant_frames, extra_entries, i; 421 422 new_nr_grant_frames = nr_grant_frames + more_frames; 423 extra_entries = more_frames * GREFS_PER_GRANT_FRAME; 424 425 for (i = nr_grant_frames; i < new_nr_grant_frames; i++) 426 { 427 gnttab_list[i] = (grant_ref_t *) 428 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); 429 430 if (!gnttab_list[i]) 431 goto grow_nomem; 432 } 433 434 for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; 435 i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) 436 gnttab_entry(i) = i + 1; 437 438 gnttab_entry(i) = gnttab_free_head; 439 gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; 440 gnttab_free_count += extra_entries; 441 442 nr_grant_frames = new_nr_grant_frames; 443 444 check_free_callbacks(); 445 446 return (0); 447 448 grow_nomem: 449 for ( ; i >= nr_grant_frames; i--) 450 free(gnttab_list[i], M_DEVBUF); 451 return (ENOMEM); 452 } 453 454 static unsigned int 455 __max_nr_grant_frames(void) 456 { 457 struct gnttab_query_size query; 458 int rc; 459 460 query.dom = DOMID_SELF; 461 462 rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); 463 if ((rc < 0) || (query.status != GNTST_okay)) 464 return (4); /* Legacy max supported number of frames */ 465 466 return (query.max_nr_frames); 467 } 468 469 static inline 470 unsigned int max_nr_grant_frames(void) 471 { 472 473 return (min(__max_nr_grant_frames(), boot_max_nr_grant_frames)); 474 } 475 476 #ifdef notyet 477 /* 478 * XXX needed for backend support 479 * 480 */ 481 static int 482 map_pte_fn(pte_t *pte, struct page *pmd_page, 483 unsigned long addr, void *data) 484 { 485 unsigned long **frames = (unsigned long **)data; 486 487 set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL)); 488 (*frames)++; 489 return 0; 490 } 491 492 static int 493 unmap_pte_fn(pte_t *pte, struct page *pmd_page, 494 unsigned long addr, void *data) 495 { 496 497 set_pte_at(&init_mm, addr, pte, __pte(0)); 498 return 0; 499 } 500 #endif 501 502 static vm_paddr_t resume_frames; 503 504 static void 505 gnttab_map(unsigned int start_idx, unsigned int end_idx) 506 { 507 struct xen_add_to_physmap xatp; 508 unsigned int i = end_idx; 509 510 /* 511 * Loop backwards, so that the first hypercall has the largest index, 512 * ensuring that the table will grow only once. 513 */ 514 do { 515 xatp.domid = DOMID_SELF; 516 xatp.idx = i; 517 xatp.space = XENMAPSPACE_grant_table; 518 xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i; 519 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) 520 panic("HYPERVISOR_memory_op failed to map gnttab"); 521 } while (i-- > start_idx); 522 } 523 524 int 525 gnttab_resume(device_t dev) 526 { 527 unsigned int max_nr_gframes, nr_gframes; 528 529 nr_gframes = nr_grant_frames; 530 max_nr_gframes = max_nr_grant_frames(); 531 if (max_nr_gframes < nr_gframes) 532 return (ENOSYS); 533 534 if (!resume_frames) { 535 KASSERT(dev != NULL, 536 ("No resume frames and no device provided")); 537 538 gnttab_pseudo_phys_res = xenmem_alloc(dev, 539 &gnttab_pseudo_phys_res_id, PAGE_SIZE * max_nr_gframes); 540 if (gnttab_pseudo_phys_res == NULL) 541 panic("Unable to reserve physical memory for gnttab"); 542 resume_frames = rman_get_start(gnttab_pseudo_phys_res); 543 shared = rman_get_virtual(gnttab_pseudo_phys_res); 544 } 545 gnttab_map(0, nr_gframes - 1); 546 547 return (0); 548 } 549 550 static int 551 gnttab_expand(unsigned int req_entries) 552 { 553 unsigned int cur, extra; 554 555 cur = nr_grant_frames; 556 extra = howmany(req_entries, GREFS_PER_GRANT_FRAME); 557 if (cur + extra > max_nr_grant_frames()) 558 return (ENOSPC); 559 560 gnttab_map(cur, cur + extra - 1); 561 562 return (grow_gnttab_list(extra)); 563 } 564 565 MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF | MTX_RECURSE); 566 567 /*------------------ Private Device Attachment Functions --------------------*/ 568 /** 569 * \brief Identify instances of this device type in the system. 570 * 571 * \param driver The driver performing this identify action. 572 * \param parent The NewBus parent device for any devices this method adds. 573 */ 574 static void 575 granttable_identify(driver_t *driver __unused, device_t parent) 576 { 577 578 KASSERT(xen_domain(), 579 ("Trying to attach grant-table device on non Xen domain")); 580 /* 581 * A single device instance for our driver is always present 582 * in a system operating under Xen. 583 */ 584 if (BUS_ADD_CHILD(parent, 0, driver->name, 0) == NULL) 585 panic("unable to attach Xen Grant-table device"); 586 } 587 588 /** 589 * \brief Probe for the existence of the Xen Grant-table device 590 * 591 * \param dev NewBus device_t for this instance. 592 * 593 * \return Always returns 0 indicating success. 594 */ 595 static int 596 granttable_probe(device_t dev) 597 { 598 599 device_set_desc(dev, "Xen Grant-table Device"); 600 return (BUS_PROBE_NOWILDCARD); 601 } 602 603 /** 604 * \brief Attach the Xen Grant-table device. 605 * 606 * \param dev NewBus device_t for this instance. 607 * 608 * \return On success, 0. Otherwise an errno value indicating the 609 * type of failure. 610 */ 611 static int 612 granttable_attach(device_t dev) 613 { 614 int i; 615 unsigned int max_nr_glist_frames; 616 unsigned int nr_init_grefs; 617 618 nr_grant_frames = 1; 619 boot_max_nr_grant_frames = __max_nr_grant_frames(); 620 621 /* Determine the maximum number of frames required for the 622 * grant reference free list on the current hypervisor. 623 */ 624 max_nr_glist_frames = (boot_max_nr_grant_frames * 625 GREFS_PER_GRANT_FRAME / 626 (PAGE_SIZE / sizeof(grant_ref_t))); 627 628 gnttab_list = malloc(max_nr_glist_frames * sizeof(grant_ref_t *), 629 M_DEVBUF, M_NOWAIT); 630 631 if (gnttab_list == NULL) 632 return (ENOMEM); 633 634 for (i = 0; i < nr_grant_frames; i++) { 635 gnttab_list[i] = (grant_ref_t *) 636 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); 637 if (gnttab_list[i] == NULL) 638 goto ini_nomem; 639 } 640 641 if (gnttab_resume(dev)) 642 return (ENODEV); 643 644 nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; 645 646 for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) 647 gnttab_entry(i) = i + 1; 648 649 gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; 650 gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; 651 gnttab_free_head = NR_RESERVED_ENTRIES; 652 653 if (bootverbose) 654 printf("Grant table initialized\n"); 655 656 return (0); 657 658 ini_nomem: 659 for (i--; i >= 0; i--) 660 free(gnttab_list[i], M_DEVBUF); 661 free(gnttab_list, M_DEVBUF); 662 return (ENOMEM); 663 } 664 665 /*-------------------- Private Device Attachment Data -----------------------*/ 666 static device_method_t granttable_methods[] = { 667 /* Device interface */ 668 DEVMETHOD(device_identify, granttable_identify), 669 DEVMETHOD(device_probe, granttable_probe), 670 DEVMETHOD(device_attach, granttable_attach), 671 672 DEVMETHOD_END 673 }; 674 675 DEFINE_CLASS_0(granttable, granttable_driver, granttable_methods, 0); 676 677 DRIVER_MODULE_ORDERED(granttable, xenpv, granttable_driver, NULL, NULL, 678 SI_ORDER_FIRST); 679