1 /****************************************************************************** 2 * gnttab.c 3 * 4 * Two sets of functionality: 5 * 1. Granting foreign access to our memory reservation. 6 * 2. Accessing others' memory reservations via grant references. 7 * (i.e., mechanisms for both sender and recipient of grant references) 8 * 9 * Copyright (c) 2005, Christopher Clark 10 * Copyright (c) 2004, K A Fraser 11 */ 12 13 #include <sys/cdefs.h> 14 #include <sys/param.h> 15 #include <sys/systm.h> 16 #include <sys/bus.h> 17 #include <sys/conf.h> 18 #include <sys/module.h> 19 #include <sys/kernel.h> 20 #include <sys/lock.h> 21 #include <sys/malloc.h> 22 #include <sys/mman.h> 23 #include <sys/limits.h> 24 #include <sys/rman.h> 25 #include <machine/resource.h> 26 #include <machine/cpu.h> 27 28 #include <xen/xen-os.h> 29 #include <xen/hypervisor.h> 30 #include <xen/gnttab.h> 31 32 #include <vm/vm.h> 33 #include <vm/vm_kern.h> 34 #include <vm/vm_extern.h> 35 #include <vm/pmap.h> 36 37 /* External tools reserve first few grant table entries. */ 38 #define NR_RESERVED_ENTRIES 8 39 #define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_v1_t)) 40 41 static grant_ref_t **gnttab_list; 42 static unsigned int nr_grant_frames; 43 static unsigned int boot_max_nr_grant_frames; 44 static int gnttab_free_count; 45 static grant_ref_t gnttab_free_head; 46 static struct mtx gnttab_list_lock; 47 48 /* 49 * Resource representing allocated physical address space 50 * for the grant table metainfo 51 */ 52 static struct resource *gnttab_pseudo_phys_res; 53 54 /* Resource id for allocated physical address space. */ 55 static int gnttab_pseudo_phys_res_id; 56 57 static grant_entry_v1_t *shared; 58 59 static struct gnttab_free_callback *gnttab_free_callback_list = NULL; 60 61 static int gnttab_expand(unsigned int req_entries); 62 63 #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) 64 #define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP]) 65 66 static int 67 get_free_entries(int count, int *entries) 68 { 69 int ref, error; 70 grant_ref_t head; 71 72 mtx_lock(&gnttab_list_lock); 73 if ((gnttab_free_count < count) && 74 ((error = gnttab_expand(count - gnttab_free_count)) != 0)) { 75 mtx_unlock(&gnttab_list_lock); 76 return (error); 77 } 78 ref = head = gnttab_free_head; 79 gnttab_free_count -= count; 80 while (count-- > 1) 81 head = gnttab_entry(head); 82 gnttab_free_head = gnttab_entry(head); 83 gnttab_entry(head) = GNTTAB_LIST_END; 84 mtx_unlock(&gnttab_list_lock); 85 86 *entries = ref; 87 return (0); 88 } 89 90 static void 91 do_free_callbacks(void) 92 { 93 struct gnttab_free_callback *callback, *next; 94 95 callback = gnttab_free_callback_list; 96 gnttab_free_callback_list = NULL; 97 98 while (callback != NULL) { 99 next = callback->next; 100 if (gnttab_free_count >= callback->count) { 101 callback->next = NULL; 102 callback->fn(callback->arg); 103 } else { 104 callback->next = gnttab_free_callback_list; 105 gnttab_free_callback_list = callback; 106 } 107 callback = next; 108 } 109 } 110 111 static inline void 112 check_free_callbacks(void) 113 { 114 if (__predict_false(gnttab_free_callback_list != NULL)) 115 do_free_callbacks(); 116 } 117 118 static void 119 put_free_entry(grant_ref_t ref) 120 { 121 122 mtx_lock(&gnttab_list_lock); 123 gnttab_entry(ref) = gnttab_free_head; 124 gnttab_free_head = ref; 125 gnttab_free_count++; 126 check_free_callbacks(); 127 mtx_unlock(&gnttab_list_lock); 128 } 129 130 /* 131 * Public grant-issuing interface functions 132 */ 133 134 int 135 gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly, 136 grant_ref_t *result) 137 { 138 int error, ref; 139 140 error = get_free_entries(1, &ref); 141 142 if (__predict_false(error)) 143 return (error); 144 145 shared[ref].frame = frame; 146 shared[ref].domid = domid; 147 wmb(); 148 shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); 149 150 if (result) 151 *result = ref; 152 153 return (0); 154 } 155 156 void 157 gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, 158 unsigned long frame, int readonly) 159 { 160 161 shared[ref].frame = frame; 162 shared[ref].domid = domid; 163 wmb(); 164 shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); 165 } 166 167 int 168 gnttab_query_foreign_access(grant_ref_t ref) 169 { 170 uint16_t nflags; 171 172 nflags = shared[ref].flags; 173 174 return (nflags & (GTF_reading|GTF_writing)); 175 } 176 177 int 178 gnttab_end_foreign_access_ref(grant_ref_t ref) 179 { 180 uint16_t flags; 181 182 while (!((flags = atomic_load_16(&shared[ref].flags)) & 183 (GTF_reading|GTF_writing))) 184 if (atomic_cmpset_16(&shared[ref].flags, flags, 0)) 185 return (1); 186 187 printf("%s: WARNING: g.e. still in use!\n", __func__); 188 return (0); 189 } 190 191 void 192 gnttab_end_foreign_access(grant_ref_t ref, void *page) 193 { 194 if (gnttab_end_foreign_access_ref(ref)) { 195 put_free_entry(ref); 196 if (page != NULL) { 197 free(page, M_DEVBUF); 198 } 199 } 200 else { 201 /* XXX This needs to be fixed so that the ref and page are 202 placed on a list to be freed up later. */ 203 printf("%s: WARNING: leaking g.e. and page still in use!\n", 204 __func__); 205 } 206 } 207 208 void 209 gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs) 210 { 211 grant_ref_t *last_ref; 212 grant_ref_t head; 213 grant_ref_t tail; 214 215 head = GNTTAB_LIST_END; 216 tail = *refs; 217 last_ref = refs + count; 218 while (refs != last_ref) { 219 if (gnttab_end_foreign_access_ref(*refs)) { 220 gnttab_entry(*refs) = head; 221 head = *refs; 222 } else { 223 /* 224 * XXX This needs to be fixed so that the ref 225 * is placed on a list to be freed up later. 226 */ 227 printf("%s: WARNING: leaking g.e. still in use!\n", 228 __func__); 229 count--; 230 } 231 refs++; 232 } 233 234 if (count != 0) { 235 mtx_lock(&gnttab_list_lock); 236 gnttab_free_count += count; 237 gnttab_entry(tail) = gnttab_free_head; 238 gnttab_free_head = head; 239 check_free_callbacks(); 240 mtx_unlock(&gnttab_list_lock); 241 } 242 } 243 244 int 245 gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, 246 grant_ref_t *result) 247 { 248 int error, ref; 249 250 error = get_free_entries(1, &ref); 251 if (__predict_false(error)) 252 return (error); 253 254 gnttab_grant_foreign_transfer_ref(ref, domid, pfn); 255 256 *result = ref; 257 return (0); 258 } 259 260 void 261 gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, 262 unsigned long pfn) 263 { 264 shared[ref].frame = pfn; 265 shared[ref].domid = domid; 266 wmb(); 267 shared[ref].flags = GTF_accept_transfer; 268 } 269 270 unsigned long 271 gnttab_end_foreign_transfer_ref(grant_ref_t ref) 272 { 273 unsigned long frame; 274 uint16_t flags; 275 276 /* 277 * If a transfer is not even yet started, try to reclaim the grant 278 * reference and return failure (== 0). 279 * 280 * NOTE: This is a loop since the atomic cmpset can fail multiple 281 * times. In normal operation it will be rare to execute more than 282 * twice. Attempting an attack would consume a great deal of 283 * attacker resources and be unlikely to prolong the loop very much. 284 */ 285 while (!((flags = atomic_load_16(&shared[ref].flags)) & 286 GTF_transfer_committed)) 287 if (atomic_cmpset_16(&shared[ref].flags, flags, 0)) 288 return (0); 289 290 /* If a transfer is in progress then wait until it is completed. */ 291 while (!(flags & GTF_transfer_completed)) { 292 cpu_spinwait(); 293 flags = atomic_load_16(&shared[ref].flags); 294 } 295 296 /* Read the frame number /after/ reading completion status. */ 297 rmb(); 298 frame = shared[ref].frame; 299 KASSERT(frame != 0, ("grant table inconsistent")); 300 301 return (frame); 302 } 303 304 unsigned long 305 gnttab_end_foreign_transfer(grant_ref_t ref) 306 { 307 unsigned long frame = gnttab_end_foreign_transfer_ref(ref); 308 309 put_free_entry(ref); 310 return (frame); 311 } 312 313 void 314 gnttab_free_grant_reference(grant_ref_t ref) 315 { 316 317 put_free_entry(ref); 318 } 319 320 void 321 gnttab_free_grant_references(grant_ref_t head) 322 { 323 grant_ref_t ref; 324 int count = 1; 325 326 if (head == GNTTAB_LIST_END) 327 return; 328 329 ref = head; 330 while (gnttab_entry(ref) != GNTTAB_LIST_END) { 331 ref = gnttab_entry(ref); 332 count++; 333 } 334 mtx_lock(&gnttab_list_lock); 335 gnttab_entry(ref) = gnttab_free_head; 336 gnttab_free_head = head; 337 gnttab_free_count += count; 338 check_free_callbacks(); 339 mtx_unlock(&gnttab_list_lock); 340 } 341 342 int 343 gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head) 344 { 345 int ref, error; 346 347 error = get_free_entries(count, &ref); 348 if (__predict_false(error)) 349 return (error); 350 351 *head = ref; 352 return (0); 353 } 354 355 int 356 gnttab_empty_grant_references(const grant_ref_t *private_head) 357 { 358 359 return (*private_head == GNTTAB_LIST_END); 360 } 361 362 int 363 gnttab_claim_grant_reference(grant_ref_t *private_head) 364 { 365 grant_ref_t g = *private_head; 366 367 if (__predict_false(g == GNTTAB_LIST_END)) 368 return (g); 369 *private_head = gnttab_entry(g); 370 return (g); 371 } 372 373 void 374 gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release) 375 { 376 377 gnttab_entry(release) = *private_head; 378 *private_head = release; 379 } 380 381 void 382 gnttab_request_free_callback(struct gnttab_free_callback *callback, 383 void (*fn)(void *), void *arg, uint16_t count) 384 { 385 386 mtx_lock(&gnttab_list_lock); 387 if (callback->next) 388 goto out; 389 callback->fn = fn; 390 callback->arg = arg; 391 callback->count = count; 392 callback->next = gnttab_free_callback_list; 393 gnttab_free_callback_list = callback; 394 check_free_callbacks(); 395 out: 396 mtx_unlock(&gnttab_list_lock); 397 398 } 399 400 void 401 gnttab_cancel_free_callback(struct gnttab_free_callback *callback) 402 { 403 struct gnttab_free_callback **pcb; 404 405 mtx_lock(&gnttab_list_lock); 406 for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { 407 if (*pcb == callback) { 408 *pcb = callback->next; 409 break; 410 } 411 } 412 mtx_unlock(&gnttab_list_lock); 413 } 414 415 static int 416 grow_gnttab_list(unsigned int more_frames) 417 { 418 unsigned int new_nr_grant_frames, extra_entries, i; 419 420 new_nr_grant_frames = nr_grant_frames + more_frames; 421 extra_entries = more_frames * GREFS_PER_GRANT_FRAME; 422 423 for (i = nr_grant_frames; i < new_nr_grant_frames; i++) 424 { 425 gnttab_list[i] = (grant_ref_t *) 426 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); 427 428 if (!gnttab_list[i]) 429 goto grow_nomem; 430 } 431 432 for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; 433 i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) 434 gnttab_entry(i) = i + 1; 435 436 gnttab_entry(i) = gnttab_free_head; 437 gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; 438 gnttab_free_count += extra_entries; 439 440 nr_grant_frames = new_nr_grant_frames; 441 442 check_free_callbacks(); 443 444 return (0); 445 446 grow_nomem: 447 for ( ; i >= nr_grant_frames; i--) 448 free(gnttab_list[i], M_DEVBUF); 449 return (ENOMEM); 450 } 451 452 static unsigned int 453 __max_nr_grant_frames(void) 454 { 455 struct gnttab_query_size query; 456 int rc; 457 458 query.dom = DOMID_SELF; 459 460 rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); 461 if ((rc < 0) || (query.status != GNTST_okay)) 462 return (4); /* Legacy max supported number of frames */ 463 464 return (query.max_nr_frames); 465 } 466 467 static inline 468 unsigned int max_nr_grant_frames(void) 469 { 470 471 return (min(__max_nr_grant_frames(), boot_max_nr_grant_frames)); 472 } 473 474 #ifdef notyet 475 /* 476 * XXX needed for backend support 477 * 478 */ 479 static int 480 map_pte_fn(pte_t *pte, struct page *pmd_page, 481 unsigned long addr, void *data) 482 { 483 unsigned long **frames = (unsigned long **)data; 484 485 set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL)); 486 (*frames)++; 487 return 0; 488 } 489 490 static int 491 unmap_pte_fn(pte_t *pte, struct page *pmd_page, 492 unsigned long addr, void *data) 493 { 494 495 set_pte_at(&init_mm, addr, pte, __pte(0)); 496 return 0; 497 } 498 #endif 499 500 static vm_paddr_t resume_frames; 501 502 static void 503 gnttab_map(unsigned int start_idx, unsigned int end_idx) 504 { 505 struct xen_add_to_physmap xatp; 506 unsigned int i = end_idx; 507 508 /* 509 * Loop backwards, so that the first hypercall has the largest index, 510 * ensuring that the table will grow only once. 511 */ 512 do { 513 xatp.domid = DOMID_SELF; 514 xatp.idx = i; 515 xatp.space = XENMAPSPACE_grant_table; 516 xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i; 517 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) 518 panic("HYPERVISOR_memory_op failed to map gnttab"); 519 } while (i-- > start_idx); 520 } 521 522 int 523 gnttab_resume(device_t dev) 524 { 525 unsigned int max_nr_gframes, nr_gframes; 526 527 nr_gframes = nr_grant_frames; 528 max_nr_gframes = max_nr_grant_frames(); 529 if (max_nr_gframes < nr_gframes) 530 return (ENOSYS); 531 532 if (!resume_frames) { 533 KASSERT(dev != NULL, 534 ("No resume frames and no device provided")); 535 536 gnttab_pseudo_phys_res = xenmem_alloc(dev, 537 &gnttab_pseudo_phys_res_id, PAGE_SIZE * max_nr_gframes); 538 if (gnttab_pseudo_phys_res == NULL) 539 panic("Unable to reserve physical memory for gnttab"); 540 resume_frames = rman_get_start(gnttab_pseudo_phys_res); 541 shared = rman_get_virtual(gnttab_pseudo_phys_res); 542 } 543 gnttab_map(0, nr_gframes - 1); 544 545 return (0); 546 } 547 548 static int 549 gnttab_expand(unsigned int req_entries) 550 { 551 unsigned int cur, extra; 552 553 cur = nr_grant_frames; 554 extra = howmany(req_entries, GREFS_PER_GRANT_FRAME); 555 if (cur + extra > max_nr_grant_frames()) 556 return (ENOSPC); 557 558 gnttab_map(cur, cur + extra - 1); 559 560 return (grow_gnttab_list(extra)); 561 } 562 563 MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF | MTX_RECURSE); 564 565 /*------------------ Private Device Attachment Functions --------------------*/ 566 /** 567 * \brief Identify instances of this device type in the system. 568 * 569 * \param driver The driver performing this identify action. 570 * \param parent The NewBus parent device for any devices this method adds. 571 */ 572 static void 573 granttable_identify(driver_t *driver __unused, device_t parent) 574 { 575 576 KASSERT(xen_domain(), 577 ("Trying to attach grant-table device on non Xen domain")); 578 /* 579 * A single device instance for our driver is always present 580 * in a system operating under Xen. 581 */ 582 if (BUS_ADD_CHILD(parent, 0, driver->name, 0) == NULL) 583 panic("unable to attach Xen Grant-table device"); 584 } 585 586 /** 587 * \brief Probe for the existence of the Xen Grant-table device 588 * 589 * \param dev NewBus device_t for this instance. 590 * 591 * \return Always returns 0 indicating success. 592 */ 593 static int 594 granttable_probe(device_t dev) 595 { 596 597 device_set_desc(dev, "Xen Grant-table Device"); 598 return (BUS_PROBE_NOWILDCARD); 599 } 600 601 /** 602 * \brief Attach the Xen Grant-table device. 603 * 604 * \param dev NewBus device_t for this instance. 605 * 606 * \return On success, 0. Otherwise an errno value indicating the 607 * type of failure. 608 */ 609 static int 610 granttable_attach(device_t dev) 611 { 612 int i; 613 unsigned int max_nr_glist_frames; 614 unsigned int nr_init_grefs; 615 616 nr_grant_frames = 1; 617 boot_max_nr_grant_frames = __max_nr_grant_frames(); 618 619 /* Determine the maximum number of frames required for the 620 * grant reference free list on the current hypervisor. 621 */ 622 max_nr_glist_frames = (boot_max_nr_grant_frames * 623 GREFS_PER_GRANT_FRAME / 624 (PAGE_SIZE / sizeof(grant_ref_t))); 625 626 gnttab_list = malloc(max_nr_glist_frames * sizeof(grant_ref_t *), 627 M_DEVBUF, M_NOWAIT); 628 629 if (gnttab_list == NULL) 630 return (ENOMEM); 631 632 for (i = 0; i < nr_grant_frames; i++) { 633 gnttab_list[i] = (grant_ref_t *) 634 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); 635 if (gnttab_list[i] == NULL) 636 goto ini_nomem; 637 } 638 639 if (gnttab_resume(dev)) 640 return (ENODEV); 641 642 nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; 643 644 for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) 645 gnttab_entry(i) = i + 1; 646 647 gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; 648 gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; 649 gnttab_free_head = NR_RESERVED_ENTRIES; 650 651 if (bootverbose) 652 printf("Grant table initialized\n"); 653 654 return (0); 655 656 ini_nomem: 657 for (i--; i >= 0; i--) 658 free(gnttab_list[i], M_DEVBUF); 659 free(gnttab_list, M_DEVBUF); 660 return (ENOMEM); 661 } 662 663 /*-------------------- Private Device Attachment Data -----------------------*/ 664 static device_method_t granttable_methods[] = { 665 /* Device interface */ 666 DEVMETHOD(device_identify, granttable_identify), 667 DEVMETHOD(device_probe, granttable_probe), 668 DEVMETHOD(device_attach, granttable_attach), 669 670 DEVMETHOD_END 671 }; 672 673 DEFINE_CLASS_0(granttable, granttable_driver, granttable_methods, 0); 674 675 DRIVER_MODULE_ORDERED(granttable, xenpv, granttable_driver, NULL, NULL, 676 SI_ORDER_FIRST); 677