1 /****************************************************************************** 2 * gnttab.c 3 * 4 * Two sets of functionality: 5 * 1. Granting foreign access to our memory reservation. 6 * 2. Accessing others' memory reservations via grant references. 7 * (i.e., mechanisms for both sender and recipient of grant references) 8 * 9 * Copyright (c) 2005, Christopher Clark 10 * Copyright (c) 2004, K A Fraser 11 */ 12 13 #include <sys/param.h> 14 #include <sys/systm.h> 15 #include <sys/bus.h> 16 #include <sys/conf.h> 17 #include <sys/module.h> 18 #include <sys/kernel.h> 19 #include <sys/lock.h> 20 #include <sys/malloc.h> 21 #include <sys/mman.h> 22 #include <sys/limits.h> 23 #include <sys/rman.h> 24 #include <machine/resource.h> 25 #include <machine/cpu.h> 26 27 #include <xen/xen-os.h> 28 #include <xen/hypervisor.h> 29 #include <xen/gnttab.h> 30 31 #include <vm/vm.h> 32 #include <vm/vm_kern.h> 33 #include <vm/vm_extern.h> 34 #include <vm/pmap.h> 35 36 /* External tools reserve first few grant table entries. */ 37 #define NR_RESERVED_ENTRIES 8 38 #define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_v1_t)) 39 40 static grant_ref_t **gnttab_list; 41 static unsigned int nr_grant_frames; 42 static unsigned int boot_max_nr_grant_frames; 43 static int gnttab_free_count; 44 static grant_ref_t gnttab_free_head; 45 static struct mtx gnttab_list_lock; 46 47 /* 48 * Resource representing allocated physical address space 49 * for the grant table metainfo 50 */ 51 static struct resource *gnttab_pseudo_phys_res; 52 53 /* Resource id for allocated physical address space. */ 54 static int gnttab_pseudo_phys_res_id; 55 56 static grant_entry_v1_t *shared; 57 58 static struct gnttab_free_callback *gnttab_free_callback_list = NULL; 59 60 static int gnttab_expand(unsigned int req_entries); 61 62 #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) 63 #define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP]) 64 65 static int 66 get_free_entries(int count, int *entries) 67 { 68 int ref, error; 69 grant_ref_t head; 70 71 mtx_lock(&gnttab_list_lock); 72 if ((gnttab_free_count < count) && 73 ((error = gnttab_expand(count - gnttab_free_count)) != 0)) { 74 mtx_unlock(&gnttab_list_lock); 75 return (error); 76 } 77 ref = head = gnttab_free_head; 78 gnttab_free_count -= count; 79 while (count-- > 1) 80 head = gnttab_entry(head); 81 gnttab_free_head = gnttab_entry(head); 82 gnttab_entry(head) = GNTTAB_LIST_END; 83 mtx_unlock(&gnttab_list_lock); 84 85 *entries = ref; 86 return (0); 87 } 88 89 static void 90 do_free_callbacks(void) 91 { 92 struct gnttab_free_callback *callback, *next; 93 94 callback = gnttab_free_callback_list; 95 gnttab_free_callback_list = NULL; 96 97 while (callback != NULL) { 98 next = callback->next; 99 if (gnttab_free_count >= callback->count) { 100 callback->next = NULL; 101 callback->fn(callback->arg); 102 } else { 103 callback->next = gnttab_free_callback_list; 104 gnttab_free_callback_list = callback; 105 } 106 callback = next; 107 } 108 } 109 110 static inline void 111 check_free_callbacks(void) 112 { 113 if (__predict_false(gnttab_free_callback_list != NULL)) 114 do_free_callbacks(); 115 } 116 117 static void 118 put_free_entry(grant_ref_t ref) 119 { 120 121 mtx_lock(&gnttab_list_lock); 122 gnttab_entry(ref) = gnttab_free_head; 123 gnttab_free_head = ref; 124 gnttab_free_count++; 125 check_free_callbacks(); 126 mtx_unlock(&gnttab_list_lock); 127 } 128 129 /* 130 * Public grant-issuing interface functions 131 */ 132 133 int 134 gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly, 135 grant_ref_t *result) 136 { 137 int error, ref; 138 139 error = get_free_entries(1, &ref); 140 141 if (__predict_false(error)) 142 return (error); 143 144 shared[ref].frame = frame; 145 shared[ref].domid = domid; 146 wmb(); 147 shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); 148 149 if (result) 150 *result = ref; 151 152 return (0); 153 } 154 155 void 156 gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, 157 unsigned long frame, int readonly) 158 { 159 160 shared[ref].frame = frame; 161 shared[ref].domid = domid; 162 wmb(); 163 shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); 164 } 165 166 int 167 gnttab_query_foreign_access(grant_ref_t ref) 168 { 169 uint16_t nflags; 170 171 nflags = shared[ref].flags; 172 173 return (nflags & (GTF_reading|GTF_writing)); 174 } 175 176 int 177 gnttab_end_foreign_access_ref(grant_ref_t ref) 178 { 179 uint16_t flags; 180 181 while (!((flags = atomic_load_16(&shared[ref].flags)) & 182 (GTF_reading|GTF_writing))) 183 if (atomic_cmpset_16(&shared[ref].flags, flags, 0)) 184 return (1); 185 186 printf("%s: WARNING: g.e. still in use!\n", __func__); 187 return (0); 188 } 189 190 void 191 gnttab_end_foreign_access(grant_ref_t ref, void *page) 192 { 193 if (gnttab_end_foreign_access_ref(ref)) { 194 put_free_entry(ref); 195 if (page != NULL) { 196 free(page, M_DEVBUF); 197 } 198 } 199 else { 200 /* XXX This needs to be fixed so that the ref and page are 201 placed on a list to be freed up later. */ 202 printf("%s: WARNING: leaking g.e. and page still in use!\n", 203 __func__); 204 } 205 } 206 207 void 208 gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs) 209 { 210 grant_ref_t *last_ref; 211 grant_ref_t head; 212 grant_ref_t tail; 213 214 head = GNTTAB_LIST_END; 215 tail = *refs; 216 last_ref = refs + count; 217 while (refs != last_ref) { 218 if (gnttab_end_foreign_access_ref(*refs)) { 219 gnttab_entry(*refs) = head; 220 head = *refs; 221 } else { 222 /* 223 * XXX This needs to be fixed so that the ref 224 * is placed on a list to be freed up later. 225 */ 226 printf("%s: WARNING: leaking g.e. still in use!\n", 227 __func__); 228 count--; 229 } 230 refs++; 231 } 232 233 if (count != 0) { 234 mtx_lock(&gnttab_list_lock); 235 gnttab_free_count += count; 236 gnttab_entry(tail) = gnttab_free_head; 237 gnttab_free_head = head; 238 check_free_callbacks(); 239 mtx_unlock(&gnttab_list_lock); 240 } 241 } 242 243 int 244 gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, 245 grant_ref_t *result) 246 { 247 int error, ref; 248 249 error = get_free_entries(1, &ref); 250 if (__predict_false(error)) 251 return (error); 252 253 gnttab_grant_foreign_transfer_ref(ref, domid, pfn); 254 255 *result = ref; 256 return (0); 257 } 258 259 void 260 gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, 261 unsigned long pfn) 262 { 263 shared[ref].frame = pfn; 264 shared[ref].domid = domid; 265 wmb(); 266 shared[ref].flags = GTF_accept_transfer; 267 } 268 269 unsigned long 270 gnttab_end_foreign_transfer_ref(grant_ref_t ref) 271 { 272 unsigned long frame; 273 uint16_t flags; 274 275 /* 276 * If a transfer is not even yet started, try to reclaim the grant 277 * reference and return failure (== 0). 278 * 279 * NOTE: This is a loop since the atomic cmpset can fail multiple 280 * times. In normal operation it will be rare to execute more than 281 * twice. Attempting an attack would consume a great deal of 282 * attacker resources and be unlikely to prolong the loop very much. 283 */ 284 while (!((flags = atomic_load_16(&shared[ref].flags)) & 285 GTF_transfer_committed)) 286 if (atomic_cmpset_16(&shared[ref].flags, flags, 0)) 287 return (0); 288 289 /* If a transfer is in progress then wait until it is completed. */ 290 while (!(flags & GTF_transfer_completed)) { 291 cpu_spinwait(); 292 flags = atomic_load_16(&shared[ref].flags); 293 } 294 295 /* Read the frame number /after/ reading completion status. */ 296 rmb(); 297 frame = shared[ref].frame; 298 KASSERT(frame != 0, ("grant table inconsistent")); 299 300 return (frame); 301 } 302 303 unsigned long 304 gnttab_end_foreign_transfer(grant_ref_t ref) 305 { 306 unsigned long frame = gnttab_end_foreign_transfer_ref(ref); 307 308 put_free_entry(ref); 309 return (frame); 310 } 311 312 void 313 gnttab_free_grant_reference(grant_ref_t ref) 314 { 315 316 put_free_entry(ref); 317 } 318 319 void 320 gnttab_free_grant_references(grant_ref_t head) 321 { 322 grant_ref_t ref; 323 int count = 1; 324 325 if (head == GNTTAB_LIST_END) 326 return; 327 328 ref = head; 329 while (gnttab_entry(ref) != GNTTAB_LIST_END) { 330 ref = gnttab_entry(ref); 331 count++; 332 } 333 mtx_lock(&gnttab_list_lock); 334 gnttab_entry(ref) = gnttab_free_head; 335 gnttab_free_head = head; 336 gnttab_free_count += count; 337 check_free_callbacks(); 338 mtx_unlock(&gnttab_list_lock); 339 } 340 341 int 342 gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head) 343 { 344 int ref, error; 345 346 error = get_free_entries(count, &ref); 347 if (__predict_false(error)) 348 return (error); 349 350 *head = ref; 351 return (0); 352 } 353 354 int 355 gnttab_empty_grant_references(const grant_ref_t *private_head) 356 { 357 358 return (*private_head == GNTTAB_LIST_END); 359 } 360 361 int 362 gnttab_claim_grant_reference(grant_ref_t *private_head) 363 { 364 grant_ref_t g = *private_head; 365 366 if (__predict_false(g == GNTTAB_LIST_END)) 367 return (g); 368 *private_head = gnttab_entry(g); 369 return (g); 370 } 371 372 void 373 gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release) 374 { 375 376 gnttab_entry(release) = *private_head; 377 *private_head = release; 378 } 379 380 void 381 gnttab_request_free_callback(struct gnttab_free_callback *callback, 382 void (*fn)(void *), void *arg, uint16_t count) 383 { 384 385 mtx_lock(&gnttab_list_lock); 386 if (callback->next) 387 goto out; 388 callback->fn = fn; 389 callback->arg = arg; 390 callback->count = count; 391 callback->next = gnttab_free_callback_list; 392 gnttab_free_callback_list = callback; 393 check_free_callbacks(); 394 out: 395 mtx_unlock(&gnttab_list_lock); 396 397 } 398 399 void 400 gnttab_cancel_free_callback(struct gnttab_free_callback *callback) 401 { 402 struct gnttab_free_callback **pcb; 403 404 mtx_lock(&gnttab_list_lock); 405 for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { 406 if (*pcb == callback) { 407 *pcb = callback->next; 408 break; 409 } 410 } 411 mtx_unlock(&gnttab_list_lock); 412 } 413 414 static int 415 grow_gnttab_list(unsigned int more_frames) 416 { 417 unsigned int new_nr_grant_frames, extra_entries, i; 418 419 new_nr_grant_frames = nr_grant_frames + more_frames; 420 extra_entries = more_frames * GREFS_PER_GRANT_FRAME; 421 422 for (i = nr_grant_frames; i < new_nr_grant_frames; i++) 423 { 424 gnttab_list[i] = (grant_ref_t *) 425 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); 426 427 if (!gnttab_list[i]) 428 goto grow_nomem; 429 } 430 431 for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; 432 i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) 433 gnttab_entry(i) = i + 1; 434 435 gnttab_entry(i) = gnttab_free_head; 436 gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; 437 gnttab_free_count += extra_entries; 438 439 nr_grant_frames = new_nr_grant_frames; 440 441 check_free_callbacks(); 442 443 return (0); 444 445 grow_nomem: 446 for ( ; i >= nr_grant_frames; i--) 447 free(gnttab_list[i], M_DEVBUF); 448 return (ENOMEM); 449 } 450 451 static unsigned int 452 __max_nr_grant_frames(void) 453 { 454 struct gnttab_query_size query; 455 int rc; 456 457 query.dom = DOMID_SELF; 458 459 rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); 460 if ((rc < 0) || (query.status != GNTST_okay)) 461 return (4); /* Legacy max supported number of frames */ 462 463 return (query.max_nr_frames); 464 } 465 466 static inline 467 unsigned int max_nr_grant_frames(void) 468 { 469 470 return (min(__max_nr_grant_frames(), boot_max_nr_grant_frames)); 471 } 472 473 #ifdef notyet 474 /* 475 * XXX needed for backend support 476 * 477 */ 478 static int 479 map_pte_fn(pte_t *pte, struct page *pmd_page, 480 unsigned long addr, void *data) 481 { 482 unsigned long **frames = (unsigned long **)data; 483 484 set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL)); 485 (*frames)++; 486 return 0; 487 } 488 489 static int 490 unmap_pte_fn(pte_t *pte, struct page *pmd_page, 491 unsigned long addr, void *data) 492 { 493 494 set_pte_at(&init_mm, addr, pte, __pte(0)); 495 return 0; 496 } 497 #endif 498 499 static vm_paddr_t resume_frames; 500 501 static void 502 gnttab_map(unsigned int start_idx, unsigned int end_idx) 503 { 504 struct xen_add_to_physmap xatp; 505 unsigned int i = end_idx; 506 507 /* 508 * Loop backwards, so that the first hypercall has the largest index, 509 * ensuring that the table will grow only once. 510 */ 511 do { 512 xatp.domid = DOMID_SELF; 513 xatp.idx = i; 514 xatp.space = XENMAPSPACE_grant_table; 515 xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i; 516 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) 517 panic("HYPERVISOR_memory_op failed to map gnttab"); 518 } while (i-- > start_idx); 519 } 520 521 int 522 gnttab_resume(device_t dev) 523 { 524 unsigned int max_nr_gframes, nr_gframes; 525 526 nr_gframes = nr_grant_frames; 527 max_nr_gframes = max_nr_grant_frames(); 528 if (max_nr_gframes < nr_gframes) 529 return (ENOSYS); 530 531 if (!resume_frames) { 532 KASSERT(dev != NULL, 533 ("No resume frames and no device provided")); 534 535 gnttab_pseudo_phys_res = xenmem_alloc(dev, 536 &gnttab_pseudo_phys_res_id, PAGE_SIZE * max_nr_gframes); 537 if (gnttab_pseudo_phys_res == NULL) 538 panic("Unable to reserve physical memory for gnttab"); 539 resume_frames = rman_get_start(gnttab_pseudo_phys_res); 540 shared = rman_get_virtual(gnttab_pseudo_phys_res); 541 } 542 gnttab_map(0, nr_gframes - 1); 543 544 return (0); 545 } 546 547 static int 548 gnttab_expand(unsigned int req_entries) 549 { 550 unsigned int cur, extra; 551 552 cur = nr_grant_frames; 553 extra = howmany(req_entries, GREFS_PER_GRANT_FRAME); 554 if (cur + extra > max_nr_grant_frames()) 555 return (ENOSPC); 556 557 gnttab_map(cur, cur + extra - 1); 558 559 return (grow_gnttab_list(extra)); 560 } 561 562 MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF | MTX_RECURSE); 563 564 /*------------------ Private Device Attachment Functions --------------------*/ 565 /** 566 * \brief Identify instances of this device type in the system. 567 * 568 * \param driver The driver performing this identify action. 569 * \param parent The NewBus parent device for any devices this method adds. 570 */ 571 static void 572 granttable_identify(driver_t *driver __unused, device_t parent) 573 { 574 575 KASSERT(xen_domain(), 576 ("Trying to attach grant-table device on non Xen domain")); 577 /* 578 * A single device instance for our driver is always present 579 * in a system operating under Xen. 580 */ 581 if (BUS_ADD_CHILD(parent, 0, driver->name, 0) == NULL) 582 panic("unable to attach Xen Grant-table device"); 583 } 584 585 /** 586 * \brief Probe for the existence of the Xen Grant-table device 587 * 588 * \param dev NewBus device_t for this instance. 589 * 590 * \return Always returns 0 indicating success. 591 */ 592 static int 593 granttable_probe(device_t dev) 594 { 595 596 device_set_desc(dev, "Xen Grant-table Device"); 597 return (BUS_PROBE_NOWILDCARD); 598 } 599 600 /** 601 * \brief Attach the Xen Grant-table device. 602 * 603 * \param dev NewBus device_t for this instance. 604 * 605 * \return On success, 0. Otherwise an errno value indicating the 606 * type of failure. 607 */ 608 static int 609 granttable_attach(device_t dev) 610 { 611 int i; 612 unsigned int nr_init_grefs; 613 614 nr_grant_frames = 1; 615 boot_max_nr_grant_frames = __max_nr_grant_frames(); 616 617 gnttab_list = malloc(boot_max_nr_grant_frames * sizeof(grant_ref_t *), 618 M_DEVBUF, M_NOWAIT); 619 620 if (gnttab_list == NULL) 621 return (ENOMEM); 622 623 for (i = 0; i < nr_grant_frames; i++) { 624 gnttab_list[i] = (grant_ref_t *) 625 malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); 626 if (gnttab_list[i] == NULL) 627 goto ini_nomem; 628 } 629 630 if (gnttab_resume(dev)) 631 return (ENODEV); 632 633 nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; 634 635 for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) 636 gnttab_entry(i) = i + 1; 637 638 gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; 639 gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; 640 gnttab_free_head = NR_RESERVED_ENTRIES; 641 642 if (bootverbose) 643 printf("Grant table initialized\n"); 644 645 return (0); 646 647 ini_nomem: 648 for (i--; i >= 0; i--) 649 free(gnttab_list[i], M_DEVBUF); 650 free(gnttab_list, M_DEVBUF); 651 return (ENOMEM); 652 } 653 654 /*-------------------- Private Device Attachment Data -----------------------*/ 655 static device_method_t granttable_methods[] = { 656 /* Device interface */ 657 DEVMETHOD(device_identify, granttable_identify), 658 DEVMETHOD(device_probe, granttable_probe), 659 DEVMETHOD(device_attach, granttable_attach), 660 661 DEVMETHOD_END 662 }; 663 664 DEFINE_CLASS_0(granttable, granttable_driver, granttable_methods, 0); 665 666 DRIVER_MODULE_ORDERED(granttable, xenpv, granttable_driver, NULL, NULL, 667 SI_ORDER_FIRST); 668