/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#ifdef DEBUG
#define XNB_DEBUG 1
#endif /* DEBUG */

#include "xnb.h"

#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/mac.h>
#include <sys/dlpi.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/pattr.h>
#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/balloon_impl.h>
#include <sys/evtchn_impl.h>
#include <sys/gnttab.h>
#include <vm/vm_dep.h>

#include <sys/gld.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <sys/vnic_impl.h> /* blech. */

/*
 * The terms "transmit" and "receive" are used in their traditional
 * sense here - packets from other parts of this system are
 * "transmitted" to the peer domain and those originating from the
 * peer are "received".
 *
 * In some cases this can be confusing, because various data
 * structures are shared with the domU driver, which has the opposite
 * view of what constitutes "transmit" and "receive". In naming the
 * shared structures the domU driver always wins.
 */

/*
 * XXPV dme: things to do, as well as various things indicated
 * throughout the source:
 * - copy avoidance outbound.
 * - copy avoidance inbound.
 * - transfer credit limiting.
 * - MAC address based filtering.
 */

/*
 * Linux expects to have some headroom in received buffers. The Linux
 * frontend driver (netfront) checks to see if the headroom is
 * available and will re-allocate the buffer to make room if
 * necessary. To avoid this we add TX_BUFFER_HEADROOM bytes of
 * headroom to each packet we pass to the peer.
85 */ 86 #define TX_BUFFER_HEADROOM 16 87 88 static boolean_t xnb_cksum_offload = B_TRUE; 89 90 static boolean_t xnb_connect_rings(dev_info_t *); 91 static void xnb_disconnect_rings(dev_info_t *); 92 static void xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t, 93 void *, void *); 94 static void xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t, 95 void *, void *); 96 97 static int xnb_rxbuf_constructor(void *, void *, int); 98 static void xnb_rxbuf_destructor(void *, void *); 99 static xnb_rxbuf_t *xnb_rxbuf_get(xnb_t *, int); 100 static void xnb_rxbuf_put(xnb_t *, xnb_rxbuf_t *); 101 static void xnb_rx_notify_peer(xnb_t *); 102 static void xnb_rx_complete(xnb_rxbuf_t *); 103 static void xnb_rx_mark_complete(xnb_t *, RING_IDX, int16_t); 104 static void xnb_rx_schedule_unmop(xnb_t *, gnttab_map_grant_ref_t *); 105 static void xnb_rx_perform_pending_unmop(xnb_t *); 106 107 #ifdef XNB_DEBUG 108 #define NR_GRANT_ENTRIES \ 109 (NR_GRANT_FRAMES * PAGESIZE / sizeof (grant_entry_t)) 110 #endif /* XNB_DEBUG */ 111 112 /* XXPV dme: are these really invalid? */ 113 #define INVALID_GRANT_HANDLE ((grant_handle_t)-1) 114 #define INVALID_GRANT_REF ((grant_ref_t)-1) 115 116 static kmem_cache_t *xnb_rxbuf_cachep; 117 static kmutex_t xnb_alloc_page_lock; 118 119 /* 120 * Statistics. 121 */ 122 static char *aux_statistics[] = { 123 "tx_cksum_deferred", 124 "rx_cksum_no_need", 125 "tx_notify_deferred", 126 "tx_notify_sent", 127 "rx_notify_deferred", 128 "rx_notify_sent", 129 "tx_too_early", 130 "rx_too_early", 131 "rx_allocb_failed", 132 "mac_full", 133 "spurious_intr", 134 "allocation_success", 135 "allocation_failure", 136 "small_allocation_success", 137 "small_allocation_failure", 138 "csum_hardware", 139 "csum_software", 140 }; 141 142 static int 143 xnb_ks_aux_update(kstat_t *ksp, int flag) 144 { 145 xnb_t *xnbp; 146 kstat_named_t *knp; 147 148 if (flag != KSTAT_READ) 149 return (EACCES); 150 151 xnbp = ksp->ks_private; 152 knp = ksp->ks_data; 153 154 /* 155 * Assignment order should match that of the names in 156 * aux_statistics. 157 */ 158 (knp++)->value.ui64 = xnbp->x_stat_tx_cksum_deferred; 159 (knp++)->value.ui64 = xnbp->x_stat_rx_cksum_no_need; 160 (knp++)->value.ui64 = xnbp->x_stat_tx_notify_deferred; 161 (knp++)->value.ui64 = xnbp->x_stat_tx_notify_sent; 162 (knp++)->value.ui64 = xnbp->x_stat_rx_notify_deferred; 163 (knp++)->value.ui64 = xnbp->x_stat_rx_notify_sent; 164 (knp++)->value.ui64 = xnbp->x_stat_tx_too_early; 165 (knp++)->value.ui64 = xnbp->x_stat_rx_too_early; 166 (knp++)->value.ui64 = xnbp->x_stat_rx_allocb_failed; 167 (knp++)->value.ui64 = xnbp->x_stat_mac_full; 168 (knp++)->value.ui64 = xnbp->x_stat_spurious_intr; 169 (knp++)->value.ui64 = xnbp->x_stat_allocation_success; 170 (knp++)->value.ui64 = xnbp->x_stat_allocation_failure; 171 (knp++)->value.ui64 = xnbp->x_stat_small_allocation_success; 172 (knp++)->value.ui64 = xnbp->x_stat_small_allocation_failure; 173 (knp++)->value.ui64 = xnbp->x_stat_csum_hardware; 174 (knp++)->value.ui64 = xnbp->x_stat_csum_software; 175 176 return (0); 177 } 178 179 static boolean_t 180 xnb_ks_init(xnb_t *xnbp) 181 { 182 int nstat = sizeof (aux_statistics) / 183 sizeof (aux_statistics[0]); 184 char **cp = aux_statistics; 185 kstat_named_t *knp; 186 187 /* 188 * Create and initialise kstats. 
189 */ 190 xnbp->x_kstat_aux = kstat_create(ddi_driver_name(xnbp->x_devinfo), 191 ddi_get_instance(xnbp->x_devinfo), "aux_statistics", "net", 192 KSTAT_TYPE_NAMED, nstat, 0); 193 if (xnbp->x_kstat_aux == NULL) 194 return (B_FALSE); 195 196 xnbp->x_kstat_aux->ks_private = xnbp; 197 xnbp->x_kstat_aux->ks_update = xnb_ks_aux_update; 198 199 knp = xnbp->x_kstat_aux->ks_data; 200 while (nstat > 0) { 201 kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); 202 203 knp++; 204 cp++; 205 nstat--; 206 } 207 208 kstat_install(xnbp->x_kstat_aux); 209 210 return (B_TRUE); 211 } 212 213 static void 214 xnb_ks_free(xnb_t *xnbp) 215 { 216 kstat_delete(xnbp->x_kstat_aux); 217 } 218 219 /* 220 * Software checksum calculation and insertion for an arbitrary packet. 221 */ 222 /*ARGSUSED*/ 223 static mblk_t * 224 xnb_software_csum(xnb_t *xnbp, mblk_t *mp) 225 { 226 /* 227 * XXPV dme: shouldn't rely on vnic_fix_cksum(), not least 228 * because it doesn't cover all of the interesting cases :-( 229 */ 230 (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, 231 HCK_FULLCKSUM, KM_NOSLEEP); 232 233 return (vnic_fix_cksum(mp)); 234 } 235 236 mblk_t * 237 xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab) 238 { 239 struct ether_header *ehp; 240 uint16_t sap; 241 uint32_t offset; 242 ipha_t *ipha; 243 244 ASSERT(mp->b_next == NULL); 245 246 /* 247 * Check that the packet is contained in a single mblk. In 248 * the "from peer" path this is true today, but will change 249 * when scatter gather support is added. In the "to peer" 250 * path we cannot be sure, but in most cases it will be true 251 * (in the xnbo case the packet has come from a MAC device 252 * which is unlikely to split packets). 253 */ 254 if (mp->b_cont != NULL) 255 goto software; 256 257 /* 258 * If the MAC has no hardware capability don't do any further 259 * checking. 260 */ 261 if (capab == 0) 262 goto software; 263 264 ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 265 ehp = (struct ether_header *)mp->b_rptr; 266 267 if (ntohs(ehp->ether_type) == VLAN_TPID) { 268 struct ether_vlan_header *evhp; 269 270 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 271 evhp = (struct ether_vlan_header *)mp->b_rptr; 272 sap = ntohs(evhp->ether_type); 273 offset = sizeof (struct ether_vlan_header); 274 } else { 275 sap = ntohs(ehp->ether_type); 276 offset = sizeof (struct ether_header); 277 } 278 279 /* 280 * We only attempt to do IPv4 packets in hardware. 281 */ 282 if (sap != ETHERTYPE_IP) 283 goto software; 284 285 /* 286 * We know that this is an IPv4 packet. 287 */ 288 ipha = (ipha_t *)(mp->b_rptr + offset); 289 290 switch (ipha->ipha_protocol) { 291 case IPPROTO_TCP: 292 case IPPROTO_UDP: 293 /* 294 * This is a TCP/IPv4 or UDP/IPv4 packet. 295 * 296 * If the capabilities indicate that full checksum 297 * offload is available, use it. 298 */ 299 if ((capab & HCKSUM_INET_FULL_V4) != 0) { 300 (void) hcksum_assoc(mp, NULL, NULL, 301 0, 0, 0, 0, 302 HCK_FULLCKSUM, KM_NOSLEEP); 303 304 xnbp->x_stat_csum_hardware++; 305 306 return (mp); 307 } 308 309 /* 310 * XXPV dme: If the capabilities indicate that partial 311 * checksum offload is available, we should use it. 312 */ 313 314 break; 315 316 default: 317 /* Use software. */ 318 break; 319 } 320 321 software: 322 /* 323 * We are not able to use any offload so do the whole thing in 324 * software. 
325 */ 326 xnbp->x_stat_csum_software++; 327 328 return (xnb_software_csum(xnbp, mp)); 329 } 330 331 int 332 xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data) 333 { 334 xnb_t *xnbp; 335 char *xsname, mac[ETHERADDRL * 3]; 336 337 xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP); 338 339 xnbp->x_flavour = flavour; 340 xnbp->x_flavour_data = flavour_data; 341 xnbp->x_devinfo = dip; 342 xnbp->x_evtchn = INVALID_EVTCHN; 343 xnbp->x_irq = B_FALSE; 344 xnbp->x_tx_ring_handle = INVALID_GRANT_HANDLE; 345 xnbp->x_rx_ring_handle = INVALID_GRANT_HANDLE; 346 xnbp->x_cksum_offload = xnb_cksum_offload; 347 xnbp->x_connected = B_FALSE; 348 xnbp->x_hotplugged = B_FALSE; 349 xnbp->x_detachable = B_FALSE; 350 xnbp->x_peer = xvdi_get_oeid(dip); 351 xnbp->x_rx_pages_writable = B_FALSE; 352 353 xnbp->x_rx_buf_count = 0; 354 xnbp->x_rx_unmop_count = 0; 355 356 xnbp->x_tx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP); 357 ASSERT(xnbp->x_tx_va != NULL); 358 359 if (ddi_get_iblock_cookie(dip, 0, &xnbp->x_icookie) 360 != DDI_SUCCESS) 361 goto failure; 362 363 mutex_init(&xnbp->x_tx_lock, NULL, MUTEX_DRIVER, xnbp->x_icookie); 364 mutex_init(&xnbp->x_rx_lock, NULL, MUTEX_DRIVER, xnbp->x_icookie); 365 366 /* set driver private pointer now */ 367 ddi_set_driver_private(dip, xnbp); 368 369 if (!xnb_ks_init(xnbp)) 370 goto late_failure; 371 372 /* 373 * Receive notification of changes in the state of the 374 * driver in the guest domain. 375 */ 376 if (xvdi_add_event_handler(dip, XS_OE_STATE, 377 xnb_oe_state_change) != DDI_SUCCESS) 378 goto very_late_failure; 379 380 /* 381 * Receive notification of hotplug events. 382 */ 383 if (xvdi_add_event_handler(dip, XS_HP_STATE, 384 xnb_hp_state_change) != DDI_SUCCESS) 385 goto very_late_failure; 386 387 xsname = xvdi_get_xsname(dip); 388 389 if (xenbus_printf(XBT_NULL, xsname, 390 "feature-no-csum-offload", "%d", 391 xnbp->x_cksum_offload ? 
0 : 1) != 0) 392 goto very_very_late_failure; 393 394 if (xenbus_scanf(XBT_NULL, xsname, 395 "mac", "%s", mac) != 0) { 396 cmn_err(CE_WARN, "xnb_attach: " 397 "cannot read mac address from %s", 398 xsname); 399 goto very_very_late_failure; 400 } 401 402 if (ether_aton(mac, xnbp->x_mac_addr) != ETHERADDRL) { 403 cmn_err(CE_WARN, 404 "xnb_attach: cannot parse mac address %s", 405 mac); 406 goto very_very_late_failure; 407 } 408 409 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait); 410 (void) xvdi_post_event(dip, XEN_HP_ADD); 411 412 return (DDI_SUCCESS); 413 414 very_very_late_failure: /* not that the naming is getting silly or anything */ 415 xvdi_remove_event_handler(dip, NULL); 416 417 very_late_failure: 418 xnb_ks_free(xnbp); 419 420 late_failure: 421 mutex_destroy(&xnbp->x_rx_lock); 422 mutex_destroy(&xnbp->x_tx_lock); 423 424 failure: 425 vmem_free(heap_arena, xnbp->x_tx_va, PAGESIZE); 426 kmem_free(xnbp, sizeof (*xnbp)); 427 return (DDI_FAILURE); 428 } 429 430 /*ARGSUSED*/ 431 void 432 xnb_detach(dev_info_t *dip) 433 { 434 xnb_t *xnbp = ddi_get_driver_private(dip); 435 436 ASSERT(xnbp != NULL); 437 ASSERT(!xnbp->x_connected); 438 ASSERT(xnbp->x_rx_buf_count == 0); 439 440 xnb_disconnect_rings(dip); 441 442 xvdi_remove_event_handler(dip, NULL); 443 444 xnb_ks_free(xnbp); 445 446 ddi_set_driver_private(dip, NULL); 447 448 mutex_destroy(&xnbp->x_tx_lock); 449 mutex_destroy(&xnbp->x_rx_lock); 450 451 ASSERT(xnbp->x_tx_va != NULL); 452 vmem_free(heap_arena, xnbp->x_tx_va, PAGESIZE); 453 454 kmem_free(xnbp, sizeof (*xnbp)); 455 } 456 457 458 static mfn_t 459 xnb_alloc_page(xnb_t *xnbp) 460 { 461 #define WARNING_RATE_LIMIT 100 462 #define BATCH_SIZE 256 463 static mfn_t mfns[BATCH_SIZE]; /* common across all instances */ 464 static int nth = BATCH_SIZE; 465 mfn_t mfn; 466 467 mutex_enter(&xnb_alloc_page_lock); 468 if (nth == BATCH_SIZE) { 469 if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) { 470 xnbp->x_stat_allocation_failure++; 471 mutex_exit(&xnb_alloc_page_lock); 472 473 /* 474 * Try for a single page in low memory situations. 475 */ 476 if (balloon_alloc_pages(1, &mfn) != 1) { 477 xnbp->x_stat_small_allocation_failure++; 478 if ((xnbp->x_stat_small_allocation_failure 479 % WARNING_RATE_LIMIT) == 0) { 480 cmn_err(CE_WARN, "xnb_alloc_page: " 481 "Cannot allocate memory to " 482 "transfer packets to peer."); 483 } 484 return (0); 485 } else { 486 xnbp->x_stat_small_allocation_success++; 487 return (mfn); 488 } 489 } 490 491 nth = 0; 492 xnbp->x_stat_allocation_success++; 493 } 494 495 mfn = mfns[nth++]; 496 mutex_exit(&xnb_alloc_page_lock); 497 498 ASSERT(mfn != 0); 499 500 return (mfn); 501 #undef BATCH_SIZE 502 #undef WARNING_RATE_LIMIT 503 } 504 505 /*ARGSUSED*/ 506 static void 507 xnb_free_page(xnb_t *xnbp, mfn_t mfn) 508 { 509 int r; 510 pfn_t pfn; 511 512 pfn = xen_assign_pfn(mfn); 513 pfnzero(pfn, 0, PAGESIZE); 514 xen_release_pfn(pfn); 515 516 /* 517 * This happens only in the error path, so batching is 518 * not worth the complication. 519 */ 520 if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) { 521 cmn_err(CE_WARN, "free_page: cannot decrease memory " 522 "reservation (%d): page kept but unusable (mfn = 0x%lx).", 523 r, mfn); 524 } 525 } 526 527 mblk_t * 528 xnb_to_peer(xnb_t *xnbp, mblk_t *mp) 529 { 530 mblk_t *free = mp, *prev = NULL; 531 size_t len; 532 gnttab_transfer_t *gop; 533 boolean_t notify; 534 RING_IDX loop, prod, end; 535 536 /* 537 * For each packet the sequence of operations is: 538 * 539 * 1. get a new page from the hypervisor. 
540 * 2. get a request slot from the ring. 541 * 3. copy the data into the new page. 542 * 4. transfer the page to the peer. 543 * 5. update the request slot. 544 * 6. kick the peer. 545 * 7. free mp. 546 * 547 * In order to reduce the number of hypercalls, we prepare 548 * several packets for the peer and perform a single hypercall 549 * to transfer them. 550 */ 551 552 mutex_enter(&xnbp->x_tx_lock); 553 554 /* 555 * If we are not connected to the peer or have not yet 556 * finished hotplug it is too early to pass packets to the 557 * peer. 558 */ 559 if (!(xnbp->x_connected && xnbp->x_hotplugged)) { 560 mutex_exit(&xnbp->x_tx_lock); 561 xnbp->x_stat_tx_too_early++; 562 return (mp); 563 } 564 565 loop = xnbp->x_rx_ring.req_cons; 566 prod = xnbp->x_rx_ring.rsp_prod_pvt; 567 gop = xnbp->x_tx_top; 568 569 /* 570 * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->x_rx_ring) but 571 * using local variables. 572 */ 573 #define XNB_RING_HAS_UNCONSUMED_REQUESTS(_r) \ 574 ((((_r)->sring->req_prod - loop) < \ 575 (RING_SIZE(_r) - (loop - prod))) ? \ 576 ((_r)->sring->req_prod - loop) : \ 577 (RING_SIZE(_r) - (loop - prod))) 578 579 while ((mp != NULL) && 580 XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->x_rx_ring)) { 581 582 mfn_t mfn; 583 pfn_t pfn; 584 netif_rx_request_t *rxreq; 585 netif_rx_response_t *rxresp; 586 char *valoop; 587 size_t offset; 588 mblk_t *ml; 589 uint16_t cksum_flags; 590 591 /* 1 */ 592 if ((mfn = xnb_alloc_page(xnbp)) == 0) { 593 xnbp->x_stat_xmit_defer++; 594 break; 595 } 596 597 /* 2 */ 598 rxreq = RING_GET_REQUEST(&xnbp->x_rx_ring, loop); 599 600 #ifdef XNB_DEBUG 601 if (!(rxreq->id < NET_RX_RING_SIZE)) 602 cmn_err(CE_PANIC, "xnb_to_peer: " 603 "id %d out of range in request 0x%p", 604 rxreq->id, (void *)rxreq); 605 if (rxreq->gref >= NR_GRANT_ENTRIES) 606 cmn_err(CE_PANIC, "xnb_to_peer: " 607 "grant ref %d out of range in request 0x%p", 608 rxreq->gref, (void *)rxreq); 609 #endif /* XNB_DEBUG */ 610 611 /* Assign a pfn and map the new page at the allocated va. */ 612 pfn = xen_assign_pfn(mfn); 613 hat_devload(kas.a_hat, xnbp->x_tx_va, PAGESIZE, 614 pfn, PROT_READ | PROT_WRITE, HAT_LOAD); 615 616 offset = TX_BUFFER_HEADROOM; 617 618 /* 3 */ 619 len = 0; 620 valoop = xnbp->x_tx_va + offset; 621 for (ml = mp; ml != NULL; ml = ml->b_cont) { 622 size_t chunk = ml->b_wptr - ml->b_rptr; 623 624 bcopy(ml->b_rptr, valoop, chunk); 625 valoop += chunk; 626 len += chunk; 627 } 628 629 ASSERT(len + offset < PAGESIZE); 630 631 /* Release the pfn. */ 632 hat_unload(kas.a_hat, xnbp->x_tx_va, PAGESIZE, 633 HAT_UNLOAD_UNMAP); 634 xen_release_pfn(pfn); 635 636 /* 4 */ 637 gop->mfn = mfn; 638 gop->domid = xnbp->x_peer; 639 gop->ref = rxreq->gref; 640 641 /* 5.1 */ 642 rxresp = RING_GET_RESPONSE(&xnbp->x_rx_ring, prod); 643 rxresp->offset = offset; 644 rxresp->flags = 0; 645 646 cksum_flags = xnbp->x_flavour->xf_cksum_to_peer(xnbp, mp); 647 if (cksum_flags != 0) 648 xnbp->x_stat_tx_cksum_deferred++; 649 rxresp->flags |= cksum_flags; 650 651 rxresp->id = RING_GET_REQUEST(&xnbp->x_rx_ring, prod)->id; 652 rxresp->status = len; 653 654 loop++; 655 prod++; 656 gop++; 657 prev = mp; 658 mp = mp->b_next; 659 } 660 661 /* 662 * Did we actually do anything? 663 */ 664 if (loop == xnbp->x_rx_ring.req_cons) { 665 mutex_exit(&xnbp->x_tx_lock); 666 return (mp); 667 } 668 669 end = loop; 670 671 /* 672 * Unlink the end of the 'done' list from the remainder. 
673 */ 674 ASSERT(prev != NULL); 675 prev->b_next = NULL; 676 677 if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->x_tx_top, 678 loop - xnbp->x_rx_ring.req_cons) != 0) { 679 cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed"); 680 } 681 682 loop = xnbp->x_rx_ring.req_cons; 683 prod = xnbp->x_rx_ring.rsp_prod_pvt; 684 gop = xnbp->x_tx_top; 685 686 while (loop < end) { 687 int16_t status = NETIF_RSP_OKAY; 688 689 if (gop->status != 0) { 690 status = NETIF_RSP_ERROR; 691 692 /* 693 * If the status is anything other than 694 * GNTST_bad_page then we don't own the page 695 * any more, so don't try to give it back. 696 */ 697 if (gop->status != GNTST_bad_page) 698 gop->mfn = 0; 699 } else { 700 /* The page is no longer ours. */ 701 gop->mfn = 0; 702 } 703 704 if (gop->mfn != 0) 705 /* 706 * Give back the page, as we won't be using 707 * it. 708 */ 709 xnb_free_page(xnbp, gop->mfn); 710 else 711 /* 712 * We gave away a page, update our accounting 713 * now. 714 */ 715 balloon_drv_subtracted(1); 716 717 /* 5.2 */ 718 if (status != NETIF_RSP_OKAY) { 719 RING_GET_RESPONSE(&xnbp->x_rx_ring, prod)->status = 720 status; 721 } else { 722 xnbp->x_stat_opackets++; 723 xnbp->x_stat_obytes += len; 724 } 725 726 loop++; 727 prod++; 728 gop++; 729 } 730 731 xnbp->x_rx_ring.req_cons = loop; 732 xnbp->x_rx_ring.rsp_prod_pvt = prod; 733 734 /* 6 */ 735 /*LINTED: constant in conditional context*/ 736 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->x_rx_ring, notify); 737 if (notify) { 738 ec_notify_via_evtchn(xnbp->x_evtchn); 739 xnbp->x_stat_tx_notify_sent++; 740 } else { 741 xnbp->x_stat_tx_notify_deferred++; 742 } 743 744 if (mp != NULL) 745 xnbp->x_stat_xmit_defer++; 746 747 mutex_exit(&xnbp->x_tx_lock); 748 749 /* Free mblk_t's that we consumed. */ 750 freemsgchain(free); 751 752 return (mp); 753 } 754 755 /*ARGSUSED*/ 756 static int 757 xnb_rxbuf_constructor(void *buf, void *arg, int kmflag) 758 { 759 xnb_rxbuf_t *rxp = buf; 760 761 bzero(rxp, sizeof (*rxp)); 762 763 rxp->xr_free_rtn.free_func = xnb_rx_complete; 764 rxp->xr_free_rtn.free_arg = (caddr_t)rxp; 765 766 rxp->xr_mop.host_addr = 767 (uint64_t)(uintptr_t)vmem_alloc(heap_arena, PAGESIZE, 768 ((kmflag & KM_NOSLEEP) == KM_NOSLEEP) ? 769 VM_NOSLEEP : VM_SLEEP); 770 771 if (rxp->xr_mop.host_addr == NULL) { 772 cmn_err(CE_WARN, "xnb_rxbuf_constructor: " 773 "cannot get address space"); 774 return (-1); 775 } 776 777 /* 778 * Have the hat ensure that page table exists for the VA. 
 */
	hat_prepare_mapping(kas.a_hat,
	    (caddr_t)(uintptr_t)rxp->xr_mop.host_addr);

	return (0);
}

/*ARGSUSED*/
static void
xnb_rxbuf_destructor(void *buf, void *arg)
{
	xnb_rxbuf_t *rxp = buf;

	ASSERT(rxp->xr_mop.host_addr != NULL);
	ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == 0);

	hat_release_mapping(kas.a_hat,
	    (caddr_t)(uintptr_t)rxp->xr_mop.host_addr);
	vmem_free(heap_arena,
	    (caddr_t)(uintptr_t)rxp->xr_mop.host_addr, PAGESIZE);
}

static void
xnb_rx_notify_peer(xnb_t *xnbp)
{
	boolean_t notify;

	ASSERT(MUTEX_HELD(&xnbp->x_rx_lock));

	/*LINTED: constant in conditional context*/
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->x_tx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->x_evtchn);
		xnbp->x_stat_rx_notify_sent++;
	} else {
		xnbp->x_stat_rx_notify_deferred++;
	}
}

static void
xnb_rx_complete(xnb_rxbuf_t *rxp)
{
	xnb_t *xnbp = rxp->xr_xnbp;

	ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == XNB_RXBUF_INUSE);

	mutex_enter(&xnbp->x_rx_lock);

	xnb_rx_schedule_unmop(xnbp, &rxp->xr_mop);
	xnb_rx_perform_pending_unmop(xnbp);

	if (xnbp->x_connected) {
		xnb_rx_mark_complete(xnbp, rxp->xr_id, rxp->xr_status);
		xnb_rx_notify_peer(xnbp);
	}

	xnb_rxbuf_put(xnbp, rxp);

	mutex_exit(&xnbp->x_rx_lock);
}

static void
xnb_rx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
{
	RING_IDX i;
	netif_tx_response_t *txresp;

	ASSERT(MUTEX_HELD(&xnbp->x_rx_lock));

	i = xnbp->x_tx_ring.rsp_prod_pvt;

	txresp = RING_GET_RESPONSE(&xnbp->x_tx_ring, i);
	txresp->id = id;
	txresp->status = status;

	xnbp->x_tx_ring.rsp_prod_pvt = i + 1;

	/*
	 * Note that we don't push the change to the peer here - that
	 * is the caller's responsibility.
	 */
}

/*
 * XXPV dme: currently pending unmap operations are stored on a
 * per-instance basis. Should they be per-driver? The locking would
 * have to change (obviously), but there might be an improvement from
 * batching more together. Right now they are all 'done' either at
 * the tail of each receive operation (copy case) or on each
 * completion (non-copy case). Should that be changed to some
 * interval (watermark?) to improve the chance of batching?
870 */ 871 static void 872 xnb_rx_schedule_unmop(xnb_t *xnbp, gnttab_map_grant_ref_t *mop) 873 { 874 gnttab_unmap_grant_ref_t *unmop; 875 876 ASSERT(MUTEX_HELD(&xnbp->x_rx_lock)); 877 ASSERT(xnbp->x_rx_unmop_count <= NET_TX_RING_SIZE); 878 879 unmop = &xnbp->x_rx_unmop[xnbp->x_rx_unmop_count]; 880 xnbp->x_rx_unmop_count++; 881 882 unmop->host_addr = mop->host_addr; 883 unmop->dev_bus_addr = mop->dev_bus_addr; 884 unmop->handle = mop->handle; 885 886 #ifdef XNB_DEBUG 887 if (xnbp->x_rx_unmop_count <= NET_TX_RING_SIZE) 888 ASSERT(xnbp->x_rx_unmop[xnbp->x_rx_unmop_count].host_addr 889 == NULL); 890 #endif /* XNB_DEBUG */ 891 892 } 893 894 static void 895 xnb_rx_perform_pending_unmop(xnb_t *xnbp) 896 { 897 #ifdef XNB_DEBUG 898 RING_IDX loop; 899 gnttab_unmap_grant_ref_t *unmop; 900 #endif /* XNB_DEBUG */ 901 902 ASSERT(MUTEX_HELD(&xnbp->x_rx_lock)); 903 904 if (xnbp->x_rx_unmop_count == 0) 905 return; 906 907 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 908 xnbp->x_rx_unmop, xnbp->x_rx_unmop_count) < 0) { 909 cmn_err(CE_WARN, "xnb_rx_perform_pending_unmop: " 910 "unmap grant operation failed, " 911 "%d pages lost", xnbp->x_rx_unmop_count); 912 } 913 914 #ifdef XNB_DEBUG 915 for (loop = 0, unmop = xnbp->x_rx_unmop; 916 loop < xnbp->x_rx_unmop_count; 917 loop++, unmop++) { 918 if (unmop->status != 0) { 919 cmn_err(CE_WARN, "xnb_rx_perform_pending_unmop: " 920 "unmap grant reference failed (%d)", 921 unmop->status); 922 } 923 } 924 #endif /* XNB_DEBUG */ 925 926 xnbp->x_rx_unmop_count = 0; 927 928 #ifdef XNB_DEBUG 929 bzero(xnbp->x_rx_unmop, sizeof (xnbp->x_rx_unmop)); 930 #endif /* XNB_DEBUG */ 931 } 932 933 static xnb_rxbuf_t * 934 xnb_rxbuf_get(xnb_t *xnbp, int flags) 935 { 936 xnb_rxbuf_t *rxp; 937 938 ASSERT(MUTEX_HELD(&xnbp->x_rx_lock)); 939 940 rxp = kmem_cache_alloc(xnb_rxbuf_cachep, flags); 941 if (rxp != NULL) { 942 ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == 0); 943 rxp->xr_flags |= XNB_RXBUF_INUSE; 944 945 rxp->xr_xnbp = xnbp; 946 rxp->xr_mop.dom = xnbp->x_peer; 947 948 rxp->xr_mop.flags = GNTMAP_host_map; 949 if (!xnbp->x_rx_pages_writable) 950 rxp->xr_mop.flags |= GNTMAP_readonly; 951 952 xnbp->x_rx_buf_count++; 953 } 954 955 return (rxp); 956 } 957 958 static void 959 xnb_rxbuf_put(xnb_t *xnbp, xnb_rxbuf_t *rxp) 960 { 961 ASSERT(MUTEX_HELD(&xnbp->x_rx_lock)); 962 ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == XNB_RXBUF_INUSE); 963 964 rxp->xr_flags &= ~XNB_RXBUF_INUSE; 965 xnbp->x_rx_buf_count--; 966 967 kmem_cache_free(xnb_rxbuf_cachep, rxp); 968 } 969 970 static mblk_t * 971 xnb_recv(xnb_t *xnbp) 972 { 973 RING_IDX start, end, loop; 974 gnttab_map_grant_ref_t *mop; 975 xnb_rxbuf_t **rxpp; 976 netif_tx_request_t *txreq; 977 boolean_t work_to_do; 978 mblk_t *head, *tail; 979 /* 980 * If the peer granted a read-only mapping to the page then we 981 * must copy the data, as the local protocol stack (should the 982 * packet be destined for this host) will modify the packet 983 * 'in place'. 984 */ 985 boolean_t copy = !xnbp->x_rx_pages_writable; 986 987 /* 988 * For each individual request, the sequence of actions is: 989 * 990 * 1. get the request. 991 * 2. map the page based on the grant ref. 992 * 3. allocate an mblk, copy the data to it. 993 * 4. release the grant. 994 * 5. update the ring. 995 * 6. pass the packet upward. 996 * 7. kick the peer. 997 * 998 * In fact, we try to perform the grant operations in batches, 999 * so there are two loops. 
1000 */ 1001 1002 head = tail = NULL; 1003 around: 1004 ASSERT(MUTEX_HELD(&xnbp->x_rx_lock)); 1005 1006 /*LINTED: constant in conditional context*/ 1007 RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->x_tx_ring, work_to_do); 1008 if (!work_to_do) { 1009 finished: 1010 xnb_rx_notify_peer(xnbp); 1011 1012 return (head); 1013 } 1014 1015 start = xnbp->x_tx_ring.req_cons; 1016 end = xnbp->x_tx_ring.sring->req_prod; 1017 1018 for (loop = start, mop = xnbp->x_rx_mop, rxpp = xnbp->x_rx_bufp; 1019 loop != end; 1020 loop++, mop++, rxpp++) { 1021 xnb_rxbuf_t *rxp; 1022 1023 rxp = xnb_rxbuf_get(xnbp, KM_NOSLEEP); 1024 if (rxp == NULL) 1025 break; 1026 1027 ASSERT(xnbp->x_rx_pages_writable || 1028 ((rxp->xr_mop.flags & GNTMAP_readonly) 1029 == GNTMAP_readonly)); 1030 1031 rxp->xr_mop.ref = 1032 RING_GET_REQUEST(&xnbp->x_tx_ring, loop)->gref; 1033 1034 ASSERT(rxp->xr_mop.ref < NR_GRANT_ENTRIES); 1035 1036 *mop = rxp->xr_mop; 1037 *rxpp = rxp; 1038 } 1039 1040 if ((loop - start) == 0) 1041 goto finished; 1042 1043 end = loop; 1044 1045 if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, 1046 xnbp->x_rx_mop, end - start) != 0) { 1047 1048 cmn_err(CE_WARN, "xnb_recv: map grant operation failed"); 1049 1050 loop = start; 1051 rxpp = xnbp->x_rx_bufp; 1052 1053 while (loop != end) { 1054 xnb_rxbuf_put(xnbp, *rxpp); 1055 1056 loop++; 1057 rxpp++; 1058 } 1059 1060 goto finished; 1061 } 1062 1063 for (loop = start, mop = xnbp->x_rx_mop, rxpp = xnbp->x_rx_bufp; 1064 loop != end; 1065 loop++, mop++, rxpp++) { 1066 mblk_t *mp = NULL; 1067 int16_t status = NETIF_RSP_OKAY; 1068 xnb_rxbuf_t *rxp = *rxpp; 1069 1070 if (mop->status != 0) { 1071 cmn_err(CE_WARN, "xnb_recv: " 1072 "failed to map buffer: %d", 1073 mop->status); 1074 status = NETIF_RSP_ERROR; 1075 } 1076 1077 txreq = RING_GET_REQUEST(&xnbp->x_tx_ring, loop); 1078 1079 if (status == NETIF_RSP_OKAY) { 1080 if (copy) { 1081 mp = allocb(txreq->size, BPRI_MED); 1082 if (mp == NULL) { 1083 status = NETIF_RSP_ERROR; 1084 xnbp->x_stat_rx_allocb_failed++; 1085 } else { 1086 bcopy((caddr_t)(uintptr_t) 1087 mop->host_addr + txreq->offset, 1088 mp->b_wptr, txreq->size); 1089 mp->b_wptr += txreq->size; 1090 } 1091 } else { 1092 mp = desballoc((unsigned char *)(uintptr_t) 1093 mop->host_addr + txreq->offset, 1094 txreq->size, 0, &rxp->xr_free_rtn); 1095 if (mp == NULL) { 1096 status = NETIF_RSP_ERROR; 1097 xnbp->x_stat_rx_allocb_failed++; 1098 } else { 1099 rxp->xr_id = txreq->id; 1100 rxp->xr_status = status; 1101 rxp->xr_mop = *mop; 1102 1103 mp->b_wptr += txreq->size; 1104 } 1105 } 1106 1107 /* 1108 * If we have a buffer and there are checksum 1109 * flags, process them appropriately. 
1110 */ 1111 if ((mp != NULL) && 1112 ((txreq->flags & 1113 (NETTXF_csum_blank | NETTXF_data_validated)) 1114 != 0)) { 1115 mp = xnbp->x_flavour->xf_cksum_from_peer(xnbp, 1116 mp, txreq->flags); 1117 xnbp->x_stat_rx_cksum_no_need++; 1118 } 1119 } 1120 1121 if (copy || (mp == NULL)) { 1122 xnb_rx_mark_complete(xnbp, txreq->id, status); 1123 xnb_rx_schedule_unmop(xnbp, mop); 1124 } 1125 1126 if (mp != NULL) { 1127 xnbp->x_stat_ipackets++; 1128 xnbp->x_stat_rbytes += txreq->size; 1129 1130 mp->b_next = NULL; 1131 if (head == NULL) { 1132 ASSERT(tail == NULL); 1133 head = mp; 1134 } else { 1135 ASSERT(tail != NULL); 1136 tail->b_next = mp; 1137 } 1138 tail = mp; 1139 } 1140 } 1141 1142 /* 1143 * This has to be here rather than in the 'finished' code 1144 * because we can only handle NET_TX_RING_SIZE pending unmap 1145 * operations, which may be exceeded by multiple trips around 1146 * the receive loop during heavy load (one trip around the 1147 * loop cannot generate more than NET_TX_RING_SIZE unmap 1148 * operations). 1149 */ 1150 xnb_rx_perform_pending_unmop(xnbp); 1151 if (copy) { 1152 for (loop = start, rxpp = xnbp->x_rx_bufp; 1153 loop != end; 1154 loop++, rxpp++) 1155 xnb_rxbuf_put(xnbp, *rxpp); 1156 } 1157 1158 xnbp->x_tx_ring.req_cons = loop; 1159 1160 goto around; 1161 /* NOTREACHED */ 1162 } 1163 1164 /* 1165 * intr() -- ring interrupt service routine 1166 */ 1167 static uint_t 1168 xnb_intr(caddr_t arg) 1169 { 1170 xnb_t *xnbp = (xnb_t *)arg; 1171 mblk_t *mp; 1172 1173 xnbp->x_stat_intr++; 1174 1175 mutex_enter(&xnbp->x_rx_lock); 1176 1177 ASSERT(xnbp->x_connected); 1178 1179 mp = xnb_recv(xnbp); 1180 1181 mutex_exit(&xnbp->x_rx_lock); 1182 1183 if (!xnbp->x_hotplugged) { 1184 xnbp->x_stat_rx_too_early++; 1185 goto fail; 1186 } 1187 if (mp == NULL) { 1188 xnbp->x_stat_spurious_intr++; 1189 goto fail; 1190 } 1191 1192 xnbp->x_flavour->xf_recv(xnbp, mp); 1193 1194 return (DDI_INTR_CLAIMED); 1195 1196 fail: 1197 freemsgchain(mp); 1198 return (DDI_INTR_CLAIMED); 1199 } 1200 1201 static boolean_t 1202 xnb_connect_rings(dev_info_t *dip) 1203 { 1204 xnb_t *xnbp = ddi_get_driver_private(dip); 1205 char *oename; 1206 struct gnttab_map_grant_ref map_op; 1207 evtchn_port_t evtchn; 1208 int i; 1209 1210 /* 1211 * Cannot attempt to connect the rings if already connected. 1212 */ 1213 ASSERT(!xnbp->x_connected); 1214 1215 oename = xvdi_get_oename(dip); 1216 1217 if (xenbus_gather(XBT_NULL, oename, 1218 "event-channel", "%u", &evtchn, 1219 "tx-ring-ref", "%lu", &xnbp->x_tx_ring_ref, 1220 "rx-ring-ref", "%lu", &xnbp->x_rx_ring_ref, 1221 NULL) != 0) { 1222 cmn_err(CE_WARN, "xnb_connect_rings: " 1223 "cannot read other-end details from %s", 1224 oename); 1225 goto fail; 1226 } 1227 1228 if (xenbus_scanf(XBT_NULL, oename, 1229 "feature-tx-writable", "%d", &i) != 0) 1230 i = 0; 1231 if (i != 0) 1232 xnbp->x_rx_pages_writable = B_TRUE; 1233 1234 if (xenbus_scanf(XBT_NULL, oename, 1235 "feature-no-csum-offload", "%d", &i) != 0) 1236 i = 0; 1237 if ((i == 1) || !xnbp->x_cksum_offload) 1238 xnbp->x_cksum_offload = B_FALSE; 1239 1240 /* 1241 * 1. allocate a vaddr for the tx page, one for the rx page. 1242 * 2. call GNTTABOP_map_grant_ref to map the relevant pages 1243 * into the allocated vaddr (one for tx, one for rx). 1244 * 3. call EVTCHNOP_bind_interdomain to have the event channel 1245 * bound to this domain. 1246 * 4. associate the event channel with an interrupt. 1247 * 5. declare ourselves connected. 1248 * 6. enable the interrupt. 
 */

	/* 1.tx */
	xnbp->x_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->x_tx_ring_addr != NULL);

	/* 2.tx */
	map_op.host_addr = (uint64_t)((long)xnbp->x_tx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->x_tx_ring_ref;
	map_op.dom = xnbp->x_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->x_tx_ring_addr);
	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
	    &map_op, 1) != 0 || map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page.");
		goto fail;
	}
	xnbp->x_tx_ring_handle = map_op.handle;

	/*LINTED: constant in conditional context*/
	BACK_RING_INIT(&xnbp->x_tx_ring,
	    (netif_tx_sring_t *)xnbp->x_tx_ring_addr, PAGESIZE);

	/* 1.rx */
	xnbp->x_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->x_rx_ring_addr != NULL);

	/* 2.rx */
	map_op.host_addr = (uint64_t)((long)xnbp->x_rx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->x_rx_ring_ref;
	map_op.dom = xnbp->x_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->x_rx_ring_addr);
	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
	    &map_op, 1) != 0 || map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page.");
		goto fail;
	}
	xnbp->x_rx_ring_handle = map_op.handle;

	/*LINTED: constant in conditional context*/
	BACK_RING_INIT(&xnbp->x_rx_ring,
	    (netif_rx_sring_t *)xnbp->x_rx_ring_addr, PAGESIZE);

	/* 3 */
	if (xvdi_bind_evtchn(dip, evtchn) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xnb_connect_rings: "
		    "cannot bind event channel %d", evtchn);
		xnbp->x_evtchn = INVALID_EVTCHN;
		goto fail;
	}
	xnbp->x_evtchn = xvdi_get_evtchn(dip);

	/*
	 * It would be good to set the state to XenbusStateConnected
	 * here as well, but then what if ddi_add_intr() failed?
	 * Changing the state in the store will be noticed by the peer
	 * and cannot be "taken back".
1309 */ 1310 mutex_enter(&xnbp->x_tx_lock); 1311 mutex_enter(&xnbp->x_rx_lock); 1312 1313 /* 5.1 */ 1314 xnbp->x_connected = B_TRUE; 1315 1316 mutex_exit(&xnbp->x_rx_lock); 1317 mutex_exit(&xnbp->x_tx_lock); 1318 1319 /* 4, 6 */ 1320 if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp) 1321 != DDI_SUCCESS) { 1322 cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt"); 1323 goto fail; 1324 } 1325 xnbp->x_irq = B_TRUE; 1326 1327 /* 5.2 */ 1328 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 1329 1330 return (B_TRUE); 1331 1332 fail: 1333 mutex_enter(&xnbp->x_tx_lock); 1334 mutex_enter(&xnbp->x_rx_lock); 1335 1336 xnbp->x_connected = B_FALSE; 1337 1338 mutex_exit(&xnbp->x_rx_lock); 1339 mutex_exit(&xnbp->x_tx_lock); 1340 1341 return (B_FALSE); 1342 } 1343 1344 static void 1345 xnb_disconnect_rings(dev_info_t *dip) 1346 { 1347 xnb_t *xnbp = ddi_get_driver_private(dip); 1348 1349 if (xnbp->x_irq) { 1350 ddi_remove_intr(dip, 0, NULL); 1351 xnbp->x_irq = B_FALSE; 1352 } 1353 1354 if (xnbp->x_evtchn != INVALID_EVTCHN) { 1355 xvdi_free_evtchn(dip); 1356 xnbp->x_evtchn = INVALID_EVTCHN; 1357 } 1358 1359 if (xnbp->x_rx_ring_handle != INVALID_GRANT_HANDLE) { 1360 struct gnttab_unmap_grant_ref unmap_op; 1361 1362 unmap_op.host_addr = (uint64_t)(uintptr_t)xnbp->x_rx_ring_addr; 1363 unmap_op.dev_bus_addr = 0; 1364 unmap_op.handle = xnbp->x_rx_ring_handle; 1365 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 1366 &unmap_op, 1) != 0) 1367 cmn_err(CE_WARN, "xnb_disconnect_rings: " 1368 "cannot unmap rx-ring page (%d)", 1369 unmap_op.status); 1370 1371 xnbp->x_rx_ring_handle = INVALID_GRANT_HANDLE; 1372 } 1373 1374 if (xnbp->x_rx_ring_addr != NULL) { 1375 hat_release_mapping(kas.a_hat, xnbp->x_rx_ring_addr); 1376 vmem_free(heap_arena, xnbp->x_rx_ring_addr, PAGESIZE); 1377 xnbp->x_rx_ring_addr = NULL; 1378 } 1379 1380 if (xnbp->x_tx_ring_handle != INVALID_GRANT_HANDLE) { 1381 struct gnttab_unmap_grant_ref unmap_op; 1382 1383 unmap_op.host_addr = (uint64_t)(uintptr_t)xnbp->x_tx_ring_addr; 1384 unmap_op.dev_bus_addr = 0; 1385 unmap_op.handle = xnbp->x_tx_ring_handle; 1386 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 1387 &unmap_op, 1) != 0) 1388 cmn_err(CE_WARN, "xnb_disconnect_rings: " 1389 "cannot unmap tx-ring page (%d)", 1390 unmap_op.status); 1391 1392 xnbp->x_tx_ring_handle = INVALID_GRANT_HANDLE; 1393 } 1394 1395 if (xnbp->x_tx_ring_addr != NULL) { 1396 hat_release_mapping(kas.a_hat, xnbp->x_tx_ring_addr); 1397 vmem_free(heap_arena, xnbp->x_tx_ring_addr, PAGESIZE); 1398 xnbp->x_tx_ring_addr = NULL; 1399 } 1400 } 1401 1402 /*ARGSUSED*/ 1403 static void 1404 xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, 1405 void *arg, void *impl_data) 1406 { 1407 xnb_t *xnbp = ddi_get_driver_private(dip); 1408 XenbusState new_state = *(XenbusState *)impl_data; 1409 1410 ASSERT(xnbp != NULL); 1411 1412 switch (new_state) { 1413 case XenbusStateConnected: 1414 if (xnb_connect_rings(dip)) { 1415 xnbp->x_flavour->xf_peer_connected(xnbp); 1416 } else { 1417 xnbp->x_flavour->xf_peer_disconnected(xnbp); 1418 xnb_disconnect_rings(dip); 1419 (void) xvdi_switch_state(dip, XBT_NULL, 1420 XenbusStateClosed); 1421 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 1422 } 1423 1424 /* 1425 * Now that we've attempted to connect it's reasonable 1426 * to allow an attempt to detach. 
		 */
		xnbp->x_detachable = B_TRUE;

		break;

	case XenbusStateClosing:
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);

		break;

	case XenbusStateClosed:
		xnbp->x_flavour->xf_peer_disconnected(xnbp);

		mutex_enter(&xnbp->x_tx_lock);
		mutex_enter(&xnbp->x_rx_lock);

		xnb_disconnect_rings(dip);
		xnbp->x_connected = B_FALSE;

		mutex_exit(&xnbp->x_rx_lock);
		mutex_exit(&xnbp->x_tx_lock);

		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
		(void) xvdi_post_event(dip, XEN_HP_REMOVE);
		/*
		 * In all likelihood this is already set (in the above
		 * case), but if the peer never attempted to connect
		 * and the domain is destroyed we get here without
		 * having been through the case above, so we set it to
		 * be sure.
		 */
		xnbp->x_detachable = B_TRUE;

		break;

	default:
		break;
	}
}

/*ARGSUSED*/
static void
xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);
	xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;
	boolean_t success;

	ASSERT(xnbp != NULL);

	switch (state) {
	case Connected:

		success = xnbp->x_flavour->xf_hotplug_connected(xnbp);

		mutex_enter(&xnbp->x_tx_lock);
		mutex_enter(&xnbp->x_rx_lock);

		xnbp->x_hotplugged = success;

		mutex_exit(&xnbp->x_rx_lock);
		mutex_exit(&xnbp->x_tx_lock);
		break;

	default:
		break;
	}
}

static struct modldrv modldrv = {
	&mod_miscops, "xnb module %I%",
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

int
_init(void)
{
	int i;

	mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL);

	xnb_rxbuf_cachep = kmem_cache_create("xnb_rxbuf_cachep",
	    sizeof (xnb_rxbuf_t), 0, xnb_rxbuf_constructor,
	    xnb_rxbuf_destructor, NULL, NULL, NULL, 0);
	ASSERT(xnb_rxbuf_cachep != NULL);

	i = mod_install(&modlinkage);
	if (i != DDI_SUCCESS) {
		kmem_cache_destroy(xnb_rxbuf_cachep);
		mutex_destroy(&xnb_alloc_page_lock);
	}
	return (i);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int i;

	i = mod_remove(&modlinkage);
	if (i == DDI_SUCCESS) {
		kmem_cache_destroy(xnb_rxbuf_cachep);
		mutex_destroy(&xnb_alloc_page_lock);
	}
	return (i);
}