/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#ifdef DEBUG
#define XNB_DEBUG 1
#endif /* DEBUG */

#include "xnb.h"

#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/mac.h>
#include <sys/dlpi.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/types.h>
#include <sys/pattr.h>
#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/balloon_impl.h>
#include <sys/evtchn_impl.h>
#include <sys/gnttab.h>
#include <vm/vm_dep.h>

#include <sys/gld.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <sys/vnic_impl.h> /* blech. */

/*
 * The terms "transmit" and "receive" are used in their traditional
 * sense here - packets from other parts of this system are
 * "transmitted" to the peer domain and those originating from the
 * peer are "received".
 *
 * In some cases this can be confusing, because various data
 * structures are shared with the domU driver, which has the opposite
 * view of what constitutes "transmit" and "receive".  In naming the
 * shared structures the domU driver always wins.
 */

/*
 * XXPV dme: things to do, as well as various things indicated
 * throughout the source:
 * - copy avoidance outbound.
 * - copy avoidance inbound.
 * - transfer credit limiting.
 * - MAC address based filtering.
 */

/*
 * Linux expects to have some headroom in received buffers.  The Linux
 * frontend driver (netfront) checks to see if the headroom is
 * available and will re-allocate the buffer to make room if
 * necessary.  To avoid this we add TX_BUFFER_HEADROOM bytes of
 * headroom to each packet we pass to the peer.
 */
#define TX_BUFFER_HEADROOM 16

static boolean_t xnb_cksum_offload = B_TRUE;

static boolean_t xnb_connect_rings(dev_info_t *);
static void xnb_disconnect_rings(dev_info_t *);
static void xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);
static void xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);

static int xnb_rxbuf_constructor(void *, void *, int);
static void xnb_rxbuf_destructor(void *, void *);
static xnb_rxbuf_t *xnb_rxbuf_get(xnb_t *, int);
static void xnb_rxbuf_put(xnb_t *, xnb_rxbuf_t *);
static void xnb_rx_notify_peer(xnb_t *);
static void xnb_rx_complete(xnb_rxbuf_t *);
static void xnb_rx_mark_complete(xnb_t *, RING_IDX, int16_t);
static void xnb_rx_schedule_unmop(xnb_t *, gnttab_map_grant_ref_t *,
    xnb_rxbuf_t *);
static void xnb_rx_perform_pending_unmop(xnb_t *);
mblk_t *xnb_copy_to_peer(xnb_t *, mblk_t *);

int xnb_unmop_lowwat = NET_TX_RING_SIZE >> 2;
int xnb_unmop_hiwat = NET_TX_RING_SIZE - (NET_TX_RING_SIZE >> 2);


boolean_t xnb_hv_copy = B_TRUE;
boolean_t xnb_explicit_pageflip_set = B_FALSE;

/* XXPV dme: are these really invalid? */
#define INVALID_GRANT_HANDLE ((grant_handle_t)-1)
#define INVALID_GRANT_REF ((grant_ref_t)-1)

static kmem_cache_t *xnb_rxbuf_cachep;
static kmutex_t xnb_alloc_page_lock;

/*
 * Statistics.
 */
static char *aux_statistics[] = {
    "tx_cksum_deferred",
    "rx_cksum_no_need",
    "tx_rsp_notok",
    "tx_notify_deferred",
    "tx_notify_sent",
    "rx_notify_deferred",
    "rx_notify_sent",
    "tx_too_early",
    "rx_too_early",
    "rx_allocb_failed",
    "tx_allocb_failed",
    "tx_foreign_page",
    "mac_full",
    "spurious_intr",
    "allocation_success",
    "allocation_failure",
    "small_allocation_success",
    "small_allocation_failure",
    "other_allocation_failure",
    "tx_pageboundary_crossed",
    "tx_cpoparea_grown",
    "csum_hardware",
    "csum_software",
};

static int
xnb_ks_aux_update(kstat_t *ksp, int flag)
{
    xnb_t *xnbp;
    kstat_named_t *knp;

    if (flag != KSTAT_READ)
        return (EACCES);

    xnbp = ksp->ks_private;
    knp = ksp->ks_data;

    /*
     * Assignment order should match that of the names in
     * aux_statistics.
     */
    (knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_deferred;
    (knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_no_need;
    (knp++)->value.ui64 = xnbp->xnb_stat_tx_rsp_notok;
    (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred;
    (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent;
    (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred;
    (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent;
    (knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early;
    (knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early;
    (knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed;
    (knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed;
    (knp++)->value.ui64 = xnbp->xnb_stat_tx_foreign_page;
    (knp++)->value.ui64 = xnbp->xnb_stat_mac_full;
    (knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr;
    (knp++)->value.ui64 = xnbp->xnb_stat_allocation_success;
    (knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure;
    (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success;
    (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure;
    (knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure;
    (knp++)->value.ui64 = xnbp->xnb_stat_tx_pagebndry_crossed;
    (knp++)->value.ui64 = xnbp->xnb_stat_tx_cpoparea_grown;
    (knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware;
    (knp++)->value.ui64 = xnbp->xnb_stat_csum_software;

    return (0);
}

static boolean_t
xnb_ks_init(xnb_t *xnbp)
{
    int nstat = sizeof (aux_statistics) /
        sizeof (aux_statistics[0]);
    char **cp = aux_statistics;
    kstat_named_t *knp;

    /*
     * Create and initialise kstats.
     */
    xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo),
        ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net",
        KSTAT_TYPE_NAMED, nstat, 0);
    if (xnbp->xnb_kstat_aux == NULL)
        return (B_FALSE);

    xnbp->xnb_kstat_aux->ks_private = xnbp;
    xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update;

    knp = xnbp->xnb_kstat_aux->ks_data;
    while (nstat > 0) {
        kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);

        knp++;
        cp++;
        nstat--;
    }

    kstat_install(xnbp->xnb_kstat_aux);

    return (B_TRUE);
}

static void
xnb_ks_free(xnb_t *xnbp)
{
    kstat_delete(xnbp->xnb_kstat_aux);
}
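
/*
 * For reference, the counters above should be visible from userland via
 * kstat(1M); the kstat is named "aux_statistics" and is created under the
 * attaching flavour driver's module name and instance (see the
 * ddi_driver_name()/ddi_get_instance() calls above), so something like
 *
 *	kstat -n aux_statistics
 *
 * can be used to display the values collected here.
 */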

/*
 * Software checksum calculation and insertion for an arbitrary packet.
 */
/*ARGSUSED*/
static mblk_t *
xnb_software_csum(xnb_t *xnbp, mblk_t *mp)
{
    /*
     * XXPV dme: shouldn't rely on vnic_fix_cksum(), not least
     * because it doesn't cover all of the interesting cases :-(
     */
    (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0,
        HCK_FULLCKSUM, KM_NOSLEEP);

    return (vnic_fix_cksum(mp));
}

mblk_t *
xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab)
{
    struct ether_header *ehp;
    uint16_t sap;
    uint32_t offset;
    ipha_t *ipha;

    ASSERT(mp->b_next == NULL);

    /*
     * Check that the packet is contained in a single mblk.  In
     * the "from peer" path this is true today, but will change
     * when scatter gather support is added.  In the "to peer"
     * path we cannot be sure, but in most cases it will be true
     * (in the xnbo case the packet has come from a MAC device
     * which is unlikely to split packets).
     */
    if (mp->b_cont != NULL)
        goto software;

    /*
     * If the MAC has no hardware capability don't do any further
     * checking.
     */
    if (capab == 0)
        goto software;

    ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
    ehp = (struct ether_header *)mp->b_rptr;

    if (ntohs(ehp->ether_type) == VLAN_TPID) {
        struct ether_vlan_header *evhp;

        ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
        evhp = (struct ether_vlan_header *)mp->b_rptr;
        sap = ntohs(evhp->ether_type);
        offset = sizeof (struct ether_vlan_header);
    } else {
        sap = ntohs(ehp->ether_type);
        offset = sizeof (struct ether_header);
    }

    /*
     * We only attempt to do IPv4 packets in hardware.
     */
    if (sap != ETHERTYPE_IP)
        goto software;

    /*
     * We know that this is an IPv4 packet.
     */
    ipha = (ipha_t *)(mp->b_rptr + offset);

    switch (ipha->ipha_protocol) {
    case IPPROTO_TCP:
    case IPPROTO_UDP:
        /*
         * This is a TCP/IPv4 or UDP/IPv4 packet.
         *
         * If the capabilities indicate that full checksum
         * offload is available, use it.
         */
        if ((capab & HCKSUM_INET_FULL_V4) != 0) {
            (void) hcksum_assoc(mp, NULL, NULL,
                0, 0, 0, 0,
                HCK_FULLCKSUM, KM_NOSLEEP);

            xnbp->xnb_stat_csum_hardware++;

            return (mp);
        }

        /*
         * XXPV dme: If the capabilities indicate that partial
         * checksum offload is available, we should use it.
         */

        break;

    default:
        /* Use software. */
        break;
    }

software:
    /*
     * We are not able to use any offload so do the whole thing in
     * software.
     */
    xnbp->xnb_stat_csum_software++;

    return (xnb_software_csum(xnbp, mp));
}

int
xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
{
    xnb_t *xnbp;
    char *xsname, mac[ETHERADDRL * 3];

    xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP);

    xnbp->xnb_flavour = flavour;
    xnbp->xnb_flavour_data = flavour_data;
    xnbp->xnb_devinfo = dip;
    xnbp->xnb_evtchn = INVALID_EVTCHN;
    xnbp->xnb_irq = B_FALSE;
    xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
    xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
    xnbp->xnb_cksum_offload = xnb_cksum_offload;
    xnbp->xnb_connected = B_FALSE;
    xnbp->xnb_hotplugged = B_FALSE;
    xnbp->xnb_detachable = B_FALSE;
    xnbp->xnb_peer = xvdi_get_oeid(dip);
    xnbp->xnb_rx_pages_writable = B_FALSE;

    xnbp->xnb_rx_buf_count = 0;
    xnbp->xnb_rx_unmop_count = 0;

    xnbp->xnb_hv_copy = B_FALSE;

    xnbp->xnb_tx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
    ASSERT(xnbp->xnb_tx_va != NULL);

    if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie)
        != DDI_SUCCESS)
        goto failure;

    /* allocated on demand, when/if we enter xnb_copy_to_peer() */
    xnbp->xnb_tx_cpop = NULL;
    xnbp->xnb_cpop_sz = 0;

    mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER,
        xnbp->xnb_icookie);
    mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER,
        xnbp->xnb_icookie);

    /* set driver private pointer now */
    ddi_set_driver_private(dip, xnbp);

    if (!xnb_ks_init(xnbp))
        goto failure_1;

    /*
     * Receive notification of changes in the state of the
     * driver in the guest domain.
     */
    if (xvdi_add_event_handler(dip, XS_OE_STATE,
        xnb_oe_state_change) != DDI_SUCCESS)
        goto failure_2;

    /*
     * Receive notification of hotplug events.
     */
    if (xvdi_add_event_handler(dip, XS_HP_STATE,
        xnb_hp_state_change) != DDI_SUCCESS)
        goto failure_2;

    xsname = xvdi_get_xsname(dip);

    if (xenbus_printf(XBT_NULL, xsname,
        "feature-no-csum-offload", "%d",
        xnbp->xnb_cksum_offload ? 0 : 1) != 0)
        goto failure_3;

    /*
     * Use global xnb_hv_copy to export this feature. This means that
     * we have to decide what to do before starting up a guest domain.
     */
    if (xenbus_printf(XBT_NULL, xsname,
        "feature-rx-copy", "%d", xnb_hv_copy ? 1 : 0) != 0)
        goto failure_3;
    /*
     * Linux domUs seem to depend on "feature-rx-flip" being 0
     * in addition to "feature-rx-copy" being 1. It seems strange
     * to use four possible states to describe a binary decision,
     * but we might as well play nice.
     */
    if (xenbus_printf(XBT_NULL, xsname,
        "feature-rx-flip", "%d", xnb_explicit_pageflip_set ? 1 : 0) != 0)
        goto failure_3;

    if (xenbus_scanf(XBT_NULL, xsname,
        "mac", "%s", mac) != 0) {
        cmn_err(CE_WARN, "xnb_attach: "
            "cannot read mac address from %s",
            xsname);
        goto failure_3;
    }

    if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) {
        cmn_err(CE_WARN,
            "xnb_attach: cannot parse mac address %s",
            mac);
        goto failure_3;
    }

    (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
    (void) xvdi_post_event(dip, XEN_HP_ADD);

    return (DDI_SUCCESS);

failure_3:
    xvdi_remove_event_handler(dip, NULL);

failure_2:
    xnb_ks_free(xnbp);

failure_1:
    mutex_destroy(&xnbp->xnb_rx_lock);
    mutex_destroy(&xnbp->xnb_tx_lock);

failure:
    vmem_free(heap_arena, xnbp->xnb_tx_va, PAGESIZE);
    kmem_free(xnbp, sizeof (*xnbp));
    return (DDI_FAILURE);
}

/*ARGSUSED*/
void
xnb_detach(dev_info_t *dip)
{
    xnb_t *xnbp = ddi_get_driver_private(dip);

    ASSERT(xnbp != NULL);
    ASSERT(!xnbp->xnb_connected);
    ASSERT(xnbp->xnb_rx_buf_count == 0);

    xnb_disconnect_rings(dip);

    xvdi_remove_event_handler(dip, NULL);

    xnb_ks_free(xnbp);

    ddi_set_driver_private(dip, NULL);

    mutex_destroy(&xnbp->xnb_tx_lock);
    mutex_destroy(&xnbp->xnb_rx_lock);

    if (xnbp->xnb_cpop_sz > 0)
        kmem_free(xnbp->xnb_tx_cpop, sizeof (*xnbp->xnb_tx_cpop)
            * xnbp->xnb_cpop_sz);

    ASSERT(xnbp->xnb_tx_va != NULL);
    vmem_free(heap_arena, xnbp->xnb_tx_va, PAGESIZE);

    kmem_free(xnbp, sizeof (*xnbp));
}


static mfn_t
xnb_alloc_page(xnb_t *xnbp)
{
#define WARNING_RATE_LIMIT 100
#define BATCH_SIZE 256
    static mfn_t mfns[BATCH_SIZE];	/* common across all instances */
    static int nth = BATCH_SIZE;
    mfn_t mfn;

    mutex_enter(&xnb_alloc_page_lock);
    if (nth == BATCH_SIZE) {
        if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) {
            xnbp->xnb_stat_allocation_failure++;
            mutex_exit(&xnb_alloc_page_lock);

            /*
             * Try for a single page in low memory situations.
             */
            if (balloon_alloc_pages(1, &mfn) != 1) {
                if ((xnbp->xnb_stat_small_allocation_failure++
                    % WARNING_RATE_LIMIT) == 0)
                    cmn_err(CE_WARN, "xnb_alloc_page: "
                        "Cannot allocate memory to "
                        "transfer packets to peer.");
                return (0);
            } else {
                xnbp->xnb_stat_small_allocation_success++;
                return (mfn);
            }
        }

        nth = 0;
        xnbp->xnb_stat_allocation_success++;
    }

    mfn = mfns[nth++];
    mutex_exit(&xnb_alloc_page_lock);

    ASSERT(mfn != 0);

    return (mfn);
#undef BATCH_SIZE
#undef WARNING_RATE_LIMIT
}

/*ARGSUSED*/
static void
xnb_free_page(xnb_t *xnbp, mfn_t mfn)
{
    int r;
    pfn_t pfn;

    pfn = xen_assign_pfn(mfn);
    pfnzero(pfn, 0, PAGESIZE);
    xen_release_pfn(pfn);

    /*
     * This happens only in the error path, so batching is
     * not worth the complication.
     */
    if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) {
        cmn_err(CE_WARN, "free_page: cannot decrease memory "
            "reservation (%d): page kept but unusable (mfn = 0x%lx).",
            r, mfn);
    }
}

/*
 * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but
 * using local variables.
 */
#define XNB_RING_HAS_UNCONSUMED_REQUESTS(_r)		\
	((((_r)->sring->req_prod - loop) <		\
		(RING_SIZE(_r) - (loop - prod))) ?	\
	    ((_r)->sring->req_prod - loop) :		\
	    (RING_SIZE(_r) - (loop - prod)))
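
/*
 * In other words: with `loop' tracking req_cons and `prod' tracking
 * rsp_prod_pvt, this evaluates to the lesser of the requests the peer has
 * produced but we have not yet consumed and the response slots still free
 * in the ring - i.e. the number of requests we can safely process now.
 */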

mblk_t *
xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
{
    mblk_t *free = mp, *prev = NULL;
    size_t len;
    gnttab_transfer_t *gop;
    boolean_t notify;
    RING_IDX loop, prod, end;

    /*
     * For each packet the sequence of operations is:
     *
     * 1. get a new page from the hypervisor.
     * 2. get a request slot from the ring.
     * 3. copy the data into the new page.
     * 4. transfer the page to the peer.
     * 5. update the request slot.
     * 6. kick the peer.
     * 7. free mp.
     *
     * In order to reduce the number of hypercalls, we prepare
     * several packets for the peer and perform a single hypercall
     * to transfer them.
     */

    mutex_enter(&xnbp->xnb_tx_lock);

    /*
     * If we are not connected to the peer or have not yet
     * finished hotplug it is too early to pass packets to the
     * peer.
     */
    if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
        mutex_exit(&xnbp->xnb_tx_lock);
        DTRACE_PROBE(flip_tx_too_early);
        xnbp->xnb_stat_tx_too_early++;
        return (mp);
    }

    loop = xnbp->xnb_rx_ring.req_cons;
    prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
    gop = xnbp->xnb_tx_top;

    while ((mp != NULL) &&
        XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {

        mfn_t mfn;
        pfn_t pfn;
        netif_rx_request_t *rxreq;
        netif_rx_response_t *rxresp;
        char *valoop;
        size_t offset;
        mblk_t *ml;
        uint16_t cksum_flags;

        /* 1 */
        if ((mfn = xnb_alloc_page(xnbp)) == 0) {
            xnbp->xnb_stat_xmit_defer++;
            break;
        }

        /* 2 */
        rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
        if (!(rxreq->id < NET_RX_RING_SIZE))
            cmn_err(CE_PANIC, "xnb_to_peer: "
                "id %d out of range in request 0x%p",
                rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

        /* Assign a pfn and map the new page at the allocated va. */
        pfn = xen_assign_pfn(mfn);
        hat_devload(kas.a_hat, xnbp->xnb_tx_va, PAGESIZE,
            pfn, PROT_READ | PROT_WRITE, HAT_LOAD);

        offset = TX_BUFFER_HEADROOM;

        /* 3 */
        len = 0;
        valoop = xnbp->xnb_tx_va + offset;
        for (ml = mp; ml != NULL; ml = ml->b_cont) {
            size_t chunk = ml->b_wptr - ml->b_rptr;

            bcopy(ml->b_rptr, valoop, chunk);
            valoop += chunk;
            len += chunk;
        }

        ASSERT(len + offset < PAGESIZE);

        /* Release the pfn. */
        hat_unload(kas.a_hat, xnbp->xnb_tx_va, PAGESIZE,
            HAT_UNLOAD_UNMAP);
        xen_release_pfn(pfn);

        /* 4 */
        gop->mfn = mfn;
        gop->domid = xnbp->xnb_peer;
        gop->ref = rxreq->gref;

        /* 5.1 */
        rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
        rxresp->offset = offset;
        rxresp->flags = 0;

        cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
        if (cksum_flags != 0)
            xnbp->xnb_stat_tx_cksum_deferred++;
        rxresp->flags |= cksum_flags;

        rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
        rxresp->status = len;

        loop++;
        prod++;
        gop++;
        prev = mp;
        mp = mp->b_next;
    }

    /*
     * Did we actually do anything?
     */
    if (loop == xnbp->xnb_rx_ring.req_cons) {
        mutex_exit(&xnbp->xnb_tx_lock);
        return (mp);
    }

    end = loop;

    /*
     * Unlink the end of the 'done' list from the remainder.
     */
    ASSERT(prev != NULL);
    prev->b_next = NULL;

    if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_tx_top,
        loop - xnbp->xnb_rx_ring.req_cons) != 0) {
        cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed");
    }

    loop = xnbp->xnb_rx_ring.req_cons;
    prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
    gop = xnbp->xnb_tx_top;

    while (loop < end) {
        int16_t status = NETIF_RSP_OKAY;

        if (gop->status != 0) {
            status = NETIF_RSP_ERROR;

            /*
             * If the status is anything other than
             * GNTST_bad_page then we don't own the page
             * any more, so don't try to give it back.
             */
            if (gop->status != GNTST_bad_page)
                gop->mfn = 0;
        } else {
            /* The page is no longer ours. */
            gop->mfn = 0;
        }

        if (gop->mfn != 0)
            /*
             * Give back the page, as we won't be using
             * it.
             */
            xnb_free_page(xnbp, gop->mfn);
        else
            /*
             * We gave away a page, update our accounting
             * now.
             */
            balloon_drv_subtracted(1);

        /* 5.2 */
        if (status != NETIF_RSP_OKAY) {
            RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
                status;
        } else {
            xnbp->xnb_stat_opackets++;
            xnbp->xnb_stat_obytes += len;
        }

        loop++;
        prod++;
        gop++;
    }

    xnbp->xnb_rx_ring.req_cons = loop;
    xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

    /* 6 */
    /* LINTED: constant in conditional context */
    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
    if (notify) {
        ec_notify_via_evtchn(xnbp->xnb_evtchn);
        xnbp->xnb_stat_tx_notify_sent++;
    } else {
        xnbp->xnb_stat_tx_notify_deferred++;
    }

    if (mp != NULL)
        xnbp->xnb_stat_xmit_defer++;

    mutex_exit(&xnbp->xnb_tx_lock);

    /* Free mblk_t's that we consumed. */
    freemsgchain(free);

    return (mp);
}

/* helper functions for xnb_copy_to_peer */

/*
 * Grow the array of copy operation descriptors.
 * Returns a pointer to the next available entry.
 */
gnttab_copy_t *
grow_cpop_area(xnb_t *xnbp, gnttab_copy_t *o_cpop)
{
    /*
     * o_cpop (arg.1) is a ptr to the area we would like to copy
     * something into but cannot, because we haven't alloc'ed it
     * yet, or NULL.
     * old_cpop and new_cpop (local) are pointers to old/new
     * versions of xnbp->xnb_tx_cpop.
     */
    gnttab_copy_t *new_cpop, *old_cpop, *ret_cpop;
    size_t newcount;

    ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

    old_cpop = xnbp->xnb_tx_cpop;
    /*
     * o_cpop is a pointer into the array pointed to by old_cpop;
     * it would be an error for exactly one of these pointers to be NULL.
     * We shouldn't call this function if xnb_tx_cpop has already
     * been allocated, but we're starting to fill it from the beginning
     * again.
     */
    ASSERT((o_cpop == NULL && old_cpop == NULL) ||
        (o_cpop != NULL && old_cpop != NULL && o_cpop != old_cpop));

    newcount = xnbp->xnb_cpop_sz + CPOP_DEFCNT;

    new_cpop = kmem_alloc(sizeof (*new_cpop) * newcount, KM_NOSLEEP);
    if (new_cpop == NULL) {
        xnbp->xnb_stat_other_allocation_failure++;
        return (NULL);
    }

    if (o_cpop != NULL) {
        size_t offset = (o_cpop - old_cpop);

        /* we only need to move the parts in use ... */
        (void) memmove(new_cpop, old_cpop, xnbp->xnb_cpop_sz *
            (sizeof (*old_cpop)));

        kmem_free(old_cpop, xnbp->xnb_cpop_sz * sizeof (*old_cpop));

        ret_cpop = new_cpop + offset;
    } else {
        ret_cpop = new_cpop;
    }

    xnbp->xnb_tx_cpop = new_cpop;
    xnbp->xnb_cpop_sz = newcount;

    xnbp->xnb_stat_tx_cpoparea_grown++;

    return (ret_cpop);
}

/*
 * Check whether an address is on a page that's foreign to this domain.
 */
static boolean_t
is_foreign(void *addr)
{
    pfn_t pfn = hat_getpfnum(kas.a_hat, addr);

    return (pfn & PFN_IS_FOREIGN_MFN ? B_TRUE : B_FALSE);
}

/*
 * Insert a newly allocated mblk into a chain, replacing the old one.
 */
static mblk_t *
replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev)
{
    uint32_t start, stuff, end, value, flags;
    mblk_t *new_mp;

    new_mp = copyb(mp);
    if (new_mp == NULL)
        cmn_err(CE_PANIC, "replace_msg: cannot alloc new message "
            "for %p, len %lu", (void *) mp, len);

    hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags);
    (void) hcksum_assoc(new_mp, NULL, NULL, start, stuff, end, value,
        flags, KM_NOSLEEP);

    new_mp->b_next = mp->b_next;
    new_mp->b_prev = mp->b_prev;
    new_mp->b_cont = mp->b_cont;

    /* Make sure we only overwrite pointers to the mblk being replaced. */
    if (mp_prev != NULL && mp_prev->b_next == mp)
        mp_prev->b_next = new_mp;

    if (ml_prev != NULL && ml_prev->b_cont == mp)
        ml_prev->b_cont = new_mp;

    mp->b_next = mp->b_prev = mp->b_cont = NULL;
    freemsg(mp);

    return (new_mp);
}

/*
 * Set all the fields in a gnttab_copy_t.
 */
static void
setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr,
    size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref)
{
    ASSERT(xnbp != NULL && gp != NULL);

    gp->source.offset = s_off;
    gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr));
    gp->source.domid = DOMID_SELF;

    gp->len = (uint16_t)len;
    gp->flags = GNTCOPY_dest_gref;
    gp->status = 0;

    gp->dest.u.ref = d_ref;
    gp->dest.offset = d_off;
    gp->dest.domid = xnbp->xnb_peer;
}

mblk_t *
xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp)
{
    mblk_t *free = mp, *mp_prev = NULL, *saved_mp = mp;
    mblk_t *ml, *ml_prev;
    gnttab_copy_t *gop_cp;
    boolean_t notify;
    RING_IDX loop, prod;
    int i;

    if (!xnbp->xnb_hv_copy)
        return (xnb_to_peer(xnbp, mp));

    /*
     * For each packet the sequence of operations is:
     *
     * 1. get a request slot from the ring.
     * 2. set up data for hypercall (see NOTE below)
     * 3. have the hypervisor copy the data
     * 4. update the request slot.
     * 5. kick the peer.
     *
     * NOTE ad 2.
     * In order to reduce the number of hypercalls, we prepare
     * several packets (mp->b_cont != NULL) for the peer and
     * perform a single hypercall to transfer them.
     * We also have to set up a separate copy operation for
     * every page.
     *
     * If we have more than one message (mp->b_next != NULL),
     * we do this whole dance repeatedly.
     */

    mutex_enter(&xnbp->xnb_tx_lock);

    if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
        mutex_exit(&xnbp->xnb_tx_lock);
        DTRACE_PROBE(copy_tx_too_early);
        xnbp->xnb_stat_tx_too_early++;
        return (mp);
    }

    loop = xnbp->xnb_rx_ring.req_cons;
    prod = xnbp->xnb_rx_ring.rsp_prod_pvt;

    while ((mp != NULL) &&
        XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
        netif_rx_request_t *rxreq;
        netif_rx_response_t *rxresp;
        size_t offset, d_offset;
        size_t len;
        uint16_t cksum_flags;
        int16_t status = NETIF_RSP_OKAY;
        int item_count;

        /* 1 */
        rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
        if (!(rxreq->id < NET_RX_RING_SIZE))
            cmn_err(CE_PANIC, "xnb_copy_to_peer: "
                "id %d out of range in request 0x%p",
                rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

        /* 2 */
        d_offset = offset = TX_BUFFER_HEADROOM;
        len = 0;
        item_count = 0;

        gop_cp = xnbp->xnb_tx_cpop;

        /*
         * We walk the b_cont pointers and set up a gop_cp
         * structure for every page in every data block we have.
         */
        /* 2a */
        for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) {
            size_t chunk = ml->b_wptr - ml->b_rptr;
            uchar_t *r_tmp, *rpt_align;
            size_t r_offset;

            /*
             * If we get an mblk on a page that doesn't belong to
             * this domain, get a new mblk to replace the old one.
             */
            if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) {
                mblk_t *ml_new = replace_msg(ml, chunk,
                    mp_prev, ml_prev);

                /* We can still use old ml, but not *ml! */
                if (free == ml)
                    free = ml_new;
                if (mp == ml)
                    mp = ml_new;
                ml = ml_new;

                xnbp->xnb_stat_tx_foreign_page++;
            }

            rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr);
            r_offset = (uint16_t)(ml->b_rptr - rpt_align);
            r_tmp = ml->b_rptr;

            if (d_offset + chunk > PAGESIZE)
                cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p "
                    "(svd: %p), ml %p, rpt_alg. %p, d_offset "
                    "(%lu) + chunk (%lu) > PAGESIZE %d!",
                    (void *)mp, (void *)saved_mp, (void *)ml,
                    (void *)rpt_align,
                    d_offset, chunk, (int)PAGESIZE);

            while (chunk > 0) {
                size_t part_len;

                item_count++;
                if (item_count > xnbp->xnb_cpop_sz) {
                    gop_cp = grow_cpop_area(xnbp, gop_cp);
                    if (gop_cp == NULL)
                        goto failure;
                }
                /*
                 * If our mblk crosses a page boundary, we need
                 * to do a separate copy for every page.
                 */
                if (r_offset + chunk > PAGESIZE) {
                    part_len = PAGESIZE - r_offset;

                    DTRACE_PROBE3(mblk_page_crossed,
                        (mblk_t *), ml, int, chunk, int,
                        (int)r_offset);

                    xnbp->xnb_stat_tx_pagebndry_crossed++;
                } else {
                    part_len = chunk;
                }

                setup_gop(xnbp, gop_cp, r_tmp, r_offset,
                    d_offset, part_len, rxreq->gref);

                chunk -= part_len;

                len += part_len;
                d_offset += part_len;
                r_tmp += part_len;
                /*
                 * The 2nd, 3rd ... last copies will always
                 * start at r_tmp, therefore r_offset is 0.
                 */
                r_offset = 0;
                gop_cp++;
            }
            ml_prev = ml;
            DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int,
                chunk, int, len, int, item_count);
        }
        /* 3 */
        if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_tx_cpop,
            item_count) != 0) {
            cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed");
            DTRACE_PROBE(HV_granttableopfailed);
        }

        /* 4 */
        rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
        rxresp->offset = offset;

        rxresp->flags = 0;

        DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int,
            (int)rxresp->offset, int, (int)rxresp->flags, int,
            (int)rxresp->status);

        cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
        if (cksum_flags != 0)
            xnbp->xnb_stat_tx_cksum_deferred++;
        rxresp->flags |= cksum_flags;

        rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
        rxresp->status = len;

        DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int,
            (int)rxresp->offset, int, (int)rxresp->flags, int,
            (int)rxresp->status);

        for (i = 0; i < item_count; i++) {
            if (xnbp->xnb_tx_cpop[i].status != 0) {
                DTRACE_PROBE2(cpop__status__nonnull, int,
                    (int)xnbp->xnb_tx_cpop[i].status,
                    int, i);
                status = NETIF_RSP_ERROR;
            }
        }

        /* 5.2 */
        if (status != NETIF_RSP_OKAY) {
            RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
                status;
            xnbp->xnb_stat_tx_rsp_notok++;
        } else {
            xnbp->xnb_stat_opackets++;
            xnbp->xnb_stat_obytes += len;
        }

        loop++;
        prod++;
        mp_prev = mp;
        mp = mp->b_next;
    }
failure:
    /*
     * Did we actually do anything?
     */
    if (loop == xnbp->xnb_rx_ring.req_cons) {
        mutex_exit(&xnbp->xnb_tx_lock);
        return (mp);
    }

    /*
     * Unlink the end of the 'done' list from the remainder.
     */
    ASSERT(mp_prev != NULL);
    mp_prev->b_next = NULL;

    xnbp->xnb_rx_ring.req_cons = loop;
    xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

    /* 6 */
    /* LINTED: constant in conditional context */
    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
    if (notify) {
        ec_notify_via_evtchn(xnbp->xnb_evtchn);
        xnbp->xnb_stat_tx_notify_sent++;
    } else {
        xnbp->xnb_stat_tx_notify_deferred++;
    }

    if (mp != NULL)
        xnbp->xnb_stat_xmit_defer++;

    mutex_exit(&xnbp->xnb_tx_lock);

    /* Free mblk_t structs we have consumed. */
    freemsgchain(free);

    return (mp);
}

/*ARGSUSED*/
static int
xnb_rxbuf_constructor(void *buf, void *arg, int kmflag)
{
    xnb_rxbuf_t *rxp = buf;

    bzero(rxp, sizeof (*rxp));

    rxp->xr_free_rtn.free_func = xnb_rx_complete;
    rxp->xr_free_rtn.free_arg = (caddr_t)rxp;

    rxp->xr_mop.host_addr =
        (uint64_t)(uintptr_t)vmem_alloc(heap_arena, PAGESIZE,
        ((kmflag & KM_NOSLEEP) == KM_NOSLEEP) ?
        VM_NOSLEEP : VM_SLEEP);

    if (rxp->xr_mop.host_addr == NULL) {
        cmn_err(CE_WARN, "xnb_rxbuf_constructor: "
            "cannot get address space");
        return (-1);
    }

    /*
     * Have the hat ensure that a page table exists for the VA.
     */
    hat_prepare_mapping(kas.a_hat,
        (caddr_t)(uintptr_t)rxp->xr_mop.host_addr);

    return (0);
}

/*ARGSUSED*/
static void
xnb_rxbuf_destructor(void *buf, void *arg)
{
    xnb_rxbuf_t *rxp = buf;

    ASSERT(rxp->xr_mop.host_addr != NULL);
    ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == 0);

    hat_release_mapping(kas.a_hat,
        (caddr_t)(uintptr_t)rxp->xr_mop.host_addr);
    vmem_free(heap_arena,
        (caddr_t)(uintptr_t)rxp->xr_mop.host_addr, PAGESIZE);
}

static void
xnb_rx_notify_peer(xnb_t *xnbp)
{
    boolean_t notify;

    ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

    /* LINTED: constant in conditional context */
    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify);
    if (notify) {
        ec_notify_via_evtchn(xnbp->xnb_evtchn);
        xnbp->xnb_stat_rx_notify_sent++;
    } else {
        xnbp->xnb_stat_rx_notify_deferred++;
    }
}

static void
xnb_rx_complete(xnb_rxbuf_t *rxp)
{
    xnb_t *xnbp = rxp->xr_xnbp;

    ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == XNB_RXBUF_INUSE);

    mutex_enter(&xnbp->xnb_rx_lock);
    xnb_rx_schedule_unmop(xnbp, &rxp->xr_mop, rxp);
    mutex_exit(&xnbp->xnb_rx_lock);
}

static void
xnb_rx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
{
    RING_IDX i;
    netif_tx_response_t *txresp;

    ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

    i = xnbp->xnb_tx_ring.rsp_prod_pvt;

    txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i);
    txresp->id = id;
    txresp->status = status;

    xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1;

    /*
     * Note that we don't push the change to the peer here - that
     * is the caller's responsibility.
     */
}

static void
xnb_rx_schedule_unmop(xnb_t *xnbp, gnttab_map_grant_ref_t *mop,
    xnb_rxbuf_t *rxp)
{
    gnttab_unmap_grant_ref_t *unmop;
    int u_count;
    int reqs_on_ring;

    ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
    ASSERT(xnbp->xnb_rx_unmop_count < NET_TX_RING_SIZE);

    u_count = xnbp->xnb_rx_unmop_count++;

    /* Cache data for the time when we actually unmap grant refs */
    xnbp->xnb_rx_unmop_rxp[u_count] = rxp;

    unmop = &xnbp->xnb_rx_unmop[u_count];
    unmop->host_addr = mop->host_addr;
    unmop->dev_bus_addr = mop->dev_bus_addr;
    unmop->handle = mop->handle;

    /*
     * We cannot check the ring once we're disconnected from it. Batching
     * doesn't seem to be a useful optimisation in this case either,
     * so we directly call into the actual unmap function.
     */
    if (xnbp->xnb_connected) {
        reqs_on_ring = RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_tx_ring);

        /*
         * By tuning xnb_unmop_hiwat to N, we can emulate "N per batch"
         * or (with N == 1) "immediate unmop" behaviour.
         * The "> xnb_unmop_lowwat" is a guard against ring exhaustion.
         */
        if (xnbp->xnb_rx_unmop_count < xnb_unmop_hiwat &&
            reqs_on_ring > xnb_unmop_lowwat)
            return;
    }

    xnb_rx_perform_pending_unmop(xnbp);
}

/*
 * Here we perform the actual unmapping of the data that was
 * accumulated in xnb_rx_schedule_unmop().
 * Note that it is the caller's responsibility to make sure that
 * there's actually something there to unmop.
 */
static void
xnb_rx_perform_pending_unmop(xnb_t *xnbp)
{
    RING_IDX loop;
#ifdef XNB_DEBUG
    gnttab_unmap_grant_ref_t *unmop;
#endif /* XNB_DEBUG */

    ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
    ASSERT(xnbp->xnb_rx_unmop_count > 0);

    if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
        xnbp->xnb_rx_unmop, xnbp->xnb_rx_unmop_count) < 0) {
        cmn_err(CE_WARN, "xnb_rx_perform_pending_unmop: "
            "unmap grant operation failed, "
            "%d pages lost", xnbp->xnb_rx_unmop_count);
    }

#ifdef XNB_DEBUG
    for (loop = 0, unmop = xnbp->xnb_rx_unmop;
        loop < xnbp->xnb_rx_unmop_count;
        loop++, unmop++) {
        if (unmop->status != 0) {
            cmn_err(CE_WARN, "xnb_rx_perform_pending_unmop: "
                "unmap grant reference failed (%d)",
                unmop->status);
        }
    }
#endif /* XNB_DEBUG */

    for (loop = 0; loop < xnbp->xnb_rx_unmop_count; loop++) {
        xnb_rxbuf_t *rxp = xnbp->xnb_rx_unmop_rxp[loop];

        if (rxp == NULL)
            cmn_err(CE_PANIC,
                "xnb_rx_perform_pending_unmop: "
                "unexpected NULL rxp (loop %d; count %d)!",
                loop, xnbp->xnb_rx_unmop_count);

        if (xnbp->xnb_connected)
            xnb_rx_mark_complete(xnbp, rxp->xr_id, rxp->xr_status);
        xnb_rxbuf_put(xnbp, rxp);
    }
    if (xnbp->xnb_connected)
        xnb_rx_notify_peer(xnbp);

    xnbp->xnb_rx_unmop_count = 0;

#ifdef XNB_DEBUG
    bzero(xnbp->xnb_rx_unmop, sizeof (xnbp->xnb_rx_unmop));
    bzero(xnbp->xnb_rx_unmop_rxp, sizeof (xnbp->xnb_rx_unmop_rxp));
#endif /* XNB_DEBUG */
}
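
/*
 * Allocate an rxbuf from the kmem cache and prepare its grant-table map
 * operation for this instance's peer.  The buffer is marked in use and
 * counted in xnb_rx_buf_count until xnb_rxbuf_put() below releases it.
 */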
static xnb_rxbuf_t *
xnb_rxbuf_get(xnb_t *xnbp, int flags)
{
    xnb_rxbuf_t *rxp;

    ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

    rxp = kmem_cache_alloc(xnb_rxbuf_cachep, flags);
    if (rxp != NULL) {
        ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == 0);
        rxp->xr_flags |= XNB_RXBUF_INUSE;

        rxp->xr_xnbp = xnbp;
        rxp->xr_mop.dom = xnbp->xnb_peer;

        rxp->xr_mop.flags = GNTMAP_host_map;
        if (!xnbp->xnb_rx_pages_writable)
            rxp->xr_mop.flags |= GNTMAP_readonly;

        xnbp->xnb_rx_buf_count++;
    }

    return (rxp);
}

static void
xnb_rxbuf_put(xnb_t *xnbp, xnb_rxbuf_t *rxp)
{
    ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
    ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == XNB_RXBUF_INUSE);

    rxp->xr_flags &= ~XNB_RXBUF_INUSE;
    xnbp->xnb_rx_buf_count--;

    kmem_cache_free(xnb_rxbuf_cachep, rxp);
}

static mblk_t *
xnb_recv(xnb_t *xnbp)
{
    RING_IDX start, end, loop;
    gnttab_map_grant_ref_t *mop;
    xnb_rxbuf_t **rxpp;
    netif_tx_request_t *txreq;
    boolean_t work_to_do;
    mblk_t *head, *tail;
    /*
     * If the peer granted a read-only mapping to the page then we
     * must copy the data, as the local protocol stack (should the
     * packet be destined for this host) will modify the packet
     * 'in place'.
     */
    boolean_t copy = !xnbp->xnb_rx_pages_writable;

    /*
     * For each individual request, the sequence of actions is:
     *
     * 1. get the request.
     * 2. map the page based on the grant ref.
     * 3. allocate an mblk, copy the data to it.
     * 4. release the grant.
     * 5. update the ring.
     * 6. pass the packet upward.
     * 7. kick the peer.
     *
     * In fact, we try to perform the grant operations in batches,
     * so there are two loops.
     */

    head = tail = NULL;
around:
    ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

    /* LINTED: constant in conditional context */
    RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do);
    if (!work_to_do) {
finished:
        return (head);
    }

    start = xnbp->xnb_tx_ring.req_cons;
    end = xnbp->xnb_tx_ring.sring->req_prod;

    for (loop = start, mop = xnbp->xnb_rx_mop, rxpp = xnbp->xnb_rx_bufp;
        loop != end;
        loop++, mop++, rxpp++) {
        xnb_rxbuf_t *rxp;

        rxp = xnb_rxbuf_get(xnbp, KM_NOSLEEP);
        if (rxp == NULL)
            break;

        ASSERT(xnbp->xnb_rx_pages_writable ||
            ((rxp->xr_mop.flags & GNTMAP_readonly)
            == GNTMAP_readonly));

        rxp->xr_mop.ref =
            RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop)->gref;

        *mop = rxp->xr_mop;
        *rxpp = rxp;
    }

    if ((loop - start) == 0)
        goto finished;

    end = loop;

    if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
        xnbp->xnb_rx_mop, end - start) != 0) {

        cmn_err(CE_WARN, "xnb_recv: map grant operation failed");

        loop = start;
        rxpp = xnbp->xnb_rx_bufp;

        while (loop != end) {
            xnb_rxbuf_put(xnbp, *rxpp);

            loop++;
            rxpp++;
        }

        goto finished;
    }

    for (loop = start, mop = xnbp->xnb_rx_mop, rxpp = xnbp->xnb_rx_bufp;
        loop != end;
        loop++, mop++, rxpp++) {
        mblk_t *mp = NULL;
        int16_t status = NETIF_RSP_OKAY;
        xnb_rxbuf_t *rxp = *rxpp;

        if (mop->status != 0) {
            cmn_err(CE_WARN, "xnb_recv: "
                "failed to map buffer: %d",
                mop->status);
            status = NETIF_RSP_ERROR;
        }

        txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);

        if (status == NETIF_RSP_OKAY) {
            if (copy) {
                mp = allocb(txreq->size, BPRI_MED);
                if (mp == NULL) {
                    status = NETIF_RSP_ERROR;
                    xnbp->xnb_stat_rx_allocb_failed++;
                } else {
                    bcopy((caddr_t)(uintptr_t)
                        mop->host_addr + txreq->offset,
                        mp->b_wptr, txreq->size);
                    mp->b_wptr += txreq->size;
                }
            } else {
                mp = desballoc((uchar_t *)(uintptr_t)
                    mop->host_addr + txreq->offset,
                    txreq->size, 0, &rxp->xr_free_rtn);
                if (mp == NULL) {
                    status = NETIF_RSP_ERROR;
                    xnbp->xnb_stat_rx_allocb_failed++;
                } else {
                    rxp->xr_id = txreq->id;
                    rxp->xr_status = status;
                    rxp->xr_mop = *mop;

                    mp->b_wptr += txreq->size;
                }
            }

            /*
             * If we have a buffer and there are checksum
             * flags, process them appropriately.
             */
            if ((mp != NULL) &&
                ((txreq->flags &
                (NETTXF_csum_blank | NETTXF_data_validated))
                != 0)) {
                mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp,
                    mp, txreq->flags);
                xnbp->xnb_stat_rx_cksum_no_need++;
            }
        }

        if (copy || (mp == NULL)) {
            rxp->xr_status = status;
            rxp->xr_id = txreq->id;
            xnb_rx_schedule_unmop(xnbp, mop, rxp);
        }

        if (mp != NULL) {
            xnbp->xnb_stat_ipackets++;
            xnbp->xnb_stat_rbytes += txreq->size;

            mp->b_next = NULL;
            if (head == NULL) {
                ASSERT(tail == NULL);
                head = mp;
            } else {
                ASSERT(tail != NULL);
                tail->b_next = mp;
            }
            tail = mp;
        }
    }

    xnbp->xnb_tx_ring.req_cons = loop;

    goto around;
    /* NOTREACHED */
}

/*
 * intr() -- ring interrupt service routine
 */
static uint_t
xnb_intr(caddr_t arg)
{
    xnb_t *xnbp = (xnb_t *)arg;
    mblk_t *mp;

    xnbp->xnb_stat_intr++;

    mutex_enter(&xnbp->xnb_rx_lock);

    ASSERT(xnbp->xnb_connected);

    mp = xnb_recv(xnbp);

    mutex_exit(&xnbp->xnb_rx_lock);

    if (!xnbp->xnb_hotplugged) {
        xnbp->xnb_stat_rx_too_early++;
        goto fail;
    }
    if (mp == NULL) {
        xnbp->xnb_stat_spurious_intr++;
        goto fail;
    }

    xnbp->xnb_flavour->xf_recv(xnbp, mp);

    return (DDI_INTR_CLAIMED);

fail:
    freemsgchain(mp);
    return (DDI_INTR_CLAIMED);
}

static boolean_t
xnb_connect_rings(dev_info_t *dip)
{
    xnb_t *xnbp = ddi_get_driver_private(dip);
    char *oename;
    struct gnttab_map_grant_ref map_op;
    evtchn_port_t evtchn;
    int i;

    /*
     * Cannot attempt to connect the rings if already connected.
     */
    ASSERT(!xnbp->xnb_connected);

    oename = xvdi_get_oename(dip);

    if (xenbus_gather(XBT_NULL, oename,
        "event-channel", "%u", &evtchn,
        "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref,
        "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref,
        NULL) != 0) {
        cmn_err(CE_WARN, "xnb_connect_rings: "
            "cannot read other-end details from %s",
            oename);
        goto fail;
    }

    if (xenbus_scanf(XBT_NULL, oename,
        "feature-tx-writable", "%d", &i) != 0)
        i = 0;
    if (i != 0)
        xnbp->xnb_rx_pages_writable = B_TRUE;

    if (xenbus_scanf(XBT_NULL, oename,
        "feature-no-csum-offload", "%d", &i) != 0)
        i = 0;
    if ((i == 1) || !xnbp->xnb_cksum_offload)
        xnbp->xnb_cksum_offload = B_FALSE;

    /* Check whether our peer knows and requests hypervisor copy */
    if (xenbus_scanf(XBT_NULL, oename, "request-rx-copy", "%d", &i)
        != 0)
        i = 0;
    if (i != 0)
        xnbp->xnb_hv_copy = B_TRUE;

    /*
     * 1. allocate a vaddr for the tx page, one for the rx page.
     * 2. call GNTTABOP_map_grant_ref to map the relevant pages
     *    into the allocated vaddr (one for tx, one for rx).
     * 3. call EVTCHNOP_bind_interdomain to have the event channel
     *    bound to this domain.
     * 4. associate the event channel with an interrupt.
     * 5. declare ourselves connected.
     * 6. enable the interrupt.
     */

    /* 1.tx */
    xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
        0, 0, 0, 0, VM_SLEEP);
    ASSERT(xnbp->xnb_tx_ring_addr != NULL);

    /* 2.tx */
    map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr);
    map_op.flags = GNTMAP_host_map;
    map_op.ref = xnbp->xnb_tx_ring_ref;
    map_op.dom = xnbp->xnb_peer;
    hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
    if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
        &map_op, 1) != 0 || map_op.status != 0) {
        cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page.");
        goto fail;
    }
    xnbp->xnb_tx_ring_handle = map_op.handle;

    /* LINTED: constant in conditional context */
    BACK_RING_INIT(&xnbp->xnb_tx_ring,
        (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);

    /* 1.rx */
    xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
        0, 0, 0, 0, VM_SLEEP);
    ASSERT(xnbp->xnb_rx_ring_addr != NULL);

    /* 2.rx */
    map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr);
    map_op.flags = GNTMAP_host_map;
    map_op.ref = xnbp->xnb_rx_ring_ref;
    map_op.dom = xnbp->xnb_peer;
    hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
    if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
        &map_op, 1) != 0 || map_op.status != 0) {
        cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page.");
        goto fail;
    }
    xnbp->xnb_rx_ring_handle = map_op.handle;

    /* LINTED: constant in conditional context */
    BACK_RING_INIT(&xnbp->xnb_rx_ring,
        (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE);

    /* 3 */
    if (xvdi_bind_evtchn(dip, evtchn) != DDI_SUCCESS) {
        cmn_err(CE_WARN, "xnb_connect_rings: "
            "cannot bind event channel %d", xnbp->xnb_evtchn);
        xnbp->xnb_evtchn = INVALID_EVTCHN;
        goto fail;
    }
    xnbp->xnb_evtchn = xvdi_get_evtchn(dip);

    /*
     * It would be good to set the state to XenbusStateConnected
     * here as well, but then what if ddi_add_intr() failed?
     * Changing the state in the store will be noticed by the peer
     * and cannot be "taken back".
     */
    mutex_enter(&xnbp->xnb_tx_lock);
    mutex_enter(&xnbp->xnb_rx_lock);

    /* 5.1 */
    xnbp->xnb_connected = B_TRUE;

    mutex_exit(&xnbp->xnb_rx_lock);
    mutex_exit(&xnbp->xnb_tx_lock);

    /* 4, 6 */
    if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp)
        != DDI_SUCCESS) {
        cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt");
        goto fail;
    }
    xnbp->xnb_irq = B_TRUE;

    /* 5.2 */
    (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);

    return (B_TRUE);

fail:
    mutex_enter(&xnbp->xnb_tx_lock);
    mutex_enter(&xnbp->xnb_rx_lock);

    xnbp->xnb_connected = B_FALSE;
    mutex_exit(&xnbp->xnb_rx_lock);
    mutex_exit(&xnbp->xnb_tx_lock);

    return (B_FALSE);
}

static void
xnb_disconnect_rings(dev_info_t *dip)
{
    xnb_t *xnbp = ddi_get_driver_private(dip);

    if (xnbp->xnb_irq) {
        ddi_remove_intr(dip, 0, NULL);
        xnbp->xnb_irq = B_FALSE;
    }

    if (xnbp->xnb_rx_unmop_count > 0)
        xnb_rx_perform_pending_unmop(xnbp);

    if (xnbp->xnb_evtchn != INVALID_EVTCHN) {
        xvdi_free_evtchn(dip);
        xnbp->xnb_evtchn = INVALID_EVTCHN;
    }

    if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) {
        struct gnttab_unmap_grant_ref unmap_op;

        unmap_op.host_addr = (uint64_t)(uintptr_t)
            xnbp->xnb_rx_ring_addr;
        unmap_op.dev_bus_addr = 0;
        unmap_op.handle = xnbp->xnb_rx_ring_handle;
        if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
            &unmap_op, 1) != 0)
            cmn_err(CE_WARN, "xnb_disconnect_rings: "
                "cannot unmap rx-ring page (%d)",
                unmap_op.status);

        xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
    }

    if (xnbp->xnb_rx_ring_addr != NULL) {
        hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
        vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE);
        xnbp->xnb_rx_ring_addr = NULL;
    }

    if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) {
        struct gnttab_unmap_grant_ref unmap_op;

        unmap_op.host_addr = (uint64_t)(uintptr_t)
            xnbp->xnb_tx_ring_addr;
        unmap_op.dev_bus_addr = 0;
        unmap_op.handle = xnbp->xnb_tx_ring_handle;
        if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
            &unmap_op, 1) != 0)
            cmn_err(CE_WARN, "xnb_disconnect_rings: "
                "cannot unmap tx-ring page (%d)",
                unmap_op.status);

        xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
    }

    if (xnbp->xnb_tx_ring_addr != NULL) {
        hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
        vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE);
        xnbp->xnb_tx_ring_addr = NULL;
    }
}

/*ARGSUSED*/
static void
xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
    xnb_t *xnbp = ddi_get_driver_private(dip);
    XenbusState new_state = *(XenbusState *)impl_data;

    ASSERT(xnbp != NULL);

    switch (new_state) {
    case XenbusStateConnected:
        /* spurious state change */
        if (xnbp->xnb_connected)
            return;

        if (xnb_connect_rings(dip)) {
            xnbp->xnb_flavour->xf_peer_connected(xnbp);
        } else {
            xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
            xnb_disconnect_rings(dip);
            (void) xvdi_switch_state(dip, XBT_NULL,
                XenbusStateClosed);
            (void) xvdi_post_event(dip, XEN_HP_REMOVE);
        }

        /*
         * Now that we've attempted to connect it's reasonable
         * to allow an attempt to detach.
         */
        xnbp->xnb_detachable = B_TRUE;

        break;

    case XenbusStateClosing:
        (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);

        break;

    case XenbusStateClosed:
        xnbp->xnb_flavour->xf_peer_disconnected(xnbp);

        mutex_enter(&xnbp->xnb_tx_lock);
        mutex_enter(&xnbp->xnb_rx_lock);

        xnb_disconnect_rings(dip);
        xnbp->xnb_connected = B_FALSE;

        mutex_exit(&xnbp->xnb_rx_lock);
        mutex_exit(&xnbp->xnb_tx_lock);

        (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
        (void) xvdi_post_event(dip, XEN_HP_REMOVE);
        /*
         * In all likelihood this is already set (in the above
         * case), but if the peer never attempted to connect
         * and the domain is destroyed we get here without
         * having been through the case above, so we set it to
         * be sure.
         */
        xnbp->xnb_detachable = B_TRUE;

        break;

    default:
        break;
    }
}

/*ARGSUSED*/
static void
xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
    xnb_t *xnbp = ddi_get_driver_private(dip);
    xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;
    boolean_t success;

    ASSERT(xnbp != NULL);

    switch (state) {
    case Connected:

        /* spurious hotplug event */
        if (xnbp->xnb_hotplugged)
            return;

        success = xnbp->xnb_flavour->xf_hotplug_connected(xnbp);

        mutex_enter(&xnbp->xnb_tx_lock);
        mutex_enter(&xnbp->xnb_rx_lock);

        xnbp->xnb_hotplugged = success;

        mutex_exit(&xnbp->xnb_rx_lock);
        mutex_exit(&xnbp->xnb_tx_lock);
        break;

    default:
        break;
    }
}

static struct modldrv modldrv = {
    &mod_miscops, "xnb module %I%",
};

static struct modlinkage modlinkage = {
    MODREV_1, &modldrv, NULL
};

int
_init(void)
{
    int i;

    mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL);

    xnb_rxbuf_cachep = kmem_cache_create("xnb_rxbuf_cachep",
        sizeof (xnb_rxbuf_t), 0, xnb_rxbuf_constructor,
        xnb_rxbuf_destructor, NULL, NULL, NULL, 0);
    ASSERT(xnb_rxbuf_cachep != NULL);

    i = mod_install(&modlinkage);
    if (i != DDI_SUCCESS) {
        kmem_cache_destroy(xnb_rxbuf_cachep);
        mutex_destroy(&xnb_alloc_page_lock);
    }
    return (i);
}

int
_info(struct modinfo *modinfop)
{
    return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
    int i;

    i = mod_remove(&modlinkage);
    if (i == DDI_SUCCESS) {
        kmem_cache_destroy(xnb_rxbuf_cachep);
        mutex_destroy(&xnb_alloc_page_lock);
    }
    return (i);
}