/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#ifdef DEBUG
#define XNB_DEBUG 1
#endif /* DEBUG */

#include "xnb.h"

#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/mac.h>
#include <sys/dlpi.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/types.h>
#include <sys/pattr.h>
#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/balloon_impl.h>
#include <sys/evtchn_impl.h>
#include <sys/gnttab.h>
#include <vm/vm_dep.h>

#include <sys/gld.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <sys/vnic_impl.h> /* blech. */

/*
 * The terms "transmit" and "receive" are used in their traditional
 * sense here - packets from other parts of this system are
 * "transmitted" to the peer domain and those originating from the
 * peer are "received".
 *
 * In some cases this can be confusing, because various data
 * structures are shared with the domU driver, which has the opposite
 * view of what constitutes "transmit" and "receive". In naming the
 * shared structures the domU driver always wins.
 */

/*
 * XXPV dme: things to do, as well as various things indicated
 * throughout the source:
 * - copy avoidance outbound.
 * - copy avoidance inbound.
 * - transfer credit limiting.
 * - MAC address based filtering.
 */

/*
 * Linux expects to have some headroom in received buffers. The Linux
 * frontend driver (netfront) checks to see if the headroom is
 * available and will re-allocate the buffer to make room if
 * necessary. To avoid this we add TX_BUFFER_HEADROOM bytes of
 * headroom to each packet we pass to the peer.
 */
#define TX_BUFFER_HEADROOM 16
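
/*
 * Note that the headroom is created simply by copying the packet data
 * at offset TX_BUFFER_HEADROOM within the page given to the peer; the
 * offset is reported back in the rx response (rxresp->offset), so the
 * frontend knows where the data starts. See xnb_to_peer() and
 * xnb_copy_to_peer().
 */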

static boolean_t xnb_cksum_offload = B_TRUE;

static boolean_t xnb_connect_rings(dev_info_t *);
static void xnb_disconnect_rings(dev_info_t *);
static void xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);
static void xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);

static int xnb_rxbuf_constructor(void *, void *, int);
static void xnb_rxbuf_destructor(void *, void *);
static xnb_rxbuf_t *xnb_rxbuf_get(xnb_t *, int);
static void xnb_rxbuf_put(xnb_t *, xnb_rxbuf_t *);
static void xnb_rx_notify_peer(xnb_t *);
static void xnb_rx_complete(xnb_rxbuf_t *);
static void xnb_rx_mark_complete(xnb_t *, RING_IDX, int16_t);
static void xnb_rx_schedule_unmop(xnb_t *, gnttab_map_grant_ref_t *,
    xnb_rxbuf_t *);
static void xnb_rx_perform_pending_unmop(xnb_t *);
mblk_t *xnb_copy_to_peer(xnb_t *, mblk_t *);

int xnb_unmop_lowwat = NET_TX_RING_SIZE >> 2;
int xnb_unmop_hiwat = NET_TX_RING_SIZE - (NET_TX_RING_SIZE >> 2);


boolean_t xnb_hv_copy = B_TRUE;
boolean_t xnb_explicit_pageflip_set = B_FALSE;

#ifdef XNB_DEBUG
#define NR_GRANT_ENTRIES \
	(NR_GRANT_FRAMES * PAGESIZE / sizeof (grant_entry_t))
#endif /* XNB_DEBUG */

/* XXPV dme: are these really invalid? */
#define INVALID_GRANT_HANDLE ((grant_handle_t)-1)
#define INVALID_GRANT_REF ((grant_ref_t)-1)

static kmem_cache_t *xnb_rxbuf_cachep;
static kmutex_t xnb_alloc_page_lock;

/*
 * Statistics.
 */
static char *aux_statistics[] = {
	"tx_cksum_deferred",
	"rx_cksum_no_need",
	"tx_notify_deferred",
	"tx_notify_sent",
	"rx_notify_deferred",
	"rx_notify_sent",
	"tx_too_early",
	"rx_too_early",
	"rx_allocb_failed",
	"tx_allocb_failed",
	"tx_foreign_page",
	"mac_full",
	"spurious_intr",
	"allocation_success",
	"allocation_failure",
	"small_allocation_success",
	"small_allocation_failure",
	"other_allocation_failure",
	"tx_pageboundary_crossed",
	"tx_cpoparea_grown",
	"csum_hardware",
	"csum_software",
};

static int
xnb_ks_aux_update(kstat_t *ksp, int flag)
{
	xnb_t *xnbp;
	kstat_named_t *knp;

	if (flag != KSTAT_READ)
		return (EACCES);

	xnbp = ksp->ks_private;
	knp = ksp->ks_data;

	/*
	 * Assignment order should match that of the names in
	 * aux_statistics.
	 */
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_no_need;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_foreign_page;
	(knp++)->value.ui64 = xnbp->xnb_stat_mac_full;
	(knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_pagebndry_crossed;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_cpoparea_grown;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_software;

	return (0);
}

static boolean_t
xnb_ks_init(xnb_t *xnbp)
{
	int nstat = sizeof (aux_statistics) /
	    sizeof (aux_statistics[0]);
	char **cp = aux_statistics;
	kstat_named_t *knp;

	/*
	 * Create and initialise kstats.
	 */
	xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo),
	    ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net",
	    KSTAT_TYPE_NAMED, nstat, 0);
	if (xnbp->xnb_kstat_aux == NULL)
		return (B_FALSE);

	xnbp->xnb_kstat_aux->ks_private = xnbp;
	xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update;

	knp = xnbp->xnb_kstat_aux->ks_data;
	while (nstat > 0) {
		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);

		knp++;
		cp++;
		nstat--;
	}

	kstat_install(xnbp->xnb_kstat_aux);

	return (B_TRUE);
}

static void
xnb_ks_free(xnb_t *xnbp)
{
	kstat_delete(xnbp->xnb_kstat_aux);
}

/*
 * Software checksum calculation and insertion for an arbitrary packet.
 */
/*ARGSUSED*/
static mblk_t *
xnb_software_csum(xnb_t *xnbp, mblk_t *mp)
{
	/*
	 * XXPV dme: shouldn't rely on vnic_fix_cksum(), not least
	 * because it doesn't cover all of the interesting cases :-(
	 */
	(void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0,
	    HCK_FULLCKSUM, KM_NOSLEEP);

	return (vnic_fix_cksum(mp));
}

mblk_t *
xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab)
{
	struct ether_header *ehp;
	uint16_t sap;
	uint32_t offset;
	ipha_t *ipha;

	ASSERT(mp->b_next == NULL);

	/*
	 * Check that the packet is contained in a single mblk. In
	 * the "from peer" path this is true today, but will change
	 * when scatter gather support is added. In the "to peer"
	 * path we cannot be sure, but in most cases it will be true
	 * (in the xnbo case the packet has come from a MAC device
	 * which is unlikely to split packets).
	 */
	if (mp->b_cont != NULL)
		goto software;

	/*
	 * If the MAC has no hardware capability don't do any further
	 * checking.
	 */
	if (capab == 0)
		goto software;

	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
	ehp = (struct ether_header *)mp->b_rptr;

	if (ntohs(ehp->ether_type) == VLAN_TPID) {
		struct ether_vlan_header *evhp;

		ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
		evhp = (struct ether_vlan_header *)mp->b_rptr;
		sap = ntohs(evhp->ether_type);
		offset = sizeof (struct ether_vlan_header);
	} else {
		sap = ntohs(ehp->ether_type);
		offset = sizeof (struct ether_header);
	}

	/*
	 * We only attempt to do IPv4 packets in hardware.
	 */
	if (sap != ETHERTYPE_IP)
		goto software;

	/*
	 * We know that this is an IPv4 packet.
	 */
	ipha = (ipha_t *)(mp->b_rptr + offset);

	switch (ipha->ipha_protocol) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		/*
		 * This is a TCP/IPv4 or UDP/IPv4 packet.
		 *
		 * If the capabilities indicate that full checksum
		 * offload is available, use it.
		 */
		if ((capab & HCKSUM_INET_FULL_V4) != 0) {
			(void) hcksum_assoc(mp, NULL, NULL,
			    0, 0, 0, 0,
			    HCK_FULLCKSUM, KM_NOSLEEP);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		/*
		 * XXPV dme: If the capabilities indicate that partial
		 * checksum offload is available, we should use it.
		 */

		break;

	default:
		/* Use software. */
		break;
	}

software:
	/*
	 * We are not able to use any offload so do the whole thing in
	 * software.
	 */
	xnbp->xnb_stat_csum_software++;

	return (xnb_software_csum(xnbp, mp));
}

int
xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
{
	xnb_t *xnbp;
	char *xsname, mac[ETHERADDRL * 3];

	xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP);

	xnbp->xnb_flavour = flavour;
	xnbp->xnb_flavour_data = flavour_data;
	xnbp->xnb_devinfo = dip;
	xnbp->xnb_evtchn = INVALID_EVTCHN;
	xnbp->xnb_irq = B_FALSE;
	xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_cksum_offload = xnb_cksum_offload;
	xnbp->xnb_connected = B_FALSE;
	xnbp->xnb_hotplugged = B_FALSE;
	xnbp->xnb_detachable = B_FALSE;
	xnbp->xnb_peer = xvdi_get_oeid(dip);
	xnbp->xnb_rx_pages_writable = B_FALSE;

	xnbp->xnb_rx_buf_count = 0;
	xnbp->xnb_rx_unmop_count = 0;

	xnbp->xnb_hv_copy = B_FALSE;

	xnbp->xnb_tx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
	ASSERT(xnbp->xnb_tx_va != NULL);

	if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie)
	    != DDI_SUCCESS)
		goto failure;

	/* allocated on demand, when/if we enter xnb_copy_to_peer() */
	xnbp->xnb_tx_cpop = NULL;
	xnbp->xnb_cpop_sz = 0;

	mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);

	/* set driver private pointer now */
	ddi_set_driver_private(dip, xnbp);

	if (!xnb_ks_init(xnbp))
		goto failure_1;

	/*
	 * Receive notification of changes in the state of the
	 * driver in the guest domain.
	 */
	if (xvdi_add_event_handler(dip, XS_OE_STATE,
	    xnb_oe_state_change) != DDI_SUCCESS)
		goto failure_2;

	/*
	 * Receive notification of hotplug events.
	 */
	if (xvdi_add_event_handler(dip, XS_HP_STATE,
	    xnb_hp_state_change) != DDI_SUCCESS)
		goto failure_2;

	xsname = xvdi_get_xsname(dip);

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-no-csum-offload", "%d",
	    xnbp->xnb_cksum_offload ? 0 : 1) != 0)
		goto failure_3;

	/*
	 * Use global xnb_hv_copy to export this feature. This means that
	 * we have to decide what to do before starting up a guest domain.
	 */
	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-copy", "%d", xnb_hv_copy ? 1 : 0) != 0)
		goto failure_3;
	/*
	 * Linux domUs seem to depend on "feature-rx-flip" being 0
	 * in addition to "feature-rx-copy" being 1. It seems strange
	 * to use four possible states to describe a binary decision,
	 * but we might as well play nice.
	 */
	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-flip", "%d", xnb_explicit_pageflip_set ? 1 : 0) != 0)
		goto failure_3;

	if (xenbus_scanf(XBT_NULL, xsname,
	    "mac", "%s", mac) != 0) {
		cmn_err(CE_WARN, "xnb_attach: "
		    "cannot read mac address from %s",
		    xsname);
		goto failure_3;
	}

	if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) {
		cmn_err(CE_WARN,
		    "xnb_attach: cannot parse mac address %s",
		    mac);
		goto failure_3;
	}

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
	(void) xvdi_post_event(dip, XEN_HP_ADD);

	return (DDI_SUCCESS);

failure_3:
	xvdi_remove_event_handler(dip, NULL);

failure_2:
	xnb_ks_free(xnbp);

failure_1:
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

failure:
	vmem_free(heap_arena, xnbp->xnb_tx_va, PAGESIZE);
	kmem_free(xnbp, sizeof (*xnbp));
	return (DDI_FAILURE);
}

/*ARGSUSED*/
void
xnb_detach(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	ASSERT(xnbp != NULL);
	ASSERT(!xnbp->xnb_connected);
	ASSERT(xnbp->xnb_rx_buf_count == 0);

	xnb_disconnect_rings(dip);

	xvdi_remove_event_handler(dip, NULL);

	xnb_ks_free(xnbp);

	ddi_set_driver_private(dip, NULL);

	mutex_destroy(&xnbp->xnb_tx_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);

	if (xnbp->xnb_cpop_sz > 0)
		kmem_free(xnbp->xnb_tx_cpop, sizeof (*xnbp->xnb_tx_cpop)
		    * xnbp->xnb_cpop_sz);

	ASSERT(xnbp->xnb_tx_va != NULL);
	vmem_free(heap_arena, xnbp->xnb_tx_va, PAGESIZE);

	kmem_free(xnbp, sizeof (*xnbp));
}


static mfn_t
xnb_alloc_page(xnb_t *xnbp)
{
#define WARNING_RATE_LIMIT 100
#define BATCH_SIZE 256
	static mfn_t mfns[BATCH_SIZE];	/* common across all instances */
	static int nth = BATCH_SIZE;
	mfn_t mfn;

	mutex_enter(&xnb_alloc_page_lock);
	if (nth == BATCH_SIZE) {
		if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) {
			xnbp->xnb_stat_allocation_failure++;
			mutex_exit(&xnb_alloc_page_lock);

			/*
			 * Try for a single page in low memory situations.
			 */
			if (balloon_alloc_pages(1, &mfn) != 1) {
				if ((xnbp->xnb_stat_small_allocation_failure++
				    % WARNING_RATE_LIMIT) == 0)
					cmn_err(CE_WARN, "xnb_alloc_page: "
					    "Cannot allocate memory to "
					    "transfer packets to peer.");
				return (0);
			} else {
				xnbp->xnb_stat_small_allocation_success++;
				return (mfn);
			}
		}

		nth = 0;
		xnbp->xnb_stat_allocation_success++;
	}

	mfn = mfns[nth++];
	mutex_exit(&xnb_alloc_page_lock);

	ASSERT(mfn != 0);

	return (mfn);
#undef BATCH_SIZE
#undef WARNING_RATE_LIMIT
}

/*ARGSUSED*/
static void
xnb_free_page(xnb_t *xnbp, mfn_t mfn)
{
	int r;
	pfn_t pfn;

	pfn = xen_assign_pfn(mfn);
	pfnzero(pfn, 0, PAGESIZE);
	xen_release_pfn(pfn);

	/*
	 * This happens only in the error path, so batching is
	 * not worth the complication.
	 */
	if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) {
		cmn_err(CE_WARN, "free_page: cannot decrease memory "
		    "reservation (%d): page kept but unusable (mfn = 0x%lx).",
		    r, mfn);
	}
}

/*
 * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but
 * using local variables.
 */
#define XNB_RING_HAS_UNCONSUMED_REQUESTS(_r) \
	((((_r)->sring->req_prod - loop) < \
		(RING_SIZE(_r) - (loop - prod))) ? \
	    ((_r)->sring->req_prod - loop) : \
	    (RING_SIZE(_r) - (loop - prod)))
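
/*
 * In other words: the number of unconsumed requests posted by the
 * peer ((_r)->sring->req_prod - loop), capped by the number of free
 * response slots left given the responses produced locally but not
 * yet pushed (RING_SIZE(_r) - (loop - prod)).
 */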

mblk_t *
xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *prev = NULL;
	size_t len;
	gnttab_transfer_t *gop;
	boolean_t notify;
	RING_IDX loop, prod, end;

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a new page from the hypervisor.
	 * 2. get a request slot from the ring.
	 * 3. copy the data into the new page.
	 * 4. transfer the page to the peer.
	 * 5. update the request slot.
	 * 6. kick the peer.
	 * 7. free mp.
	 *
	 * In order to reduce the number of hypercalls, we prepare
	 * several packets for the peer and perform a single hypercall
	 * to transfer them.
	 */

	mutex_enter(&xnbp->xnb_tx_lock);

	/*
	 * If we are not connected to the peer or have not yet
	 * finished hotplug it is too early to pass packets to the
	 * peer.
	 */
	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_tx_lock);
		DTRACE_PROBE(flip_tx_too_early);
		xnbp->xnb_stat_tx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_tx_top;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {

		mfn_t mfn;
		pfn_t pfn;
		netif_rx_request_t *rxreq;
		netif_rx_response_t *rxresp;
		char *valoop;
		size_t offset;
		mblk_t *ml;
		uint16_t cksum_flags;

		/* 1 */
		if ((mfn = xnb_alloc_page(xnbp)) == 0) {
			xnbp->xnb_stat_xmit_defer++;
			break;
		}

		/* 2 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
		if (rxreq->gref >= NR_GRANT_ENTRIES)
			cmn_err(CE_PANIC, "xnb_to_peer: "
			    "grant ref %d out of range in request 0x%p",
			    rxreq->gref, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* Assign a pfn and map the new page at the allocated va. */
		pfn = xen_assign_pfn(mfn);
		hat_devload(kas.a_hat, xnbp->xnb_tx_va, PAGESIZE,
		    pfn, PROT_READ | PROT_WRITE, HAT_LOAD);

		offset = TX_BUFFER_HEADROOM;

		/* 3 */
		len = 0;
		valoop = xnbp->xnb_tx_va + offset;
		for (ml = mp; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;

			bcopy(ml->b_rptr, valoop, chunk);
			valoop += chunk;
			len += chunk;
		}

		ASSERT(len + offset < PAGESIZE);

		/* Release the pfn. */
		hat_unload(kas.a_hat, xnbp->xnb_tx_va, PAGESIZE,
		    HAT_UNLOAD_UNMAP);
		xen_release_pfn(pfn);

		/* 4 */
		gop->mfn = mfn;
		gop->domid = xnbp->xnb_peer;
		gop->ref = rxreq->gref;

		/* 5.1 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = offset;
		rxresp->flags = 0;

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_tx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		loop++;
		prod++;
		gop++;
		prev = mp;
		mp = mp->b_next;
	}

	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_tx_lock);
		return (mp);
	}

	end = loop;

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(prev != NULL);
	prev->b_next = NULL;

	if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_tx_top,
	    loop - xnbp->xnb_rx_ring.req_cons) != 0) {
		cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed");
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_tx_top;

	while (loop < end) {
		int16_t status = NETIF_RSP_OKAY;

		if (gop->status != 0) {
			status = NETIF_RSP_ERROR;

			/*
			 * If the status is anything other than
			 * GNTST_bad_page then we don't own the page
			 * any more, so don't try to give it back.
			 */
			if (gop->status != GNTST_bad_page)
				gop->mfn = 0;
		} else {
			/* The page is no longer ours. */
			gop->mfn = 0;
		}

		if (gop->mfn != 0)
			/*
			 * Give back the page, as we won't be using
			 * it.
			 */
			xnb_free_page(xnbp, gop->mfn);
		else
			/*
			 * We gave away a page, update our accounting
			 * now.
			 */
			balloon_drv_subtracted(1);

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
		} else {
			xnbp->xnb_stat_opackets++;
			xnbp->xnb_stat_obytes += len;
		}

		loop++;
		prod++;
		gop++;
	}

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_tx_notify_sent++;
	} else {
		xnbp->xnb_stat_tx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_xmit_defer++;

	mutex_exit(&xnbp->xnb_tx_lock);

	/* Free mblk_t's that we consumed. */
	freemsgchain(free);

	return (mp);
}
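
/*
 * xnb_to_peer() above is the page-flip path (GNTTABOP_transfer).
 * xnb_copy_to_peer() below is the hypervisor copy path
 * (GNTTABOP_copy); it is used when the peer has requested
 * "request-rx-copy" (see xnb_connect_rings()) and otherwise falls
 * back to xnb_to_peer().
 */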

/* helper functions for xnb_copy_to_peer */

/*
 * Grow the array of copy operation descriptors.
 * Returns a pointer to the next available entry.
 */
gnttab_copy_t *
grow_cpop_area(xnb_t *xnbp, gnttab_copy_t *o_cpop)
{
	/*
	 * o_cpop (arg.1) is a ptr to the area we would like to copy
	 * something into but cannot, because we haven't alloc'ed it
	 * yet, or NULL.
	 * old_cpop and new_cpop (local) are pointers to old/new
	 * versions of xnbp->xnb_tx_cpop.
	 */
	gnttab_copy_t *new_cpop, *old_cpop, *ret_cpop;
	size_t newcount;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	old_cpop = xnbp->xnb_tx_cpop;
	/*
	 * o_cpop is a pointer into the array pointed to by old_cpop;
	 * it would be an error for exactly one of these pointers to be NULL.
	 * We shouldn't call this function if xnb_tx_cpop has already
	 * been allocated, but we're starting to fill it from the beginning
	 * again.
	 */
	ASSERT((o_cpop == NULL && old_cpop == NULL) ||
	    (o_cpop != NULL && old_cpop != NULL && o_cpop != old_cpop));

	newcount = xnbp->xnb_cpop_sz + CPOP_DEFCNT;

	new_cpop = kmem_alloc(sizeof (*new_cpop) * newcount, KM_NOSLEEP);
	if (new_cpop == NULL) {
		xnbp->xnb_stat_other_allocation_failure++;
		return (NULL);
	}

	if (o_cpop != NULL) {
		size_t offset = (o_cpop - old_cpop);

		/* we only need to move the parts in use ... */
		(void) memmove(new_cpop, old_cpop, xnbp->xnb_cpop_sz *
		    (sizeof (*old_cpop)));

		kmem_free(old_cpop, xnbp->xnb_cpop_sz * sizeof (*old_cpop));

		ret_cpop = new_cpop + offset;
	} else {
		ret_cpop = new_cpop;
	}

	xnbp->xnb_tx_cpop = new_cpop;
	xnbp->xnb_cpop_sz = newcount;

	xnbp->xnb_stat_tx_cpoparea_grown++;

	return (ret_cpop);
}

/*
 * Check whether an address is on a page that's foreign to this domain.
 */
static boolean_t
is_foreign(void *addr)
{
	pfn_t pfn = hat_getpfnum(kas.a_hat, addr);

	return (pfn & PFN_IS_FOREIGN_MFN ? B_TRUE : B_FALSE);
}

/*
 * Insert a newly allocated mblk into a chain, replacing the old one.
 */
static mblk_t *
replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev)
{
	uint32_t start, stuff, end, value, flags;
	mblk_t *new_mp;

	new_mp = copyb(mp);
	if (new_mp == NULL)
		cmn_err(CE_PANIC, "replace_msg: cannot alloc new message "
		    "for %p, len %lu", (void *) mp, len);

	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags);
	(void) hcksum_assoc(new_mp, NULL, NULL, start, stuff, end, value,
	    flags, KM_NOSLEEP);

	new_mp->b_next = mp->b_next;
	new_mp->b_prev = mp->b_prev;
	new_mp->b_cont = mp->b_cont;

	/* Make sure we only overwrite pointers to the mblk being replaced. */
	if (mp_prev != NULL && mp_prev->b_next == mp)
		mp_prev->b_next = new_mp;

	if (ml_prev != NULL && ml_prev->b_cont == mp)
		ml_prev->b_cont = new_mp;

	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	freemsg(mp);

	return (new_mp);
}

/*
 * Set all the fields in a gnttab_copy_t.
 */
static void
setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr,
    size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref)
{
	ASSERT(xnbp != NULL && gp != NULL);

	gp->source.offset = s_off;
	gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr));
	gp->source.domid = DOMID_SELF;

	gp->len = (uint16_t)len;
	gp->flags = GNTCOPY_dest_gref;
	gp->status = 0;

	gp->dest.u.ref = d_ref;
	gp->dest.offset = d_off;
	gp->dest.domid = xnbp->xnb_peer;
}
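
/*
 * Roughly, xnb_copy_to_peer() below builds one gnttab_copy_t (via
 * setup_gop()) for each page touched by each fragment of a message,
 * with the source addressed by a local frame and the destination by
 * the peer's grant reference, and submits the whole batch with a
 * single GNTTABOP_copy hypercall.
 */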

mblk_t *
xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *mp_prev = NULL, *saved_mp = mp;
	mblk_t *ml, *ml_prev;
	gnttab_copy_t *gop_cp;
	boolean_t notify;
	RING_IDX loop, prod;
	int i;

	if (!xnbp->xnb_hv_copy)
		return (xnb_to_peer(xnbp, mp));

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a request slot from the ring.
	 * 2. set up data for hypercall (see NOTE below)
	 * 3. have the hypervisor copy the data
	 * 4. update the request slot.
	 * 5. kick the peer.
	 *
	 * NOTE ad 2.
	 * In order to reduce the number of hypercalls, we prepare
	 * several packets (mp->b_cont != NULL) for the peer and
	 * perform a single hypercall to transfer them.
	 * We also have to set up a separate copy operation for
	 * every page.
	 *
	 * If we have more than one message (mp->b_next != NULL),
	 * we do this whole dance repeatedly.
	 */

	mutex_enter(&xnbp->xnb_tx_lock);

	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_tx_lock);
		DTRACE_PROBE(copy_tx_too_early);
		xnbp->xnb_stat_tx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
		netif_rx_request_t *rxreq;
		netif_rx_response_t *rxresp;
		size_t offset, d_offset;
		size_t len;
		uint16_t cksum_flags;
		int16_t status = NETIF_RSP_OKAY;
		int item_count;

		/* 1 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_copy_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
		if (rxreq->gref >= NR_GRANT_ENTRIES)
			cmn_err(CE_PANIC, "xnb_copy_to_peer: "
			    "grant ref %d out of range in request 0x%p",
			    rxreq->gref, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* 2 */
		d_offset = offset = TX_BUFFER_HEADROOM;
		len = 0;
		item_count = 0;

		gop_cp = xnbp->xnb_tx_cpop;

		/*
		 * We walk the b_cont pointers and set up a gop_cp
		 * structure for every page in every data block we have.
		 */
		/* 2a */
		for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;
			uchar_t *r_tmp, *rpt_align;
			size_t r_offset;

			/*
			 * If we get an mblk on a page that doesn't belong to
			 * this domain, get a new mblk to replace the old one.
			 */
			if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) {
				mblk_t *ml_new = replace_msg(ml, chunk,
				    mp_prev, ml_prev);

				/* We can still use old ml, but not *ml! */
				if (free == ml)
					free = ml_new;
				if (mp == ml)
					mp = ml_new;
				ml = ml_new;

				xnbp->xnb_stat_tx_foreign_page++;
			}

			rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr);
			r_offset = (uint16_t)(ml->b_rptr - rpt_align);
			r_tmp = ml->b_rptr;

			if (d_offset + chunk > PAGESIZE)
				cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p "
				    "(svd: %p), ml %p,rpt_alg. %p, d_offset "
				    "(%lu) + chunk (%lu) > PAGESIZE %d!",
				    (void *)mp, (void *)saved_mp, (void *)ml,
				    (void *)rpt_align,
				    d_offset, chunk, (int)PAGESIZE);

			while (chunk > 0) {
				size_t part_len;

				item_count++;
				if (item_count > xnbp->xnb_cpop_sz) {
					gop_cp = grow_cpop_area(xnbp, gop_cp);
					if (gop_cp == NULL)
						goto failure;
				}
				/*
				 * If our mblk crosses a page boundary, we need
				 * to do a separate copy for every page.
				 */
				if (r_offset + chunk > PAGESIZE) {
					part_len = PAGESIZE - r_offset;

					DTRACE_PROBE3(mblk_page_crossed,
					    (mblk_t *), ml, int, chunk, int,
					    (int)r_offset);

					xnbp->xnb_stat_tx_pagebndry_crossed++;
				} else {
					part_len = chunk;
				}

				setup_gop(xnbp, gop_cp, r_tmp, r_offset,
				    d_offset, part_len, rxreq->gref);

				chunk -= part_len;

				len += part_len;
				d_offset += part_len;
				r_tmp += part_len;
				/*
				 * The 2nd, 3rd ... last copies will always
				 * start at r_tmp, therefore r_offset is 0.
				 */
				r_offset = 0;
				gop_cp++;
			}
			ml_prev = ml;
			DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int,
			    chunk, int, len, int, item_count);
		}
		/* 3 */
		if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_tx_cpop,
		    item_count) != 0) {
			cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed");
			DTRACE_PROBE(HV_granttableopfailed);
		}

		/* 4 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = offset;

		rxresp->flags = 0;

		DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_tx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		for (i = 0; i < item_count; i++) {
			if (xnbp->xnb_tx_cpop[i].status != 0) {
				DTRACE_PROBE2(cpop__status__nonnull, int,
				    (int)xnbp->xnb_tx_cpop[i].status,
				    int, i);
				status = NETIF_RSP_ERROR;
			}
		}

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
		} else {
			xnbp->xnb_stat_opackets++;
			xnbp->xnb_stat_obytes += len;
		}

		loop++;
		prod++;
		mp_prev = mp;
		mp = mp->b_next;
	}
failure:
	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_tx_lock);
		return (mp);
	}

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(mp_prev != NULL);
	mp_prev->b_next = NULL;

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_tx_notify_sent++;
	} else {
		xnbp->xnb_stat_tx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_xmit_defer++;

	mutex_exit(&xnbp->xnb_tx_lock);

	/* Free mblk_t structs we have consumed. */
	freemsgchain(free);

	return (mp);
}

/*ARGSUSED*/
static int
xnb_rxbuf_constructor(void *buf, void *arg, int kmflag)
{
	xnb_rxbuf_t *rxp = buf;

	bzero(rxp, sizeof (*rxp));

	rxp->xr_free_rtn.free_func = xnb_rx_complete;
	rxp->xr_free_rtn.free_arg = (caddr_t)rxp;

	rxp->xr_mop.host_addr =
	    (uint64_t)(uintptr_t)vmem_alloc(heap_arena, PAGESIZE,
	    ((kmflag & KM_NOSLEEP) == KM_NOSLEEP) ?
	    VM_NOSLEEP : VM_SLEEP);

	if (rxp->xr_mop.host_addr == NULL) {
		cmn_err(CE_WARN, "xnb_rxbuf_constructor: "
		    "cannot get address space");
		return (-1);
	}

	/*
	 * Have the hat ensure that page table exists for the VA.
	 */
	hat_prepare_mapping(kas.a_hat,
	    (caddr_t)(uintptr_t)rxp->xr_mop.host_addr);

	return (0);
}

/*ARGSUSED*/
static void
xnb_rxbuf_destructor(void *buf, void *arg)
{
	xnb_rxbuf_t *rxp = buf;

	ASSERT(rxp->xr_mop.host_addr != NULL);
	ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == 0);

	hat_release_mapping(kas.a_hat,
	    (caddr_t)(uintptr_t)rxp->xr_mop.host_addr);
	vmem_free(heap_arena,
	    (caddr_t)(uintptr_t)rxp->xr_mop.host_addr, PAGESIZE);
}

static void
xnb_rx_notify_peer(xnb_t *xnbp)
{
	boolean_t notify;

	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}
}
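
/*
 * xnb_rx_complete() is installed as the desballoc() free routine for
 * buffers loaned upward by xnb_recv() (see xnb_rxbuf_constructor());
 * it runs when the upper layers free the mblk, at which point the
 * grant mapping can be scheduled for unmapping.
 */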

static void
xnb_rx_complete(xnb_rxbuf_t *rxp)
{
	xnb_t *xnbp = rxp->xr_xnbp;

	ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == XNB_RXBUF_INUSE);

	mutex_enter(&xnbp->xnb_rx_lock);
	xnb_rx_schedule_unmop(xnbp, &rxp->xr_mop, rxp);
	mutex_exit(&xnbp->xnb_rx_lock);
}

static void
xnb_rx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
{
	RING_IDX i;
	netif_tx_response_t *txresp;

	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

	i = xnbp->xnb_tx_ring.rsp_prod_pvt;

	txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i);
	txresp->id = id;
	txresp->status = status;

	xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1;

	/*
	 * Note that we don't push the change to the peer here - that
	 * is the caller's responsibility.
	 */
}

static void
xnb_rx_schedule_unmop(xnb_t *xnbp, gnttab_map_grant_ref_t *mop,
    xnb_rxbuf_t *rxp)
{
	gnttab_unmap_grant_ref_t *unmop;
	int u_count;
	int reqs_on_ring;

	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
	ASSERT(xnbp->xnb_rx_unmop_count < NET_TX_RING_SIZE);

	u_count = xnbp->xnb_rx_unmop_count++;

	/* Cache data for the time when we actually unmap grant refs */
	xnbp->xnb_rx_unmop_rxp[u_count] = rxp;

	unmop = &xnbp->xnb_rx_unmop[u_count];
	unmop->host_addr = mop->host_addr;
	unmop->dev_bus_addr = mop->dev_bus_addr;
	unmop->handle = mop->handle;

	/*
	 * We cannot check the ring once we're disconnected from it. Batching
	 * doesn't seem to be a useful optimisation in this case either,
	 * so we directly call into the actual unmap function.
	 */
	if (xnbp->xnb_connected) {
		reqs_on_ring = RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring);

		/*
		 * By tuning xnb_unmop_hiwat to N, we can emulate "N per batch"
		 * or (with N == 1) "immediate unmop" behaviour.
		 * The "> xnb_unmop_lowwat" is a guard against ring exhaustion.
		 */
		if (xnbp->xnb_rx_unmop_count < xnb_unmop_hiwat &&
		    reqs_on_ring > xnb_unmop_lowwat)
			return;
	}

	xnb_rx_perform_pending_unmop(xnbp);
}

/*
 * Here we perform the actual unmapping of the data that was
 * accumulated in xnb_rx_schedule_unmop().
 * Note that it is the caller's responsibility to make sure that
 * there's actually something there to unmop.
 */
static void
xnb_rx_perform_pending_unmop(xnb_t *xnbp)
{
	RING_IDX loop;
#ifdef XNB_DEBUG
	gnttab_unmap_grant_ref_t *unmop;
#endif /* XNB_DEBUG */

	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
	ASSERT(xnbp->xnb_rx_unmop_count > 0);

	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
	    xnbp->xnb_rx_unmop, xnbp->xnb_rx_unmop_count) < 0) {
		cmn_err(CE_WARN, "xnb_rx_perform_pending_unmop: "
		    "unmap grant operation failed, "
		    "%d pages lost", xnbp->xnb_rx_unmop_count);
	}

#ifdef XNB_DEBUG
	for (loop = 0, unmop = xnbp->xnb_rx_unmop;
	    loop < xnbp->xnb_rx_unmop_count;
	    loop++, unmop++) {
		if (unmop->status != 0) {
			cmn_err(CE_WARN, "xnb_rx_perform_pending_unmop: "
			    "unmap grant reference failed (%d)",
			    unmop->status);
		}
	}
#endif /* XNB_DEBUG */

	for (loop = 0; loop < xnbp->xnb_rx_unmop_count; loop++) {
		xnb_rxbuf_t *rxp = xnbp->xnb_rx_unmop_rxp[loop];

		if (rxp == NULL)
			cmn_err(CE_PANIC,
			    "xnb_rx_perform_pending_unmop: "
			    "unexpected NULL rxp (loop %d; count %d)!",
			    loop, xnbp->xnb_rx_unmop_count);

		if (xnbp->xnb_connected)
			xnb_rx_mark_complete(xnbp, rxp->xr_id, rxp->xr_status);
		xnb_rxbuf_put(xnbp, rxp);
	}
	if (xnbp->xnb_connected)
		xnb_rx_notify_peer(xnbp);

	xnbp->xnb_rx_unmop_count = 0;

#ifdef XNB_DEBUG
	bzero(xnbp->xnb_rx_unmop, sizeof (xnbp->xnb_rx_unmop));
	bzero(xnbp->xnb_rx_unmop_rxp, sizeof (xnbp->xnb_rx_unmop_rxp));
#endif /* XNB_DEBUG */
}

static xnb_rxbuf_t *
xnb_rxbuf_get(xnb_t *xnbp, int flags)
{
	xnb_rxbuf_t *rxp;

	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

	rxp = kmem_cache_alloc(xnb_rxbuf_cachep, flags);
	if (rxp != NULL) {
		ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == 0);
		rxp->xr_flags |= XNB_RXBUF_INUSE;

		rxp->xr_xnbp = xnbp;
		rxp->xr_mop.dom = xnbp->xnb_peer;

		rxp->xr_mop.flags = GNTMAP_host_map;
		if (!xnbp->xnb_rx_pages_writable)
			rxp->xr_mop.flags |= GNTMAP_readonly;

		xnbp->xnb_rx_buf_count++;
	}

	return (rxp);
}

static void
xnb_rxbuf_put(xnb_t *xnbp, xnb_rxbuf_t *rxp)
{
	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
	ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == XNB_RXBUF_INUSE);

	rxp->xr_flags &= ~XNB_RXBUF_INUSE;
	xnbp->xnb_rx_buf_count--;

	kmem_cache_free(xnb_rxbuf_cachep, rxp);
}

static mblk_t *
xnb_recv(xnb_t *xnbp)
{
	RING_IDX start, end, loop;
	gnttab_map_grant_ref_t *mop;
	xnb_rxbuf_t **rxpp;
	netif_tx_request_t *txreq;
	boolean_t work_to_do;
	mblk_t *head, *tail;
	/*
	 * If the peer granted a read-only mapping to the page then we
	 * must copy the data, as the local protocol stack (should the
	 * packet be destined for this host) will modify the packet
	 * 'in place'.
	 */
	boolean_t copy = !xnbp->xnb_rx_pages_writable;

	/*
	 * For each individual request, the sequence of actions is:
	 *
	 * 1. get the request.
	 * 2. map the page based on the grant ref.
	 * 3. allocate an mblk, copy the data to it.
	 * 4. release the grant.
	 * 5. update the ring.
	 * 6. pass the packet upward.
	 * 7. kick the peer.
	 *
	 * In fact, we try to perform the grant operations in batches,
	 * so there are two loops.
	 */

	head = tail = NULL;
around:
	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

	/* LINTED: constant in conditional context */
	RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do);
	if (!work_to_do) {
finished:
		return (head);
	}

	start = xnbp->xnb_tx_ring.req_cons;
	end = xnbp->xnb_tx_ring.sring->req_prod;

	for (loop = start, mop = xnbp->xnb_rx_mop, rxpp = xnbp->xnb_rx_bufp;
	    loop != end;
	    loop++, mop++, rxpp++) {
		xnb_rxbuf_t *rxp;

		rxp = xnb_rxbuf_get(xnbp, KM_NOSLEEP);
		if (rxp == NULL)
			break;

		ASSERT(xnbp->xnb_rx_pages_writable ||
		    ((rxp->xr_mop.flags & GNTMAP_readonly)
		    == GNTMAP_readonly));

		rxp->xr_mop.ref =
		    RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop)->gref;

		ASSERT(rxp->xr_mop.ref < NR_GRANT_ENTRIES);

		*mop = rxp->xr_mop;
		*rxpp = rxp;
	}

	if ((loop - start) == 0)
		goto finished;

	end = loop;

	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
	    xnbp->xnb_rx_mop, end - start) != 0) {

		cmn_err(CE_WARN, "xnb_recv: map grant operation failed");

		loop = start;
		rxpp = xnbp->xnb_rx_bufp;

		while (loop != end) {
			xnb_rxbuf_put(xnbp, *rxpp);

			loop++;
			rxpp++;
		}

		goto finished;
	}

	for (loop = start, mop = xnbp->xnb_rx_mop, rxpp = xnbp->xnb_rx_bufp;
	    loop != end;
	    loop++, mop++, rxpp++) {
		mblk_t *mp = NULL;
		int16_t status = NETIF_RSP_OKAY;
		xnb_rxbuf_t *rxp = *rxpp;

		if (mop->status != 0) {
			cmn_err(CE_WARN, "xnb_recv: "
			    "failed to map buffer: %d",
			    mop->status);
			status = NETIF_RSP_ERROR;
		}

		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);

		if (status == NETIF_RSP_OKAY) {
			if (copy) {
				mp = allocb(txreq->size, BPRI_MED);
				if (mp == NULL) {
					status = NETIF_RSP_ERROR;
					xnbp->xnb_stat_rx_allocb_failed++;
				} else {
					bcopy((caddr_t)(uintptr_t)
					    mop->host_addr + txreq->offset,
					    mp->b_wptr, txreq->size);
					mp->b_wptr += txreq->size;
				}
			} else {
				mp = desballoc((uchar_t *)(uintptr_t)
				    mop->host_addr + txreq->offset,
				    txreq->size, 0, &rxp->xr_free_rtn);
				if (mp == NULL) {
					status = NETIF_RSP_ERROR;
					xnbp->xnb_stat_rx_allocb_failed++;
				} else {
					rxp->xr_id = txreq->id;
					rxp->xr_status = status;
					rxp->xr_mop = *mop;

					mp->b_wptr += txreq->size;
				}
			}

			/*
			 * If we have a buffer and there are checksum
			 * flags, process them appropriately.
			 */
			if ((mp != NULL) &&
			    ((txreq->flags &
			    (NETTXF_csum_blank | NETTXF_data_validated))
			    != 0)) {
				mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp,
				    mp, txreq->flags);
				xnbp->xnb_stat_rx_cksum_no_need++;
			}
		}

		if (copy || (mp == NULL)) {
			rxp->xr_status = status;
			rxp->xr_id = txreq->id;
			xnb_rx_schedule_unmop(xnbp, mop, rxp);
		}

		if (mp != NULL) {
			xnbp->xnb_stat_ipackets++;
			xnbp->xnb_stat_rbytes += txreq->size;

			mp->b_next = NULL;
			if (head == NULL) {
				ASSERT(tail == NULL);
				head = mp;
			} else {
				ASSERT(tail != NULL);
				tail->b_next = mp;
			}
			tail = mp;
		}
	}

	xnbp->xnb_tx_ring.req_cons = loop;

	goto around;
	/* NOTREACHED */
}

/*
 *  intr() -- ring interrupt service routine
 */
static uint_t
xnb_intr(caddr_t arg)
{
	xnb_t *xnbp = (xnb_t *)arg;
	mblk_t *mp;

	xnbp->xnb_stat_intr++;

	mutex_enter(&xnbp->xnb_rx_lock);

	ASSERT(xnbp->xnb_connected);

	mp = xnb_recv(xnbp);

	mutex_exit(&xnbp->xnb_rx_lock);

	if (!xnbp->xnb_hotplugged) {
		xnbp->xnb_stat_rx_too_early++;
		goto fail;
	}
	if (mp == NULL) {
		xnbp->xnb_stat_spurious_intr++;
		goto fail;
	}

	xnbp->xnb_flavour->xf_recv(xnbp, mp);

	return (DDI_INTR_CLAIMED);

fail:
	freemsgchain(mp);
	return (DDI_INTR_CLAIMED);
}

static boolean_t
xnb_connect_rings(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);
	char *oename;
	struct gnttab_map_grant_ref map_op;
	evtchn_port_t evtchn;
	int i;

	/*
	 * Cannot attempt to connect the rings if already connected.
	 */
	ASSERT(!xnbp->xnb_connected);

	oename = xvdi_get_oename(dip);

	if (xenbus_gather(XBT_NULL, oename,
	    "event-channel", "%u", &evtchn,
	    "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref,
	    "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref,
	    NULL) != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: "
		    "cannot read other-end details from %s",
		    oename);
		goto fail;
	}

	if (xenbus_scanf(XBT_NULL, oename,
	    "feature-tx-writable", "%d", &i) != 0)
		i = 0;
	if (i != 0)
		xnbp->xnb_rx_pages_writable = B_TRUE;

	if (xenbus_scanf(XBT_NULL, oename,
	    "feature-no-csum-offload", "%d", &i) != 0)
		i = 0;
	if ((i == 1) || !xnbp->xnb_cksum_offload)
		xnbp->xnb_cksum_offload = B_FALSE;

	/* Check whether our peer knows and requests hypervisor copy */
	if (xenbus_scanf(XBT_NULL, oename, "request-rx-copy", "%d", &i)
	    != 0)
		i = 0;
	if (i != 0)
		xnbp->xnb_hv_copy = B_TRUE;

	/*
	 * 1. allocate a vaddr for the tx page, one for the rx page.
	 * 2. call GNTTABOP_map_grant_ref to map the relevant pages
	 *    into the allocated vaddr (one for tx, one for rx).
	 * 3. call EVTCHNOP_bind_interdomain to have the event channel
	 *    bound to this domain.
	 * 4. associate the event channel with an interrupt.
	 * 5. declare ourselves connected.
	 * 6. enable the interrupt.
	 */

	/* 1.tx */
	xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->xnb_tx_ring_addr != NULL);

	/* 2.tx */
	map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->xnb_tx_ring_ref;
	map_op.dom = xnbp->xnb_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
	    &map_op, 1) != 0 || map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page.");
		goto fail;
	}
	xnbp->xnb_tx_ring_handle = map_op.handle;

	/* LINTED: constant in conditional context */
	BACK_RING_INIT(&xnbp->xnb_tx_ring,
	    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);

	/* 1.rx */
	xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->xnb_rx_ring_addr != NULL);

	/* 2.rx */
	map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->xnb_rx_ring_ref;
	map_op.dom = xnbp->xnb_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
	    &map_op, 1) != 0 || map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page.");
		goto fail;
	}
	xnbp->xnb_rx_ring_handle = map_op.handle;

	/* LINTED: constant in conditional context */
	BACK_RING_INIT(&xnbp->xnb_rx_ring,
	    (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE);

	/* 3 */
	if (xvdi_bind_evtchn(dip, evtchn) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xnb_connect_rings: "
		    "cannot bind event channel %d", xnbp->xnb_evtchn);
		xnbp->xnb_evtchn = INVALID_EVTCHN;
		goto fail;
	}
	xnbp->xnb_evtchn = xvdi_get_evtchn(dip);

	/*
	 * It would be good to set the state to XenbusStateConnected
	 * here as well, but then what if ddi_add_intr() failed?
	 * Changing the state in the store will be noticed by the peer
	 * and cannot be "taken back".
	 */
	mutex_enter(&xnbp->xnb_tx_lock);
	mutex_enter(&xnbp->xnb_rx_lock);

	/* 5.1 */
	xnbp->xnb_connected = B_TRUE;

	mutex_exit(&xnbp->xnb_rx_lock);
	mutex_exit(&xnbp->xnb_tx_lock);

	/* 4, 6 */
	if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp)
	    != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt");
		goto fail;
	}
	xnbp->xnb_irq = B_TRUE;

	/* 5.2 */
	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);

	return (B_TRUE);

fail:
	mutex_enter(&xnbp->xnb_tx_lock);
	mutex_enter(&xnbp->xnb_rx_lock);

	xnbp->xnb_connected = B_FALSE;
	mutex_exit(&xnbp->xnb_rx_lock);
	mutex_exit(&xnbp->xnb_tx_lock);

	return (B_FALSE);
}

static void
xnb_disconnect_rings(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	if (xnbp->xnb_irq) {
		ddi_remove_intr(dip, 0, NULL);
		xnbp->xnb_irq = B_FALSE;
	}

	if (xnbp->xnb_rx_unmop_count > 0)
		xnb_rx_perform_pending_unmop(xnbp);

	if (xnbp->xnb_evtchn != INVALID_EVTCHN) {
		xvdi_free_evtchn(dip);
		xnbp->xnb_evtchn = INVALID_EVTCHN;
	}

	if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) {
		struct gnttab_unmap_grant_ref unmap_op;

		unmap_op.host_addr = (uint64_t)(uintptr_t)
		    xnbp->xnb_rx_ring_addr;
		unmap_op.dev_bus_addr = 0;
		unmap_op.handle = xnbp->xnb_rx_ring_handle;
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &unmap_op, 1) != 0)
			cmn_err(CE_WARN, "xnb_disconnect_rings: "
			    "cannot unmap rx-ring page (%d)",
			    unmap_op.status);

		xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	}

	if (xnbp->xnb_rx_ring_addr != NULL) {
		hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
		vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE);
		xnbp->xnb_rx_ring_addr = NULL;
	}

	if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) {
		struct gnttab_unmap_grant_ref unmap_op;

		unmap_op.host_addr = (uint64_t)(uintptr_t)
		    xnbp->xnb_tx_ring_addr;
		unmap_op.dev_bus_addr = 0;
		unmap_op.handle = xnbp->xnb_tx_ring_handle;
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &unmap_op, 1) != 0)
			cmn_err(CE_WARN, "xnb_disconnect_rings: "
			    "cannot unmap tx-ring page (%d)",
			    unmap_op.status);

		xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	}

	if (xnbp->xnb_tx_ring_addr != NULL) {
		hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
		vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE);
		xnbp->xnb_tx_ring_addr = NULL;
	}
}

/*ARGSUSED*/
static void
xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);
	XenbusState new_state = *(XenbusState *)impl_data;

	ASSERT(xnbp != NULL);

	switch (new_state) {
	case XenbusStateConnected:
		if (xnb_connect_rings(dip)) {
			xnbp->xnb_flavour->xf_peer_connected(xnbp);
		} else {
			xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
			xnb_disconnect_rings(dip);
			(void) xvdi_switch_state(dip, XBT_NULL,
			    XenbusStateClosed);
			(void) xvdi_post_event(dip, XEN_HP_REMOVE);
		}

		/*
		 * Now that we've attempted to connect it's reasonable
		 * to allow an attempt to detach.
		 */
		xnbp->xnb_detachable = B_TRUE;

		break;

	case XenbusStateClosing:
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);

		break;

	case XenbusStateClosed:
		xnbp->xnb_flavour->xf_peer_disconnected(xnbp);

		mutex_enter(&xnbp->xnb_tx_lock);
		mutex_enter(&xnbp->xnb_rx_lock);

		xnb_disconnect_rings(dip);
		xnbp->xnb_connected = B_FALSE;

		mutex_exit(&xnbp->xnb_rx_lock);
		mutex_exit(&xnbp->xnb_tx_lock);

		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
		(void) xvdi_post_event(dip, XEN_HP_REMOVE);
		/*
		 * In all likelihood this is already set (in the above
		 * case), but if the peer never attempted to connect
		 * and the domain is destroyed we get here without
		 * having been through the case above, so we set it to
		 * be sure.
		 */
		xnbp->xnb_detachable = B_TRUE;

		break;

	default:
		break;
	}
}

/*ARGSUSED*/
static void
xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);
	xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;
	boolean_t success;

	ASSERT(xnbp != NULL);

	switch (state) {
	case Connected:

		success = xnbp->xnb_flavour->xf_hotplug_connected(xnbp);

		mutex_enter(&xnbp->xnb_tx_lock);
		mutex_enter(&xnbp->xnb_rx_lock);

		xnbp->xnb_hotplugged = success;

		mutex_exit(&xnbp->xnb_rx_lock);
		mutex_exit(&xnbp->xnb_tx_lock);
		break;

	default:
		break;
	}
}

static struct modldrv modldrv = {
	&mod_miscops, "xnb module %I%",
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

int
_init(void)
{
	int i;

	mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL);

	xnb_rxbuf_cachep = kmem_cache_create("xnb_rxbuf_cachep",
	    sizeof (xnb_rxbuf_t), 0, xnb_rxbuf_constructor,
	    xnb_rxbuf_destructor, NULL, NULL, NULL, 0);
	ASSERT(xnb_rxbuf_cachep != NULL);

	i = mod_install(&modlinkage);
	if (i != DDI_SUCCESS) {
		kmem_cache_destroy(xnb_rxbuf_cachep);
		mutex_destroy(&xnb_alloc_page_lock);
	}
	return (i);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int i;

	i = mod_remove(&modlinkage);
	if (i == DDI_SUCCESS) {
		kmem_cache_destroy(xnb_rxbuf_cachep);
		mutex_destroy(&xnb_alloc_page_lock);
	}
	return (i);
}