1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #ifdef DEBUG 30 #define XNB_DEBUG 1 31 #endif /* DEBUG */ 32 33 #include "xnb.h" 34 35 #include <sys/sunddi.h> 36 #include <sys/sunndi.h> 37 #include <sys/modctl.h> 38 #include <sys/conf.h> 39 #include <sys/mac.h> 40 #include <sys/dlpi.h> 41 #include <sys/strsubr.h> 42 #include <sys/strsun.h> 43 #include <sys/types.h> 44 #include <sys/pattr.h> 45 #include <vm/seg_kmem.h> 46 #include <vm/hat_i86.h> 47 #include <xen/sys/xenbus_impl.h> 48 #include <xen/sys/xendev.h> 49 #include <sys/balloon_impl.h> 50 #include <sys/evtchn_impl.h> 51 #include <sys/gnttab.h> 52 #include <vm/vm_dep.h> 53 54 #include <sys/gld.h> 55 #include <inet/ip.h> 56 #include <inet/ip_impl.h> 57 #include <sys/vnic_impl.h> /* blech. */ 58 59 /* 60 * The terms "transmit" and "receive" are used in their traditional 61 * sense here - packets from other parts of this system are 62 * "transmitted" to the peer domain and those originating from the 63 * peer are "received". 
 * In some cases this can be confusing, because various data
 * structures are shared with the domU driver, which has the opposite
 * view of what constitutes "transmit" and "receive". In naming the
 * shared structures the domU driver always wins.
 */

/*
 * XXPV dme: things to do, as well as various things indicated
 * throughout the source:
 * - copy avoidance outbound.
 * - copy avoidance inbound.
 * - transfer credit limiting.
 * - MAC address based filtering.
 */

/*
 * Linux expects to have some headroom in received buffers. The Linux
 * frontend driver (netfront) checks to see if the headroom is
 * available and will re-allocate the buffer to make room if
 * necessary. To avoid this we add TX_BUFFER_HEADROOM bytes of
 * headroom to each packet we pass to the peer.
 */
#define	TX_BUFFER_HEADROOM	16

/* Advertise checksum offload ("feature-no-csum-offload") by default. */
static boolean_t	xnb_cksum_offload = B_TRUE;

static boolean_t	xnb_connect_rings(dev_info_t *);
static void	xnb_disconnect_rings(dev_info_t *);
static void	xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);
static void	xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);

static int	xnb_rxbuf_constructor(void *, void *, int);
static void	xnb_rxbuf_destructor(void *, void *);
static xnb_rxbuf_t *xnb_rxbuf_get(xnb_t *, int);
static void	xnb_rxbuf_put(xnb_t *, xnb_rxbuf_t *);
static void	xnb_rx_notify_peer(xnb_t *);
static void	xnb_rx_complete(xnb_rxbuf_t *);
static void	xnb_rx_mark_complete(xnb_t *, RING_IDX, int16_t);
static void	xnb_rx_schedule_unmop(xnb_t *, gnttab_map_grant_ref_t *,
    xnb_rxbuf_t *);
static void	xnb_rx_perform_pending_unmop(xnb_t *);
mblk_t *xnb_copy_to_peer(xnb_t *, mblk_t *);

/*
 * Low/high watermarks used by xnb_rx_schedule_unmop() to decide when
 * the accumulated batch of grant unmap operations should be flushed.
 */
int	xnb_unmop_lowwat = NET_TX_RING_SIZE >> 2;
int	xnb_unmop_hiwat = NET_TX_RING_SIZE - (NET_TX_RING_SIZE >> 2);


/*
 * Prefer hypervisor-assisted copy (gnttab copy) over page flipping
 * when passing packets to the peer; exported to the guest via the
 * "feature-rx-copy"/"feature-rx-flip" xenstore properties in
 * xnb_attach().
 */
boolean_t	xnb_hv_copy = B_TRUE;
boolean_t	xnb_explicit_pageflip_set = B_FALSE;

#ifdef XNB_DEBUG
#define	NR_GRANT_ENTRIES \
	(NR_GRANT_FRAMES * PAGESIZE / sizeof (grant_entry_t))
#endif /* XNB_DEBUG */

/* XXPV dme: are these really invalid? */
#define	INVALID_GRANT_HANDLE	((grant_handle_t)-1)
#define	INVALID_GRANT_REF	((grant_ref_t)-1)

static kmem_cache_t *xnb_rxbuf_cachep;
static kmutex_t	xnb_alloc_page_lock;

/*
 * Statistics.
 */
static char *aux_statistics[] = {
	"tx_cksum_deferred",
	"rx_cksum_no_need",
	"tx_rsp_notok",
	"tx_notify_deferred",
	"tx_notify_sent",
	"rx_notify_deferred",
	"rx_notify_sent",
	"tx_too_early",
	"rx_too_early",
	"rx_allocb_failed",
	"tx_allocb_failed",
	"tx_foreign_page",
	"mac_full",
	"spurious_intr",
	"allocation_success",
	"allocation_failure",
	"small_allocation_success",
	"small_allocation_failure",
	"other_allocation_failure",
	"tx_pageboundary_crossed",
	"tx_cpoparea_grown",
	"csum_hardware",
	"csum_software",
};

/*
 * kstat update callback: snapshot the per-instance auxiliary counters
 * into the named kstat array.  Read-only; returns EACCES for writes.
 */
static int
xnb_ks_aux_update(kstat_t *ksp, int flag)
{
	xnb_t *xnbp;
	kstat_named_t *knp;

	if (flag != KSTAT_READ)
		return (EACCES);

	xnbp = ksp->ks_private;
	knp = ksp->ks_data;

	/*
	 * Assignment order should match that of the names in
	 * aux_statistics.
	 */
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_no_need;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_rsp_notok;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_foreign_page;
	(knp++)->value.ui64 = xnbp->xnb_stat_mac_full;
	(knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_pagebndry_crossed;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_cpoparea_grown;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_software;

	return (0);
}

/*
 * Create and install the "aux_statistics" kstat for this instance.
 * Returns B_FALSE if the kstat cannot be created.
 */
static boolean_t
xnb_ks_init(xnb_t *xnbp)
{
	int nstat = sizeof (aux_statistics) /
	    sizeof (aux_statistics[0]);
	char **cp = aux_statistics;
	kstat_named_t *knp;

	/*
	 * Create and initialise kstats.
	 */
	xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo),
	    ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net",
	    KSTAT_TYPE_NAMED, nstat, 0);
	if (xnbp->xnb_kstat_aux == NULL)
		return (B_FALSE);

	xnbp->xnb_kstat_aux->ks_private = xnbp;
	xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update;

	knp = xnbp->xnb_kstat_aux->ks_data;
	while (nstat > 0) {
		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);

		knp++;
		cp++;
		nstat--;
	}

	kstat_install(xnbp->xnb_kstat_aux);

	return (B_TRUE);
}

/*
 * Remove this instance's kstats.
 */
static void
xnb_ks_free(xnb_t *xnbp)
{
	kstat_delete(xnbp->xnb_kstat_aux);
}

/*
 * Software checksum calculation and insertion for an arbitrary packet.
 */
/*ARGSUSED*/
static mblk_t *
xnb_software_csum(xnb_t *xnbp, mblk_t *mp)
{
	/*
	 * XXPV dme: shouldn't rely on vnic_fix_cksum(), not least
	 * because it doesn't cover all of the interesting cases :-(
	 */
	(void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0,
	    HCK_FULLCKSUM, KM_NOSLEEP);

	return (vnic_fix_cksum(mp));
}

/*
 * Arrange for a packet's checksum to be valid before it is passed on,
 * using the hardware capabilities described by 'capab' where possible
 * and falling back to a software calculation otherwise.  Returns the
 * (possibly replaced) message.
 */
mblk_t *
xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab)
{
	struct ether_header *ehp;
	uint16_t sap;
	uint32_t offset;
	ipha_t *ipha;

	ASSERT(mp->b_next == NULL);

	/*
	 * Check that the packet is contained in a single mblk. In
	 * the "from peer" path this is true today, but will change
	 * when scatter gather support is added. In the "to peer"
	 * path we cannot be sure, but in most cases it will be true
	 * (in the xnbo case the packet has come from a MAC device
	 * which is unlikely to split packets).
	 */
	if (mp->b_cont != NULL)
		goto software;

	/*
	 * If the MAC has no hardware capability don't do any further
	 * checking.
	 */
	if (capab == 0)
		goto software;

	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
	ehp = (struct ether_header *)mp->b_rptr;

	if (ntohs(ehp->ether_type) == VLAN_TPID) {
		struct ether_vlan_header *evhp;

		ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
		evhp = (struct ether_vlan_header *)mp->b_rptr;
		sap = ntohs(evhp->ether_type);
		offset = sizeof (struct ether_vlan_header);
	} else {
		sap = ntohs(ehp->ether_type);
		offset = sizeof (struct ether_header);
	}

	/*
	 * We only attempt to do IPv4 packets in hardware.
	 */
	if (sap != ETHERTYPE_IP)
		goto software;

	/*
	 * We know that this is an IPv4 packet.
	 */
	ipha = (ipha_t *)(mp->b_rptr + offset);

	switch (ipha->ipha_protocol) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		/*
		 * This is a TCP/IPv4 or UDP/IPv4 packet.
		 *
		 * If the capabilities indicate that full checksum
		 * offload is available, use it.
		 */
		if ((capab & HCKSUM_INET_FULL_V4) != 0) {
			(void) hcksum_assoc(mp, NULL, NULL,
			    0, 0, 0, 0,
			    HCK_FULLCKSUM, KM_NOSLEEP);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		/*
		 * XXPV dme: If the capabilities indicate that partial
		 * checksum offload is available, we should use it.
		 */

		break;

	default:
		/* Use software. */
		break;
	}

software:
	/*
	 * We are not able to use any offload so do the whole thing in
	 * software.
	 */
	xnbp->xnb_stat_csum_software++;

	return (xnb_software_csum(xnbp, mp));
}

/*
 * Common attach(9E) work for the xnb flavours: allocate and initialise
 * per-instance state, register xenbus state-change and hotplug
 * callbacks, and advertise our features to the peer via the xenstore.
 * Returns DDI_SUCCESS or DDI_FAILURE; on failure all resources
 * acquired so far are released via the goto-cleanup chain.
 */
int
xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
{
	xnb_t *xnbp;
	char *xsname, mac[ETHERADDRL * 3];

	xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP);

	xnbp->xnb_flavour = flavour;
	xnbp->xnb_flavour_data = flavour_data;
	xnbp->xnb_devinfo = dip;
	xnbp->xnb_evtchn = INVALID_EVTCHN;
	xnbp->xnb_irq = B_FALSE;
	xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_cksum_offload = xnb_cksum_offload;
	xnbp->xnb_connected = B_FALSE;
	xnbp->xnb_hotplugged = B_FALSE;
	xnbp->xnb_detachable = B_FALSE;
	xnbp->xnb_peer = xvdi_get_oeid(dip);
	xnbp->xnb_rx_pages_writable = B_FALSE;

	xnbp->xnb_rx_buf_count = 0;
	xnbp->xnb_rx_unmop_count = 0;

	/* Enabled later, per-peer, once "request-rx-copy" is seen. */
	xnbp->xnb_hv_copy = B_FALSE;

	xnbp->xnb_tx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
	ASSERT(xnbp->xnb_tx_va != NULL);

	if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie)
	    != DDI_SUCCESS)
		goto failure;

	/* allocated on demand, when/if we enter xnb_copy_to_peer() */
	xnbp->xnb_tx_cpop = NULL;
	xnbp->xnb_cpop_sz = 0;

	mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);

	/* set driver private pointer now */
	ddi_set_driver_private(dip, xnbp);

	if (!xnb_ks_init(xnbp))
		goto failure_1;

	/*
	 * Receive notification of changes in the state of the
	 * driver in the guest domain.
	 */
	if (xvdi_add_event_handler(dip, XS_OE_STATE,
	    xnb_oe_state_change) != DDI_SUCCESS)
		goto failure_2;

	/*
	 * Receive notification of hotplug events.
	 */
	if (xvdi_add_event_handler(dip, XS_HP_STATE,
	    xnb_hp_state_change) != DDI_SUCCESS)
		goto failure_2;

	xsname = xvdi_get_xsname(dip);

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-no-csum-offload", "%d",
	    xnbp->xnb_cksum_offload ? 0 : 1) != 0)
		goto failure_3;

	/*
	 * Use global xnb_hv_copy to export this feature. This means that
	 * we have to decide what to do before starting up a guest domain
	 */
	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-copy", "%d", xnb_hv_copy ? 1 : 0) != 0)
		goto failure_3;
	/*
	 * Linux domUs seem to depend on "feature-rx-flip" being 0
	 * in addition to "feature-rx-copy" being 1. It seems strange
	 * to use four possible states to describe a binary decision,
	 * but we might as well play nice.
	 */
	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-flip", "%d", xnb_explicit_pageflip_set ? 1 : 0) != 0)
		goto failure_3;

	if (xenbus_scanf(XBT_NULL, xsname,
	    "mac", "%s", mac) != 0) {
		cmn_err(CE_WARN, "xnb_attach: "
		    "cannot read mac address from %s",
		    xsname);
		goto failure_3;
	}

	if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) {
		cmn_err(CE_WARN,
		    "xnb_attach: cannot parse mac address %s",
		    mac);
		goto failure_3;
	}

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
	(void) xvdi_post_event(dip, XEN_HP_ADD);

	return (DDI_SUCCESS);

failure_3:
	xvdi_remove_event_handler(dip, NULL);

failure_2:
	xnb_ks_free(xnbp);

failure_1:
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

failure:
	vmem_free(heap_arena, xnbp->xnb_tx_va, PAGESIZE);
	kmem_free(xnbp, sizeof (*xnbp));
	return (DDI_FAILURE);
}

/*
 * Tear down the instance created by xnb_attach().  The caller must
 * ensure the peer is disconnected and that no receive buffers remain
 * outstanding (enforced by the ASSERTs below).
 */
/*ARGSUSED*/
void
xnb_detach(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	ASSERT(xnbp != NULL);
	ASSERT(!xnbp->xnb_connected);
	ASSERT(xnbp->xnb_rx_buf_count == 0);

	xnb_disconnect_rings(dip);

	xvdi_remove_event_handler(dip, NULL);

	xnb_ks_free(xnbp);

	ddi_set_driver_private(dip, NULL);

	mutex_destroy(&xnbp->xnb_tx_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);

	/* The copy-op area is only allocated once xnb_copy_to_peer runs. */
	if (xnbp->xnb_cpop_sz > 0)
		kmem_free(xnbp->xnb_tx_cpop, sizeof (*xnbp->xnb_tx_cpop)
		    * xnbp->xnb_cpop_sz);

	ASSERT(xnbp->xnb_tx_va != NULL);
	vmem_free(heap_arena, xnbp->xnb_tx_va, PAGESIZE);

	kmem_free(xnbp, sizeof (*xnbp));
}


/*
 * Allocate a machine page (via the balloon driver) to be transferred
 * to the peer.  Pages are taken from a shared, statically allocated
 * batch to reduce the number of balloon calls; in low memory
 * situations a single-page allocation is attempted (with the batch
 * lock dropped).  Returns 0 on failure.
 */
static mfn_t
xnb_alloc_page(xnb_t *xnbp)
{
#define	WARNING_RATE_LIMIT	100
#define	BATCH_SIZE	256
	static mfn_t mfns[BATCH_SIZE];	/* common across all instances */
	static int nth = BATCH_SIZE;
	mfn_t mfn;

	mutex_enter(&xnb_alloc_page_lock);
	if (nth == BATCH_SIZE) {
		if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) {
			xnbp->xnb_stat_allocation_failure++;
			mutex_exit(&xnb_alloc_page_lock);

			/*
			 * Try for a single page in low memory situations.
			 * Note: the lock has been dropped, so the stat
			 * updates below are not serialised.
			 */
			if (balloon_alloc_pages(1, &mfn) != 1) {
				if ((xnbp->xnb_stat_small_allocation_failure++
				    % WARNING_RATE_LIMIT) == 0)
					cmn_err(CE_WARN, "xnb_alloc_page: "
					    "Cannot allocate memory to "
					    "transfer packets to peer.");
				return (0);
			} else {
				xnbp->xnb_stat_small_allocation_success++;
				return (mfn);
			}
		}

		nth = 0;
		xnbp->xnb_stat_allocation_success++;
	}

	mfn = mfns[nth++];
	mutex_exit(&xnb_alloc_page_lock);

	ASSERT(mfn != 0);

	return (mfn);
#undef BATCH_SIZE
#undef WARNING_RATE_LIMIT
}

/*
 * Return a page obtained from xnb_alloc_page() to the hypervisor.
 * The page is zeroed first so no data leaks across domains.
 */
/*ARGSUSED*/
static void
xnb_free_page(xnb_t *xnbp, mfn_t mfn)
{
	int r;
	pfn_t pfn;

	pfn = xen_assign_pfn(mfn);
	pfnzero(pfn, 0, PAGESIZE);
	xen_release_pfn(pfn);

	/*
	 * This happens only in the error path, so batching is
	 * not worth the complication.
	 */
	if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) {
		cmn_err(CE_WARN, "free_page: cannot decrease memory "
		    "reservation (%d): page kept but unusable (mfn = 0x%lx).",
		    r, mfn);
	}
}

/*
 * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but
 * using local variables.
 */
#define	XNB_RING_HAS_UNCONSUMED_REQUESTS(_r)		\
	((((_r)->sring->req_prod - loop) <		\
		(RING_SIZE(_r) - (loop - prod))) ?	\
	    ((_r)->sring->req_prod - loop) :		\
	    (RING_SIZE(_r) - (loop - prod)))

/*
 * Pass a chain of packets to the peer using page flipping
 * (GNTTABOP_transfer).  Consumed messages are freed; any messages
 * that could not be passed on (ring full, allocation failure, not
 * yet connected) are returned to the caller.
 */
mblk_t *
xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *prev = NULL;
	size_t len;
	gnttab_transfer_t *gop;
	boolean_t notify;
	RING_IDX loop, prod, end;

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a new page from the hypervisor.
	 * 2. get a request slot from the ring.
	 * 3. copy the data into the new page.
	 * 4. transfer the page to the peer.
	 * 5. update the request slot.
	 * 6. kick the peer.
	 * 7. free mp.
	 *
	 * In order to reduce the number of hypercalls, we prepare
	 * several packets for the peer and perform a single hypercall
	 * to transfer them.
	 */

	mutex_enter(&xnbp->xnb_tx_lock);

	/*
	 * If we are not connected to the peer or have not yet
	 * finished hotplug it is too early to pass packets to the
	 * peer.
	 */
	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_tx_lock);
		DTRACE_PROBE(flip_tx_too_early);
		xnbp->xnb_stat_tx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_tx_top;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {

		mfn_t mfn;
		pfn_t pfn;
		netif_rx_request_t *rxreq;
		netif_rx_response_t *rxresp;
		char *valoop;
		size_t offset;
		mblk_t *ml;
		uint16_t cksum_flags;

		/* 1 */
		if ((mfn = xnb_alloc_page(xnbp)) == 0) {
			xnbp->xnb_stat_xmit_defer++;
			break;
		}

		/* 2 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
		if (rxreq->gref >= NR_GRANT_ENTRIES)
			cmn_err(CE_PANIC, "xnb_to_peer: "
			    "grant ref %d out of range in request 0x%p",
			    rxreq->gref, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* Assign a pfn and map the new page at the allocated va. */
		pfn = xen_assign_pfn(mfn);
		hat_devload(kas.a_hat, xnbp->xnb_tx_va, PAGESIZE,
		    pfn, PROT_READ | PROT_WRITE, HAT_LOAD);

		/* Leave headroom for the peer (see TX_BUFFER_HEADROOM). */
		offset = TX_BUFFER_HEADROOM;

		/* 3 */
		len = 0;
		valoop = xnbp->xnb_tx_va + offset;
		for (ml = mp; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;

			bcopy(ml->b_rptr, valoop, chunk);
			valoop += chunk;
			len += chunk;
		}

		ASSERT(len + offset < PAGESIZE);

		/* Release the pfn. */
		hat_unload(kas.a_hat, xnbp->xnb_tx_va, PAGESIZE,
		    HAT_UNLOAD_UNMAP);
		xen_release_pfn(pfn);

		/* 4 */
		gop->mfn = mfn;
		gop->domid = xnbp->xnb_peer;
		gop->ref = rxreq->gref;

		/* 5.1 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = offset;
		rxresp->flags = 0;

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_tx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		loop++;
		prod++;
		gop++;
		prev = mp;
		mp = mp->b_next;
	}

	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_tx_lock);
		return (mp);
	}

	end = loop;

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(prev != NULL);
	prev->b_next = NULL;

	if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_tx_top,
	    loop - xnbp->xnb_rx_ring.req_cons) != 0) {
		cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed");
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_tx_top;

	while (loop < end) {
		int16_t status = NETIF_RSP_OKAY;

		if (gop->status != 0) {
			status = NETIF_RSP_ERROR;

			/*
			 * If the status is anything other than
			 * GNTST_bad_page then we don't own the page
			 * any more, so don't try to give it back.
			 */
			if (gop->status != GNTST_bad_page)
				gop->mfn = 0;
		} else {
			/* The page is no longer ours. */
			gop->mfn = 0;
		}

		if (gop->mfn != 0)
			/*
			 * Give back the page, as we won't be using
			 * it.
			 */
			xnb_free_page(xnbp, gop->mfn);
		else
			/*
			 * We gave away a page, update our accounting
			 * now.
			 */
			balloon_drv_subtracted(1);

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
		} else {
			xnbp->xnb_stat_opackets++;
			/*
			 * NOTE(review): 'len' still holds the length of
			 * the *last* packet prepared in the loop above,
			 * so obytes looks inaccurate when more than one
			 * packet is transferred per batch -- confirm.
			 */
			xnbp->xnb_stat_obytes += len;
		}

		loop++;
		prod++;
		gop++;
	}

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_tx_notify_sent++;
	} else {
		xnbp->xnb_stat_tx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_xmit_defer++;

	mutex_exit(&xnbp->xnb_tx_lock);

	/* Free mblk_t's that we consumed. */
	freemsgchain(free);

	return (mp);
}

/* helper functions for xnb_copy_to_peer */

/*
 * Grow the array of copy operation descriptors.
 * Returns a pointer to the next available entry.
 */
gnttab_copy_t *
grow_cpop_area(xnb_t *xnbp, gnttab_copy_t *o_cpop)
{
	/*
	 * o_cpop (arg.1) is a ptr to the area we would like to copy
	 * something into but cannot, because we haven't alloc'ed it
	 * yet, or NULL.
	 * old_cpop and new_cpop (local) are pointers to old/new
	 * versions of xnbp->xnb_tx_cpop.
	 */
	gnttab_copy_t *new_cpop, *old_cpop, *ret_cpop;
	size_t newcount;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	old_cpop = xnbp->xnb_tx_cpop;
	/*
	 * o_cpop is a pointer into the array pointed to by old_cpop;
	 * it would be an error for exactly one of these pointers to be NULL.
	 * We shouldn't call this function if xnb_tx_cpop has already
	 * been allocated, but we're starting to fill it from the beginning
	 * again.
	 */
	ASSERT((o_cpop == NULL && old_cpop == NULL) ||
	    (o_cpop != NULL && old_cpop != NULL && o_cpop != old_cpop));

	newcount = xnbp->xnb_cpop_sz + CPOP_DEFCNT;

	new_cpop = kmem_alloc(sizeof (*new_cpop) * newcount, KM_NOSLEEP);
	if (new_cpop == NULL) {
		xnbp->xnb_stat_other_allocation_failure++;
		return (NULL);
	}

	if (o_cpop != NULL) {
		size_t offset = (o_cpop - old_cpop);

		/* we only need to move the parts in use ... */
		(void) memmove(new_cpop, old_cpop, xnbp->xnb_cpop_sz *
		    (sizeof (*old_cpop)));

		kmem_free(old_cpop, xnbp->xnb_cpop_sz * sizeof (*old_cpop));

		/* Re-base the caller's cursor into the new array. */
		ret_cpop = new_cpop + offset;
	} else {
		ret_cpop = new_cpop;
	}

	xnbp->xnb_tx_cpop = new_cpop;
	xnbp->xnb_cpop_sz = newcount;

	xnbp->xnb_stat_tx_cpoparea_grown++;

	return (ret_cpop);
}

/*
 * Check whether an address is on a page that's foreign to this domain.
 */
static boolean_t
is_foreign(void *addr)
{
	pfn_t pfn = hat_getpfnum(kas.a_hat, addr);

	return (pfn & PFN_IS_FOREIGN_MFN ? B_TRUE : B_FALSE);
}

/*
 * Insert a newly allocated mblk into a chain, replacing the old one.
 * The checksum offload state and the b_next/b_prev/b_cont linkage are
 * carried over to the copy; the original mblk is freed.
 */
static mblk_t *
replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev)
{
	uint32_t start, stuff, end, value, flags;
	mblk_t *new_mp;

	new_mp = copyb(mp);
	if (new_mp == NULL)
		cmn_err(CE_PANIC, "replace_msg: cannot alloc new message"
		    "for %p, len %lu", (void *) mp, len);

	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags);
	(void) hcksum_assoc(new_mp, NULL, NULL, start, stuff, end, value,
	    flags, KM_NOSLEEP);

	new_mp->b_next = mp->b_next;
	new_mp->b_prev = mp->b_prev;
	new_mp->b_cont = mp->b_cont;

	/* Make sure we only overwrite pointers to the mblk being replaced. */
	if (mp_prev != NULL && mp_prev->b_next == mp)
		mp_prev->b_next = new_mp;

	if (ml_prev != NULL && ml_prev->b_cont == mp)
		ml_prev->b_cont = new_mp;

	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	freemsg(mp);

	return (new_mp);
}

/*
 * Set all the fields in a gnttab_copy_t.
 */
static void
setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr,
    size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref)
{
	ASSERT(xnbp != NULL && gp != NULL);

	gp->source.offset = s_off;
	gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr));
	gp->source.domid = DOMID_SELF;

	gp->len = (uint16_t)len;
	gp->flags = GNTCOPY_dest_gref;
	gp->status = 0;

	gp->dest.u.ref = d_ref;
	gp->dest.offset = d_off;
	gp->dest.domid = xnbp->xnb_peer;
}

/*
 * Pass a chain of packets to the peer using hypervisor copy
 * (GNTTABOP_copy) into the peer's granted buffers.  Falls back to
 * xnb_to_peer() (page flipping) if the peer did not request copies.
 * Consumed messages are freed; unconsumed ones are returned.
 */
mblk_t *
xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *mp_prev = NULL, *saved_mp = mp;
	mblk_t *ml, *ml_prev;
	gnttab_copy_t *gop_cp;
	boolean_t notify;
	RING_IDX loop, prod;
	int i;

	if (!xnbp->xnb_hv_copy)
		return (xnb_to_peer(xnbp, mp));

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a request slot from the ring.
	 * 2. set up data for hypercall (see NOTE below)
	 * 3. have the hypervisor copy the data
	 * 4. update the request slot.
	 * 5. kick the peer.
	 *
	 * NOTE ad 2.
	 * In order to reduce the number of hypercalls, we prepare
	 * several packets (mp->b_cont != NULL) for the peer and
	 * perform a single hypercall to transfer them.
	 * We also have to set up a separate copy operation for
	 * every page.
	 *
	 * If we have more than one message (mp->b_next != NULL),
	 * we do this whole dance repeatedly.
	 */

	mutex_enter(&xnbp->xnb_tx_lock);

	/* Too early to pass packets: not connected or not hotplugged. */
	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_tx_lock);
		DTRACE_PROBE(copy_tx_too_early);
		xnbp->xnb_stat_tx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
		netif_rx_request_t *rxreq;
		netif_rx_response_t *rxresp;
		size_t offset, d_offset;
		size_t len;
		uint16_t cksum_flags;
		int16_t status = NETIF_RSP_OKAY;
		int item_count;

		/* 1 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_copy_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
		if (rxreq->gref >= NR_GRANT_ENTRIES)
			cmn_err(CE_PANIC, "xnb_copy_to_peer: "
			    "grant ref %d out of range in request 0x%p",
			    rxreq->gref, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* 2 */
		d_offset = offset = TX_BUFFER_HEADROOM;
		len = 0;
		item_count = 0;

		gop_cp = xnbp->xnb_tx_cpop;

		/*
		 * We walk the b_cont pointers and set up a gop_cp
		 * structure for every page in every data block we have.
		 */
		/* 2a */
		for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;
			uchar_t *r_tmp, *rpt_align;
			size_t r_offset;

			/*
			 * If we get an mblk on a page that doesn't belong to
			 * this domain, get a new mblk to replace the old one.
			 */
			if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) {
				mblk_t *ml_new = replace_msg(ml, chunk,
				    mp_prev, ml_prev);

				/* We can still use old ml, but not *ml! */
				if (free == ml)
					free = ml_new;
				if (mp == ml)
					mp = ml_new;
				ml = ml_new;

				xnbp->xnb_stat_tx_foreign_page++;
			}

			rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr);
			r_offset = (uint16_t)(ml->b_rptr - rpt_align);
			r_tmp = ml->b_rptr;

			if (d_offset + chunk > PAGESIZE)
				cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p "
				    "(svd: %p), ml %p,rpt_alg. %p, d_offset "
				    "(%lu) + chunk (%lu) > PAGESIZE %d!",
				    (void *)mp, (void *)saved_mp, (void *)ml,
				    (void *)rpt_align,
				    d_offset, chunk, (int)PAGESIZE);

			while (chunk > 0) {
				size_t part_len;

				item_count++;
				/* Grow the copy-op area on demand. */
				if (item_count > xnbp->xnb_cpop_sz) {
					gop_cp = grow_cpop_area(xnbp, gop_cp);
					if (gop_cp == NULL)
						goto failure;
				}
				/*
				 * If our mblk crosses a page boundary, we need
				 * to do a separate copy for every page.
				 */
				if (r_offset + chunk > PAGESIZE) {
					part_len = PAGESIZE - r_offset;

					DTRACE_PROBE3(mblk_page_crossed,
					    (mblk_t *), ml, int, chunk, int,
					    (int)r_offset);

					xnbp->xnb_stat_tx_pagebndry_crossed++;
				} else {
					part_len = chunk;
				}

				setup_gop(xnbp, gop_cp, r_tmp, r_offset,
				    d_offset, part_len, rxreq->gref);

				chunk -= part_len;

				len += part_len;
				d_offset += part_len;
				r_tmp += part_len;
				/*
				 * The 2nd, 3rd ... last copies will always
				 * start at r_tmp, therefore r_offset is 0.
				 */
				r_offset = 0;
				gop_cp++;
			}
			ml_prev = ml;
			DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int,
			    chunk, int, len, int, item_count);
		}
		/* 3 */
		if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_tx_cpop,
		    item_count) != 0) {
			cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed");
			DTRACE_PROBE(HV_granttableopfailed);
		}

		/* 4 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = offset;

		rxresp->flags = 0;

		DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_tx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		/* Any failed copy operation fails the whole packet. */
		for (i = 0; i < item_count; i++) {
			if (xnbp->xnb_tx_cpop[i].status != 0) {
				DTRACE_PROBE2(cpop__status__nonnull, int,
				    (int)xnbp->xnb_tx_cpop[i].status,
				    int, i);
				status = NETIF_RSP_ERROR;
			}
		}

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
			xnbp->xnb_stat_tx_rsp_notok++;
		} else {
			xnbp->xnb_stat_opackets++;
			xnbp->xnb_stat_obytes += len;
		}

		loop++;
		prod++;
		mp_prev = mp;
		mp = mp->b_next;
	}
failure:
	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_tx_lock);
		return (mp);
	}

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(mp_prev != NULL);
	mp_prev->b_next = NULL;

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_tx_notify_sent++;
	} else {
		xnbp->xnb_stat_tx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_xmit_defer++;

	mutex_exit(&xnbp->xnb_tx_lock);

	/* Free mblk_t structs we have consumed. */
	freemsgchain(free);

	return (mp);
}

/*
 * kmem cache constructor for receive buffers: reserve a page of kernel
 * address space (into which a peer page will later be grant-mapped)
 * and initialise the esballoc free routine.  Returns -1 on failure.
 */
/*ARGSUSED*/
static int
xnb_rxbuf_constructor(void *buf, void *arg, int kmflag)
{
	xnb_rxbuf_t *rxp = buf;

	bzero(rxp, sizeof (*rxp));

	rxp->xr_free_rtn.free_func = xnb_rx_complete;
	rxp->xr_free_rtn.free_arg = (caddr_t)rxp;

	rxp->xr_mop.host_addr =
	    (uint64_t)(uintptr_t)vmem_alloc(heap_arena, PAGESIZE,
	    ((kmflag & KM_NOSLEEP) == KM_NOSLEEP) ?
	    VM_NOSLEEP : VM_SLEEP);

	if (rxp->xr_mop.host_addr == NULL) {
		cmn_err(CE_WARN, "xnb_rxbuf_constructor: "
		    "cannot get address space");
		return (-1);
	}

	/*
	 * Have the hat ensure that page table exists for the VA.
	 */
	hat_prepare_mapping(kas.a_hat,
	    (caddr_t)(uintptr_t)rxp->xr_mop.host_addr);

	return (0);
}

/*
 * kmem cache destructor for xnb_rxbuf_t: undo the constructor — release
 * the hat preparation and return the reserved VA page to heap_arena.
 * The buffer must not be in use when it is destroyed.
 */
/*ARGSUSED*/
static void
xnb_rxbuf_destructor(void *buf, void *arg)
{
	xnb_rxbuf_t *rxp = buf;

	ASSERT(rxp->xr_mop.host_addr != NULL);
	ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == 0);

	hat_release_mapping(kas.a_hat,
	    (caddr_t)(uintptr_t)rxp->xr_mop.host_addr);
	vmem_free(heap_arena,
	    (caddr_t)(uintptr_t)rxp->xr_mop.host_addr, PAGESIZE);
}

/*
 * Push pending responses on the (shared) tx ring and notify the peer
 * via the event channel if the ring macro says a notification is due.
 * Caller must hold xnb_rx_lock ("tx" ring per the domU's naming; this
 * is the receive path from our point of view).
 */
static void
xnb_rx_notify_peer(xnb_t *xnbp)
{
	boolean_t notify;

	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}
}

/*
 * esballoc free routine for loaned-up receive buffers: invoked when the
 * upper layers free an mblk whose data still maps a peer granted page.
 * Schedules the grant unmap (and eventual buffer release) under
 * xnb_rx_lock.
 */
static void
xnb_rx_complete(xnb_rxbuf_t *rxp)
{
	xnb_t *xnbp = rxp->xr_xnbp;

	ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == XNB_RXBUF_INUSE);

	mutex_enter(&xnbp->xnb_rx_lock);
	xnb_rx_schedule_unmop(xnbp, &rxp->xr_mop, rxp);
	mutex_exit(&xnbp->xnb_rx_lock);
}

/*
 * Write a single tx-ring response (id/status) at the private producer
 * index and advance it.  Caller must hold xnb_rx_lock.
 */
static void
xnb_rx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
{
	RING_IDX i;
	netif_tx_response_t *txresp;

	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

	i = xnbp->xnb_tx_ring.rsp_prod_pvt;

	txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i);
	txresp->id = id;
	txresp->status = status;

	xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1;

	/*
	 * Note that we don't push the change to the peer here - that
	 * is the callers responsibility.
	 */
}

/*
 * Queue a grant-unmap operation for a receive buffer, batching them up
 * until either the batch grows to xnb_unmop_hiwat entries or the ring
 * is close to exhaustion, at which point the whole batch is flushed via
 * xnb_rx_perform_pending_unmop().  Caller must hold xnb_rx_lock.
 */
static void
xnb_rx_schedule_unmop(xnb_t *xnbp, gnttab_map_grant_ref_t *mop,
    xnb_rxbuf_t *rxp)
{
	gnttab_unmap_grant_ref_t *unmop;
	int u_count;
	int reqs_on_ring;

	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
	ASSERT(xnbp->xnb_rx_unmop_count < NET_TX_RING_SIZE);

	u_count = xnbp->xnb_rx_unmop_count++;

	/* Cache data for the time when we actually unmap grant refs */
	xnbp->xnb_rx_unmop_rxp[u_count] = rxp;

	unmop = &xnbp->xnb_rx_unmop[u_count];
	unmop->host_addr = mop->host_addr;
	unmop->dev_bus_addr = mop->dev_bus_addr;
	unmop->handle = mop->handle;

	/*
	 * We cannot check the ring once we're disconnected from it. Batching
	 * doesn't seem to be a useful optimisation in this case either,
	 * so we directly call into the actual unmap function.
	 */
	if (xnbp->xnb_connected) {
		reqs_on_ring = RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring);

		/*
		 * By tuning xnb_unmop_hiwat to N, we can emulate "N per batch"
		 * or (with N == 1) "immediate unmop" behaviour.
		 * The "> xnb_unmop_lowwat" is a guard against ring exhaustion.
		 */
		if (xnbp->xnb_rx_unmop_count < xnb_unmop_hiwat &&
		    reqs_on_ring > xnb_unmop_lowwat)
			return;
	}

	xnb_rx_perform_pending_unmop(xnbp);
}

/*
 * Here we perform the actual unmapping of the data that was
 * accumulated in xnb_rx_schedule_unmop().
 * Note that it is the caller's responsibility to make sure that
 * there's actually something there to unmop.
 */
static void
xnb_rx_perform_pending_unmop(xnb_t *xnbp)
{
	RING_IDX loop;
#ifdef XNB_DEBUG
	gnttab_unmap_grant_ref_t *unmop;
#endif /* XNB_DEBUG */

	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
	ASSERT(xnbp->xnb_rx_unmop_count > 0);

	/* Unmap the whole accumulated batch in a single hypercall. */
	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
	    xnbp->xnb_rx_unmop, xnbp->xnb_rx_unmop_count) < 0) {
		cmn_err(CE_WARN, "xnb_rx_perform_pending_unmop: "
		    "unmap grant operation failed, "
		    "%d pages lost", xnbp->xnb_rx_unmop_count);
	}

#ifdef XNB_DEBUG
	/* Report per-entry failures; the batch op only fails wholesale. */
	for (loop = 0, unmop = xnbp->xnb_rx_unmop;
	    loop < xnbp->xnb_rx_unmop_count;
	    loop++, unmop++) {
		if (unmop->status != 0) {
			cmn_err(CE_WARN, "xnb_rx_perform_pending_unmop: "
			    "unmap grant reference failed (%d)",
			    unmop->status);
		}
	}
#endif /* XNB_DEBUG */

	/*
	 * Complete each buffer: post a response to the peer (only while
	 * still connected) and return the buffer to the cache.
	 */
	for (loop = 0; loop < xnbp->xnb_rx_unmop_count; loop++) {
		xnb_rxbuf_t *rxp = xnbp->xnb_rx_unmop_rxp[loop];

		if (rxp == NULL)
			cmn_err(CE_PANIC,
			    "xnb_rx_perform_pending_unmop: "
			    "unexpected NULL rxp (loop %d; count %d)!",
			    loop, xnbp->xnb_rx_unmop_count);

		if (xnbp->xnb_connected)
			xnb_rx_mark_complete(xnbp, rxp->xr_id, rxp->xr_status);
		xnb_rxbuf_put(xnbp, rxp);
	}

	if (xnbp->xnb_connected)
		xnb_rx_notify_peer(xnbp);

	xnbp->xnb_rx_unmop_count = 0;

#ifdef XNB_DEBUG
	/* Scrub the stale batch state to make use-after-flush obvious. */
	bzero(xnbp->xnb_rx_unmop, sizeof (xnbp->xnb_rx_unmop));
	bzero(xnbp->xnb_rx_unmop_rxp, sizeof (xnbp->xnb_rx_unmop_rxp));
#endif /* XNB_DEBUG */
}

/*
 * Allocate a receive buffer from the cache and prime its grant-map
 * operation (target domain, host_map, read-only when the peer's pages
 * are not writable).  Returns NULL if the cache allocation fails.
 * Caller must hold xnb_rx_lock.
 */
static xnb_rxbuf_t *
xnb_rxbuf_get(xnb_t *xnbp, int flags)
{
	xnb_rxbuf_t *rxp;

	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

	rxp = kmem_cache_alloc(xnb_rxbuf_cachep, flags);
	if (rxp != NULL) {
		ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == 0);
		rxp->xr_flags |= XNB_RXBUF_INUSE;

		rxp->xr_xnbp = xnbp;
		rxp->xr_mop.dom = xnbp->xnb_peer;

		rxp->xr_mop.flags = GNTMAP_host_map;
		if (!xnbp->xnb_rx_pages_writable)
			rxp->xr_mop.flags |= GNTMAP_readonly;

		xnbp->xnb_rx_buf_count++;
	}

	return (rxp);
}

/*
 * Return a receive buffer to the cache, clearing its in-use flag.
 * Caller must hold xnb_rx_lock.
 */
static void
xnb_rxbuf_put(xnb_t *xnbp, xnb_rxbuf_t *rxp)
{
	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
	ASSERT((rxp->xr_flags & XNB_RXBUF_INUSE) == XNB_RXBUF_INUSE);

	rxp->xr_flags &= ~XNB_RXBUF_INUSE;
	xnbp->xnb_rx_buf_count--;

	kmem_cache_free(xnb_rxbuf_cachep, rxp);
}

/*
 * Consume the peer's tx-ring requests, map the granted pages and build
 * an mblk chain for the upper layers.  Returns the head of the chain
 * (NULL if there was nothing to do).  Caller must hold xnb_rx_lock.
 */
static mblk_t *
xnb_recv(xnb_t *xnbp)
{
	RING_IDX start, end, loop;
	gnttab_map_grant_ref_t *mop;
	xnb_rxbuf_t **rxpp;
	netif_tx_request_t *txreq;
	boolean_t work_to_do;
	mblk_t *head, *tail;
	/*
	 * If the peer granted a read-only mapping to the page then we
	 * must copy the data, as the local protocol stack (should the
	 * packet be destined for this host) will modify the packet
	 * 'in place'.
	 */
	boolean_t copy = !xnbp->xnb_rx_pages_writable;

	/*
	 * For each individual request, the sequence of actions is:
	 *
	 * 1. get the request.
	 * 2. map the page based on the grant ref.
	 * 3. allocate an mblk, copy the data to it.
	 * 4. release the grant.
	 * 5. update the ring.
	 * 6. pass the packet upward.
	 * 7. kick the peer.
	 *
	 * In fact, we try to perform the grant operations in batches,
	 * so there are two loops.
	 */

	head = tail = NULL;
around:
	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

	/* LINTED: constant in conditional context */
	RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do);
	if (!work_to_do) {
finished:
		return (head);
	}

	start = xnbp->xnb_tx_ring.req_cons;
	end = xnbp->xnb_tx_ring.sring->req_prod;

	/*
	 * First loop: grab a buffer per request and stage its grant-map
	 * operation; stop early if the buffer cache runs dry.
	 */
	for (loop = start, mop = xnbp->xnb_rx_mop, rxpp = xnbp->xnb_rx_bufp;
	    loop != end;
	    loop++, mop++, rxpp++) {
		xnb_rxbuf_t *rxp;

		rxp = xnb_rxbuf_get(xnbp, KM_NOSLEEP);
		if (rxp == NULL)
			break;

		ASSERT(xnbp->xnb_rx_pages_writable ||
		    ((rxp->xr_mop.flags & GNTMAP_readonly)
		    == GNTMAP_readonly));

		rxp->xr_mop.ref =
		    RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop)->gref;

		ASSERT(rxp->xr_mop.ref < NR_GRANT_ENTRIES);

		*mop = rxp->xr_mop;
		*rxpp = rxp;
	}

	if ((loop - start) == 0)
		goto finished;

	end = loop;

	/* Map the whole staged batch in one hypercall. */
	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
	    xnbp->xnb_rx_mop, end - start) != 0) {

		cmn_err(CE_WARN, "xnb_recv: map grant operation failed");

		/* Whole batch failed: hand every buffer back. */
		loop = start;
		rxpp = xnbp->xnb_rx_bufp;

		while (loop != end) {
			xnb_rxbuf_put(xnbp, *rxpp);

			loop++;
			rxpp++;
		}

		goto finished;
	}

	/*
	 * Second loop: turn each mapped page into an mblk — by copying
	 * when the mapping is read-only, otherwise by loaning the page
	 * up via desballoc() (freed through xnb_rx_complete()).
	 */
	for (loop = start, mop = xnbp->xnb_rx_mop, rxpp = xnbp->xnb_rx_bufp;
	    loop != end;
	    loop++, mop++, rxpp++) {
		mblk_t *mp = NULL;
		int16_t status = NETIF_RSP_OKAY;
		xnb_rxbuf_t *rxp = *rxpp;

		if (mop->status != 0) {
			cmn_err(CE_WARN, "xnb_recv: "
			    "failed to map buffer: %d",
			    mop->status);
			status = NETIF_RSP_ERROR;
		}

		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);

		if (status == NETIF_RSP_OKAY) {
			if (copy) {
				mp = allocb(txreq->size, BPRI_MED);
				if (mp == NULL) {
					status = NETIF_RSP_ERROR;
					xnbp->xnb_stat_rx_allocb_failed++;
				} else {
					bcopy((caddr_t)(uintptr_t)
					    mop->host_addr + txreq->offset,
					    mp->b_wptr, txreq->size);
					mp->b_wptr += txreq->size;
				}
			} else {
				mp = desballoc((uchar_t *)(uintptr_t)
				    mop->host_addr + txreq->offset,
				    txreq->size, 0, &rxp->xr_free_rtn);
				if (mp == NULL) {
					status = NETIF_RSP_ERROR;
					xnbp->xnb_stat_rx_allocb_failed++;
				} else {
					/*
					 * Remember id/status/map so the
					 * deferred free path can respond to
					 * the peer and unmap the grant.
					 */
					rxp->xr_id = txreq->id;
					rxp->xr_status = status;
					rxp->xr_mop = *mop;

					mp->b_wptr += txreq->size;
				}
			}

			/*
			 * If we have a buffer and there are checksum
			 * flags, process them appropriately.
			 */
			if ((mp != NULL) &&
			    ((txreq->flags &
			    (NETTXF_csum_blank | NETTXF_data_validated))
			    != 0)) {
				mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp,
				    mp, txreq->flags);
				xnbp->xnb_stat_rx_cksum_no_need++;
			}
		}

		/*
		 * Copied (or failed) buffers are done with the grant now;
		 * loaned buffers unmap later, when the mblk is freed.
		 */
		if (copy || (mp == NULL)) {
			rxp->xr_status = status;
			rxp->xr_id = txreq->id;
			xnb_rx_schedule_unmop(xnbp, mop, rxp);
		}

		if (mp != NULL) {
			xnbp->xnb_stat_ipackets++;
			xnbp->xnb_stat_rbytes += txreq->size;

			mp->b_next = NULL;
			if (head == NULL) {
				ASSERT(tail == NULL);
				head = mp;
			} else {
				ASSERT(tail != NULL);
				tail->b_next = mp;
			}
			tail = mp;
		}
	}

	xnbp->xnb_tx_ring.req_cons = loop;

	/* Re-check the ring; RING_FINAL_CHECK_FOR_REQUESTS exits the loop. */
	goto around;
	/* NOTREACHED */
}

/*
 * intr() -- ring interrupt service routine
 */
static uint_t
xnb_intr(caddr_t arg)
{
	xnb_t *xnbp = (xnb_t *)arg;
	mblk_t *mp;

	xnbp->xnb_stat_intr++;

	mutex_enter(&xnbp->xnb_rx_lock);

	ASSERT(xnbp->xnb_connected);

	mp = xnb_recv(xnbp);

	mutex_exit(&xnbp->xnb_rx_lock);

	/* Drop traffic that arrives before the hotplug script has run. */
	if (!xnbp->xnb_hotplugged) {
		xnbp->xnb_stat_rx_too_early++;
		goto fail;
	}
	if (mp == NULL) {
		xnbp->xnb_stat_spurious_intr++;
		goto fail;
	}

	/* Hand the received chain to the flavour (e.g. xnbu/xnbo). */
	xnbp->xnb_flavour->xf_recv(xnbp, mp);

	return (DDI_INTR_CLAIMED);

fail:
	freemsgchain(mp);
	return (DDI_INTR_CLAIMED);
}

/*
 * Connect to the peer's rings: read the ring references and event
 * channel from the xenstore "other end", map both shared ring pages,
 * bind the event channel and attach the interrupt handler.  Returns
 * B_TRUE on success; on failure the caller is expected to invoke
 * xnb_disconnect_rings() to tear down whatever was established.
 */
static boolean_t
xnb_connect_rings(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);
	char *oename;
	struct gnttab_map_grant_ref map_op;
	evtchn_port_t evtchn;
	int i;

	/*
	 * Cannot attempt to connect the rings if already connected.
	 */
	ASSERT(!xnbp->xnb_connected);

	oename = xvdi_get_oename(dip);

	if (xenbus_gather(XBT_NULL, oename,
	    "event-channel", "%u", &evtchn,
	    "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref,
	    "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref,
	    NULL) != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: "
		    "cannot read other-end details from %s",
		    oename);
		goto fail;
	}

	/* Optional peer features; a missing key is treated as 0. */
	if (xenbus_scanf(XBT_NULL, oename,
	    "feature-tx-writable", "%d", &i) != 0)
		i = 0;
	if (i != 0)
		xnbp->xnb_rx_pages_writable = B_TRUE;

	if (xenbus_scanf(XBT_NULL, oename,
	    "feature-no-csum-offload", "%d", &i) != 0)
		i = 0;
	if ((i == 1) || !xnbp->xnb_cksum_offload)
		xnbp->xnb_cksum_offload = B_FALSE;

	/* Check whether our peer knows and requests hypervisor copy */
	if (xenbus_scanf(XBT_NULL, oename, "request-rx-copy", "%d", &i)
	    != 0)
		i = 0;
	if (i != 0)
		xnbp->xnb_hv_copy = B_TRUE;

	/*
	 * 1. allocate a vaddr for the tx page, one for the rx page.
	 * 2. call GNTTABOP_map_grant_ref to map the relevant pages
	 *    into the allocated vaddr (one for tx, one for rx).
	 * 3. call EVTCHNOP_bind_interdomain to have the event channel
	 *    bound to this domain.
	 * 4. associate the event channel with an interrupt.
	 * 5. declare ourselves connected.
	 * 6. enable the interrupt.
	 */

	/* 1.tx */
	xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->xnb_tx_ring_addr != NULL);

	/* 2.tx */
	map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->xnb_tx_ring_ref;
	map_op.dom = xnbp->xnb_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
	    &map_op, 1) != 0 || map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page.");
		goto fail;
	}
	xnbp->xnb_tx_ring_handle = map_op.handle;

	/* LINTED: constant in conditional context */
	BACK_RING_INIT(&xnbp->xnb_tx_ring,
	    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);

	/* 1.rx */
	xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->xnb_rx_ring_addr != NULL);

	/* 2.rx */
	map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->xnb_rx_ring_ref;
	map_op.dom = xnbp->xnb_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
	    &map_op, 1) != 0 || map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page.");
		goto fail;
	}
	xnbp->xnb_rx_ring_handle = map_op.handle;

	/* LINTED: constant in conditional context */
	BACK_RING_INIT(&xnbp->xnb_rx_ring,
	    (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE);

	/* 3 */
	if (xvdi_bind_evtchn(dip, evtchn) != DDI_SUCCESS) {
		/*
		 * NOTE(review): xnbp->xnb_evtchn has not been assigned yet
		 * on this path, so this message prints a stale value;
		 * the local 'evtchn' read from xenstore looks like the
		 * intended argument — confirm and fix separately.
		 */
		cmn_err(CE_WARN, "xnb_connect_rings: "
		    "cannot bind event channel %d", xnbp->xnb_evtchn);
		xnbp->xnb_evtchn = INVALID_EVTCHN;
		goto fail;
	}
	xnbp->xnb_evtchn = xvdi_get_evtchn(dip);

	/*
	 * It would be good to set the
 state to XenbusStateConnected
	 * here as well, but then what if ddi_add_intr() failed?
	 * Changing the state in the store will be noticed by the peer
	 * and cannot be "taken back".
	 */
	mutex_enter(&xnbp->xnb_tx_lock);
	mutex_enter(&xnbp->xnb_rx_lock);

	/* 5.1 */
	xnbp->xnb_connected = B_TRUE;

	mutex_exit(&xnbp->xnb_rx_lock);
	mutex_exit(&xnbp->xnb_tx_lock);

	/* 4, 6 */
	if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp)
	    != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt");
		goto fail;
	}
	xnbp->xnb_irq = B_TRUE;

	/* 5.2 */
	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);

	return (B_TRUE);

fail:
	/*
	 * Take both locks (tx before rx, matching the success path) so
	 * that no in-flight path observes a half-connected state.
	 */
	mutex_enter(&xnbp->xnb_tx_lock);
	mutex_enter(&xnbp->xnb_rx_lock);

	xnbp->xnb_connected = B_FALSE;

	mutex_exit(&xnbp->xnb_rx_lock);
	mutex_exit(&xnbp->xnb_tx_lock);

	return (B_FALSE);
}

/*
 * Tear down everything xnb_connect_rings() established, in reverse
 * order: interrupt, pending grant unmaps, event channel, then each
 * mapped ring page and its reserved VA.  Safe to call on a partially
 * connected instance — each step is guarded by its own "was this
 * set up?" check.
 */
static void
xnb_disconnect_rings(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	if (xnbp->xnb_irq) {
		ddi_remove_intr(dip, 0, NULL);
		xnbp->xnb_irq = B_FALSE;
	}

	/* Flush any grant unmaps still batched up. */
	if (xnbp->xnb_rx_unmop_count > 0)
		xnb_rx_perform_pending_unmop(xnbp);

	if (xnbp->xnb_evtchn != INVALID_EVTCHN) {
		xvdi_free_evtchn(dip);
		xnbp->xnb_evtchn = INVALID_EVTCHN;
	}

	if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) {
		struct gnttab_unmap_grant_ref unmap_op;

		unmap_op.host_addr = (uint64_t)(uintptr_t)
		    xnbp->xnb_rx_ring_addr;
		unmap_op.dev_bus_addr = 0;
		unmap_op.handle = xnbp->xnb_rx_ring_handle;
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &unmap_op, 1) != 0)
			cmn_err(CE_WARN, "xnb_disconnect_rings: "
			    "cannot unmap rx-ring page (%d)",
			    unmap_op.status);

		xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	}

	if (xnbp->xnb_rx_ring_addr != NULL) {
		hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
		vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE);
		xnbp->xnb_rx_ring_addr = NULL;
	}

	if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) {
		struct gnttab_unmap_grant_ref unmap_op;

		unmap_op.host_addr = (uint64_t)(uintptr_t)
		    xnbp->xnb_tx_ring_addr;
		unmap_op.dev_bus_addr = 0;
		unmap_op.handle = xnbp->xnb_tx_ring_handle;
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &unmap_op, 1) != 0)
			cmn_err(CE_WARN, "xnb_disconnect_rings: "
			    "cannot unmap tx-ring page (%d)",
			    unmap_op.status);

		xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	}

	if (xnbp->xnb_tx_ring_addr != NULL) {
		hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
		vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE);
		xnbp->xnb_tx_ring_addr = NULL;
	}
}

/*
 * xenbus "other end" state-change callback: react to the peer moving
 * through the XenbusState machine — connect our rings when it reports
 * Connected, mirror Closing, and tear everything down on Closed.
 */
/*ARGSUSED*/
static void
xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);
	XenbusState new_state = *(XenbusState *)impl_data;

	ASSERT(xnbp != NULL);

	switch (new_state) {
	case XenbusStateConnected:
		if (xnb_connect_rings(dip)) {
			xnbp->xnb_flavour->xf_peer_connected(xnbp);
		} else {
			/* Connect failed: unwind and report closed. */
			xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
			xnb_disconnect_rings(dip);
			(void) xvdi_switch_state(dip, XBT_NULL,
			    XenbusStateClosed);
			(void) xvdi_post_event(dip, XEN_HP_REMOVE);
		}

		/*
		 * Now that we've attempted to connect it's reasonable
		 * to allow an attempt to detach.
		 */
		xnbp->xnb_detachable = B_TRUE;

		break;

	case XenbusStateClosing:
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);

		break;

	case XenbusStateClosed:
		xnbp->xnb_flavour->xf_peer_disconnected(xnbp);

		/* tx before rx: the lock-ordering used throughout this file. */
		mutex_enter(&xnbp->xnb_tx_lock);
		mutex_enter(&xnbp->xnb_rx_lock);

		xnb_disconnect_rings(dip);
		xnbp->xnb_connected = B_FALSE;

		mutex_exit(&xnbp->xnb_rx_lock);
		mutex_exit(&xnbp->xnb_tx_lock);

		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
		(void) xvdi_post_event(dip, XEN_HP_REMOVE);
		/*
		 * In all likelyhood this is already set (in the above
		 * case), but if the peer never attempted to connect
		 * and the domain is destroyed we get here without
		 * having been through the case above, so we set it to
		 * be sure.
		 */
		xnbp->xnb_detachable = B_TRUE;

		break;

	default:
		break;
	}
}

/*
 * Hotplug state-change callback: record (under both locks) whether the
 * flavour's hotplug handling succeeded; xnb_intr() drops traffic until
 * xnb_hotplugged is set.
 */
/*ARGSUSED*/
static void
xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);
	xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;
	boolean_t success;

	ASSERT(xnbp != NULL);

	switch (state) {
	case Connected:

		success = xnbp->xnb_flavour->xf_hotplug_connected(xnbp);

		mutex_enter(&xnbp->xnb_tx_lock);
		mutex_enter(&xnbp->xnb_rx_lock);

		xnbp->xnb_hotplugged = success;

		mutex_exit(&xnbp->xnb_rx_lock);
		mutex_exit(&xnbp->xnb_tx_lock);
		break;

	default:
		break;
	}
}

/* This is a misc module (support code for the xnbu/xnbo flavours). */
static struct modldrv modldrv = {
	&mod_miscops, "xnb module %I%",
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

/*
 * Module load: set up global state (page-allocation lock and the
 * receive buffer cache) before registering with the module framework;
 * undo both if mod_install() fails.
 */
int
_init(void)
{
	int i;

	mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL);

	xnb_rxbuf_cachep = kmem_cache_create("xnb_rxbuf_cachep",
	    sizeof (xnb_rxbuf_t), 0, xnb_rxbuf_constructor,
	    xnb_rxbuf_destructor, NULL, NULL, NULL, 0);
	ASSERT(xnb_rxbuf_cachep != NULL);

	i = mod_install(&modlinkage);
	if (i != DDI_SUCCESS) {
		kmem_cache_destroy(xnb_rxbuf_cachep);
		mutex_destroy(&xnb_alloc_page_lock);
	}
	return (i);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*
 * Module unload: tear down the cache and lock only if mod_remove()
 * actually succeeded.
 */
int
_fini(void)
{
	int i;

	i = mod_remove(&modlinkage);
	if (i == DDI_SUCCESS) {
		kmem_cache_destroy(xnb_rxbuf_cachep);
		mutex_destroy(&xnb_alloc_page_lock);
	}
	return (i);
}