1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifdef DEBUG 28 #define XNB_DEBUG 1 29 #endif /* DEBUG */ 30 31 #include "xnb.h" 32 33 #include <sys/sunddi.h> 34 #include <sys/sunndi.h> 35 #include <sys/modctl.h> 36 #include <sys/conf.h> 37 #include <sys/mac.h> 38 #include <sys/mac_impl.h> /* XXXXBOW - remove, included for mac_fix_cksum() */ 39 #include <sys/dlpi.h> 40 #include <sys/strsubr.h> 41 #include <sys/strsun.h> 42 #include <sys/types.h> 43 #include <sys/pattr.h> 44 #include <vm/seg_kmem.h> 45 #include <vm/hat_i86.h> 46 #include <xen/sys/xenbus_impl.h> 47 #include <xen/sys/xendev.h> 48 #include <sys/balloon_impl.h> 49 #include <sys/evtchn_impl.h> 50 #include <sys/gnttab.h> 51 #include <vm/vm_dep.h> 52 53 #include <sys/gld.h> 54 #include <inet/ip.h> 55 #include <inet/ip_impl.h> 56 #include <sys/vnic_impl.h> /* blech. */ 57 58 /* 59 * The terms "transmit" and "receive" are used in alignment with domU, 60 * which means that packets originating from the peer domU are "transmitted" 61 * to other parts of the system and packets are "received" from them. 62 */ 63 64 /* 65 * XXPV dme: things to do, as well as various things indicated 66 * throughout the source: 67 * - copy avoidance outbound. 68 * - copy avoidance inbound. 69 * - transfer credit limiting. 70 * - MAC address based filtering. 71 */ 72 73 /* 74 * Should we attempt to defer checksum calculation? 75 */ 76 static boolean_t xnb_cksum_offload = B_TRUE; 77 /* 78 * When receiving packets from a guest, should they be copied 79 * or used as-is (esballoc)? 80 */ 81 static boolean_t xnb_tx_always_copy = B_TRUE; 82 83 static boolean_t xnb_connect_rings(dev_info_t *); 84 static void xnb_disconnect_rings(dev_info_t *); 85 static void xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t, 86 void *, void *); 87 static void xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t, 88 void *, void *); 89 90 static int xnb_txbuf_constructor(void *, void *, int); 91 static void xnb_txbuf_destructor(void *, void *); 92 static xnb_txbuf_t *xnb_txbuf_get(xnb_t *, int); 93 static void xnb_txbuf_put(xnb_t *, xnb_txbuf_t *); 94 static void xnb_tx_notify_peer(xnb_t *); 95 static void xnb_tx_complete(xnb_txbuf_t *); 96 static void xnb_tx_mark_complete(xnb_t *, RING_IDX, int16_t); 97 static void xnb_tx_schedule_unmop(xnb_t *, gnttab_map_grant_ref_t *, 98 xnb_txbuf_t *); 99 static void xnb_tx_perform_pending_unmop(xnb_t *); 100 mblk_t *xnb_copy_to_peer(xnb_t *, mblk_t *); 101 102 int xnb_unmop_lowwat = NET_TX_RING_SIZE >> 2; 103 int xnb_unmop_hiwat = NET_TX_RING_SIZE - (NET_TX_RING_SIZE >> 2); 104 105 106 boolean_t xnb_hv_copy = B_TRUE; 107 boolean_t xnb_explicit_pageflip_set = B_FALSE; 108 109 /* XXPV dme: are these really invalid? */ 110 #define INVALID_GRANT_HANDLE ((grant_handle_t)-1) 111 #define INVALID_GRANT_REF ((grant_ref_t)-1) 112 113 static kmem_cache_t *xnb_txbuf_cachep; 114 static kmutex_t xnb_alloc_page_lock; 115 116 /* 117 * Statistics. 118 */ 119 static char *aux_statistics[] = { 120 "rx_cksum_deferred", 121 "tx_cksum_no_need", 122 "rx_rsp_notok", 123 "tx_notify_deferred", 124 "tx_notify_sent", 125 "rx_notify_deferred", 126 "rx_notify_sent", 127 "tx_too_early", 128 "rx_too_early", 129 "rx_allocb_failed", 130 "tx_allocb_failed", 131 "rx_foreign_page", 132 "mac_full", 133 "spurious_intr", 134 "allocation_success", 135 "allocation_failure", 136 "small_allocation_success", 137 "small_allocation_failure", 138 "other_allocation_failure", 139 "rx_pageboundary_crossed", 140 "rx_cpoparea_grown", 141 "csum_hardware", 142 "csum_software", 143 }; 144 145 static int 146 xnb_ks_aux_update(kstat_t *ksp, int flag) 147 { 148 xnb_t *xnbp; 149 kstat_named_t *knp; 150 151 if (flag != KSTAT_READ) 152 return (EACCES); 153 154 xnbp = ksp->ks_private; 155 knp = ksp->ks_data; 156 157 /* 158 * Assignment order should match that of the names in 159 * aux_statistics. 160 */ 161 (knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_deferred; 162 (knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_no_need; 163 (knp++)->value.ui64 = xnbp->xnb_stat_rx_rsp_notok; 164 (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred; 165 (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent; 166 (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred; 167 (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent; 168 (knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early; 169 (knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early; 170 (knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed; 171 (knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed; 172 (knp++)->value.ui64 = xnbp->xnb_stat_rx_foreign_page; 173 (knp++)->value.ui64 = xnbp->xnb_stat_mac_full; 174 (knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr; 175 (knp++)->value.ui64 = xnbp->xnb_stat_allocation_success; 176 (knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure; 177 (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success; 178 (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure; 179 (knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure; 180 (knp++)->value.ui64 = xnbp->xnb_stat_rx_pagebndry_crossed; 181 (knp++)->value.ui64 = xnbp->xnb_stat_rx_cpoparea_grown; 182 (knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware; 183 (knp++)->value.ui64 = xnbp->xnb_stat_csum_software; 184 185 return (0); 186 } 187 188 static boolean_t 189 xnb_ks_init(xnb_t *xnbp) 190 { 191 int nstat = sizeof (aux_statistics) / 192 sizeof (aux_statistics[0]); 193 char **cp = aux_statistics; 194 kstat_named_t *knp; 195 196 /* 197 * Create and initialise kstats. 198 */ 199 xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo), 200 ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net", 201 KSTAT_TYPE_NAMED, nstat, 0); 202 if (xnbp->xnb_kstat_aux == NULL) 203 return (B_FALSE); 204 205 xnbp->xnb_kstat_aux->ks_private = xnbp; 206 xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update; 207 208 knp = xnbp->xnb_kstat_aux->ks_data; 209 while (nstat > 0) { 210 kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); 211 212 knp++; 213 cp++; 214 nstat--; 215 } 216 217 kstat_install(xnbp->xnb_kstat_aux); 218 219 return (B_TRUE); 220 } 221 222 static void 223 xnb_ks_free(xnb_t *xnbp) 224 { 225 kstat_delete(xnbp->xnb_kstat_aux); 226 } 227 228 /* 229 * Software checksum calculation and insertion for an arbitrary packet. 230 */ 231 /*ARGSUSED*/ 232 static mblk_t * 233 xnb_software_csum(xnb_t *xnbp, mblk_t *mp) 234 { 235 /* 236 * XXPV dme: shouldn't rely on vnic_fix_cksum(), not least 237 * because it doesn't cover all of the interesting cases :-( 238 */ 239 (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, 240 HCK_FULLCKSUM, KM_NOSLEEP); 241 242 return (mac_fix_cksum(mp)); 243 } 244 245 mblk_t * 246 xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab) 247 { 248 struct ether_header *ehp; 249 uint16_t sap; 250 uint32_t offset; 251 ipha_t *ipha; 252 253 ASSERT(mp->b_next == NULL); 254 255 /* 256 * Check that the packet is contained in a single mblk. In 257 * the "from peer" path this is true today, but will change 258 * when scatter gather support is added. In the "to peer" 259 * path we cannot be sure, but in most cases it will be true 260 * (in the xnbo case the packet has come from a MAC device 261 * which is unlikely to split packets). 262 */ 263 if (mp->b_cont != NULL) 264 goto software; 265 266 /* 267 * If the MAC has no hardware capability don't do any further 268 * checking. 269 */ 270 if (capab == 0) 271 goto software; 272 273 ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 274 ehp = (struct ether_header *)mp->b_rptr; 275 276 if (ntohs(ehp->ether_type) == VLAN_TPID) { 277 struct ether_vlan_header *evhp; 278 279 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 280 evhp = (struct ether_vlan_header *)mp->b_rptr; 281 sap = ntohs(evhp->ether_type); 282 offset = sizeof (struct ether_vlan_header); 283 } else { 284 sap = ntohs(ehp->ether_type); 285 offset = sizeof (struct ether_header); 286 } 287 288 /* 289 * We only attempt to do IPv4 packets in hardware. 290 */ 291 if (sap != ETHERTYPE_IP) 292 goto software; 293 294 /* 295 * We know that this is an IPv4 packet. 296 */ 297 ipha = (ipha_t *)(mp->b_rptr + offset); 298 299 switch (ipha->ipha_protocol) { 300 case IPPROTO_TCP: 301 case IPPROTO_UDP: { 302 uint32_t start, length, stuff, cksum; 303 uint16_t *stuffp; 304 305 /* 306 * This is a TCP/IPv4 or UDP/IPv4 packet, for which we 307 * can use full IPv4 and partial checksum offload. 308 */ 309 if ((capab & (HCKSUM_INET_FULL_V4|HCKSUM_INET_PARTIAL)) == 0) 310 break; 311 312 start = IP_SIMPLE_HDR_LENGTH; 313 length = ntohs(ipha->ipha_length); 314 if (ipha->ipha_protocol == IPPROTO_TCP) { 315 stuff = start + TCP_CHECKSUM_OFFSET; 316 cksum = IP_TCP_CSUM_COMP; 317 } else { 318 stuff = start + UDP_CHECKSUM_OFFSET; 319 cksum = IP_UDP_CSUM_COMP; 320 } 321 stuffp = (uint16_t *)(mp->b_rptr + offset + stuff); 322 323 if (capab & HCKSUM_INET_FULL_V4) { 324 /* 325 * Some devices require that the checksum 326 * field of the packet is zero for full 327 * offload. 328 */ 329 *stuffp = 0; 330 331 (void) hcksum_assoc(mp, NULL, NULL, 332 0, 0, 0, 0, 333 HCK_FULLCKSUM, KM_NOSLEEP); 334 335 xnbp->xnb_stat_csum_hardware++; 336 337 return (mp); 338 } 339 340 if (capab & HCKSUM_INET_PARTIAL) { 341 if (*stuffp == 0) { 342 ipaddr_t src, dst; 343 344 /* 345 * Older Solaris guests don't insert 346 * the pseudo-header checksum, so we 347 * calculate it here. 348 */ 349 src = ipha->ipha_src; 350 dst = ipha->ipha_dst; 351 352 cksum += (dst >> 16) + (dst & 0xFFFF); 353 cksum += (src >> 16) + (src & 0xFFFF); 354 cksum += length - IP_SIMPLE_HDR_LENGTH; 355 356 cksum = (cksum >> 16) + (cksum & 0xFFFF); 357 cksum = (cksum >> 16) + (cksum & 0xFFFF); 358 359 ASSERT(cksum <= 0xFFFF); 360 361 *stuffp = (uint16_t)(cksum ? cksum : ~cksum); 362 } 363 364 (void) hcksum_assoc(mp, NULL, NULL, 365 start, stuff, length, 0, 366 HCK_PARTIALCKSUM, KM_NOSLEEP); 367 368 xnbp->xnb_stat_csum_hardware++; 369 370 return (mp); 371 } 372 373 /* NOTREACHED */ 374 break; 375 } 376 377 default: 378 /* Use software. */ 379 break; 380 } 381 382 software: 383 /* 384 * We are not able to use any offload so do the whole thing in 385 * software. 386 */ 387 xnbp->xnb_stat_csum_software++; 388 389 return (xnb_software_csum(xnbp, mp)); 390 } 391 392 int 393 xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data) 394 { 395 xnb_t *xnbp; 396 char *xsname, mac[ETHERADDRL * 3]; 397 398 xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP); 399 400 xnbp->xnb_flavour = flavour; 401 xnbp->xnb_flavour_data = flavour_data; 402 xnbp->xnb_devinfo = dip; 403 xnbp->xnb_evtchn = INVALID_EVTCHN; 404 xnbp->xnb_irq = B_FALSE; 405 xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE; 406 xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE; 407 xnbp->xnb_cksum_offload = xnb_cksum_offload; 408 xnbp->xnb_connected = B_FALSE; 409 xnbp->xnb_hotplugged = B_FALSE; 410 xnbp->xnb_detachable = B_FALSE; 411 xnbp->xnb_peer = xvdi_get_oeid(dip); 412 xnbp->xnb_tx_pages_writable = B_FALSE; 413 xnbp->xnb_tx_always_copy = xnb_tx_always_copy; 414 415 xnbp->xnb_tx_buf_count = 0; 416 xnbp->xnb_tx_unmop_count = 0; 417 418 xnbp->xnb_hv_copy = B_FALSE; 419 420 xnbp->xnb_rx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP); 421 ASSERT(xnbp->xnb_rx_va != NULL); 422 423 if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie) 424 != DDI_SUCCESS) 425 goto failure; 426 427 /* allocated on demand, when/if we enter xnb_copy_to_peer() */ 428 xnbp->xnb_rx_cpop = NULL; 429 xnbp->xnb_cpop_sz = 0; 430 431 mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER, 432 xnbp->xnb_icookie); 433 mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER, 434 xnbp->xnb_icookie); 435 436 /* set driver private pointer now */ 437 ddi_set_driver_private(dip, xnbp); 438 439 if (!xnb_ks_init(xnbp)) 440 goto failure_1; 441 442 /* 443 * Receive notification of changes in the state of the 444 * driver in the guest domain. 445 */ 446 if (xvdi_add_event_handler(dip, XS_OE_STATE, xnb_oe_state_change, 447 NULL) != DDI_SUCCESS) 448 goto failure_2; 449 450 /* 451 * Receive notification of hotplug events. 452 */ 453 if (xvdi_add_event_handler(dip, XS_HP_STATE, xnb_hp_state_change, 454 NULL) != DDI_SUCCESS) 455 goto failure_2; 456 457 xsname = xvdi_get_xsname(dip); 458 459 if (xenbus_printf(XBT_NULL, xsname, 460 "feature-no-csum-offload", "%d", 461 xnbp->xnb_cksum_offload ? 0 : 1) != 0) 462 goto failure_3; 463 464 /* 465 * Use global xnb_hv_copy to export this feature. This means that 466 * we have to decide what to do before starting up a guest domain 467 */ 468 if (xenbus_printf(XBT_NULL, xsname, 469 "feature-rx-copy", "%d", xnb_hv_copy ? 1 : 0) != 0) 470 goto failure_3; 471 /* 472 * Linux domUs seem to depend on "feature-rx-flip" being 0 473 * in addition to "feature-rx-copy" being 1. It seems strange 474 * to use four possible states to describe a binary decision, 475 * but we might as well play nice. 476 */ 477 if (xenbus_printf(XBT_NULL, xsname, 478 "feature-rx-flip", "%d", xnb_explicit_pageflip_set ? 1 : 0) != 0) 479 goto failure_3; 480 481 if (xenbus_scanf(XBT_NULL, xsname, 482 "mac", "%s", mac) != 0) { 483 cmn_err(CE_WARN, "xnb_attach: " 484 "cannot read mac address from %s", 485 xsname); 486 goto failure_3; 487 } 488 489 if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) { 490 cmn_err(CE_WARN, 491 "xnb_attach: cannot parse mac address %s", 492 mac); 493 goto failure_3; 494 } 495 496 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait); 497 (void) xvdi_post_event(dip, XEN_HP_ADD); 498 499 return (DDI_SUCCESS); 500 501 failure_3: 502 xvdi_remove_event_handler(dip, NULL); 503 504 failure_2: 505 xnb_ks_free(xnbp); 506 507 failure_1: 508 mutex_destroy(&xnbp->xnb_rx_lock); 509 mutex_destroy(&xnbp->xnb_tx_lock); 510 511 failure: 512 vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE); 513 kmem_free(xnbp, sizeof (*xnbp)); 514 return (DDI_FAILURE); 515 } 516 517 /*ARGSUSED*/ 518 void 519 xnb_detach(dev_info_t *dip) 520 { 521 xnb_t *xnbp = ddi_get_driver_private(dip); 522 523 ASSERT(xnbp != NULL); 524 ASSERT(!xnbp->xnb_connected); 525 ASSERT(xnbp->xnb_tx_buf_count == 0); 526 527 xnb_disconnect_rings(dip); 528 529 xvdi_remove_event_handler(dip, NULL); 530 531 xnb_ks_free(xnbp); 532 533 ddi_set_driver_private(dip, NULL); 534 535 mutex_destroy(&xnbp->xnb_tx_lock); 536 mutex_destroy(&xnbp->xnb_rx_lock); 537 538 if (xnbp->xnb_cpop_sz > 0) 539 kmem_free(xnbp->xnb_rx_cpop, sizeof (*xnbp->xnb_rx_cpop) 540 * xnbp->xnb_cpop_sz); 541 542 ASSERT(xnbp->xnb_rx_va != NULL); 543 vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE); 544 545 kmem_free(xnbp, sizeof (*xnbp)); 546 } 547 548 549 static mfn_t 550 xnb_alloc_page(xnb_t *xnbp) 551 { 552 #define WARNING_RATE_LIMIT 100 553 #define BATCH_SIZE 256 554 static mfn_t mfns[BATCH_SIZE]; /* common across all instances */ 555 static int nth = BATCH_SIZE; 556 mfn_t mfn; 557 558 mutex_enter(&xnb_alloc_page_lock); 559 if (nth == BATCH_SIZE) { 560 if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) { 561 xnbp->xnb_stat_allocation_failure++; 562 mutex_exit(&xnb_alloc_page_lock); 563 564 /* 565 * Try for a single page in low memory situations. 566 */ 567 if (balloon_alloc_pages(1, &mfn) != 1) { 568 if ((xnbp->xnb_stat_small_allocation_failure++ 569 % WARNING_RATE_LIMIT) == 0) 570 cmn_err(CE_WARN, "xnb_alloc_page: " 571 "Cannot allocate memory to " 572 "transfer packets to peer."); 573 return (0); 574 } else { 575 xnbp->xnb_stat_small_allocation_success++; 576 return (mfn); 577 } 578 } 579 580 nth = 0; 581 xnbp->xnb_stat_allocation_success++; 582 } 583 584 mfn = mfns[nth++]; 585 mutex_exit(&xnb_alloc_page_lock); 586 587 ASSERT(mfn != 0); 588 589 return (mfn); 590 #undef BATCH_SIZE 591 #undef WARNING_RATE_LIMIT 592 } 593 594 /*ARGSUSED*/ 595 static void 596 xnb_free_page(xnb_t *xnbp, mfn_t mfn) 597 { 598 int r; 599 pfn_t pfn; 600 601 pfn = xen_assign_pfn(mfn); 602 pfnzero(pfn, 0, PAGESIZE); 603 xen_release_pfn(pfn); 604 605 /* 606 * This happens only in the error path, so batching is 607 * not worth the complication. 608 */ 609 if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) { 610 cmn_err(CE_WARN, "free_page: cannot decrease memory " 611 "reservation (%d): page kept but unusable (mfn = 0x%lx).", 612 r, mfn); 613 } 614 } 615 616 /* 617 * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but 618 * using local variables. 619 */ 620 #define XNB_RING_HAS_UNCONSUMED_REQUESTS(_r) \ 621 ((((_r)->sring->req_prod - loop) < \ 622 (RING_SIZE(_r) - (loop - prod))) ? \ 623 ((_r)->sring->req_prod - loop) : \ 624 (RING_SIZE(_r) - (loop - prod))) 625 626 mblk_t * 627 xnb_to_peer(xnb_t *xnbp, mblk_t *mp) 628 { 629 mblk_t *free = mp, *prev = NULL; 630 size_t len; 631 gnttab_transfer_t *gop; 632 boolean_t notify; 633 RING_IDX loop, prod, end; 634 635 /* 636 * For each packet the sequence of operations is: 637 * 638 * 1. get a new page from the hypervisor. 639 * 2. get a request slot from the ring. 640 * 3. copy the data into the new page. 641 * 4. transfer the page to the peer. 642 * 5. update the request slot. 643 * 6. kick the peer. 644 * 7. free mp. 645 * 646 * In order to reduce the number of hypercalls, we prepare 647 * several packets for the peer and perform a single hypercall 648 * to transfer them. 649 */ 650 651 mutex_enter(&xnbp->xnb_rx_lock); 652 653 /* 654 * If we are not connected to the peer or have not yet 655 * finished hotplug it is too early to pass packets to the 656 * peer. 657 */ 658 if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) { 659 mutex_exit(&xnbp->xnb_rx_lock); 660 DTRACE_PROBE(flip_rx_too_early); 661 xnbp->xnb_stat_rx_too_early++; 662 return (mp); 663 } 664 665 loop = xnbp->xnb_rx_ring.req_cons; 666 prod = xnbp->xnb_rx_ring.rsp_prod_pvt; 667 gop = xnbp->xnb_rx_top; 668 669 while ((mp != NULL) && 670 XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) { 671 672 mfn_t mfn; 673 pfn_t pfn; 674 netif_rx_request_t *rxreq; 675 netif_rx_response_t *rxresp; 676 char *valoop; 677 mblk_t *ml; 678 uint16_t cksum_flags; 679 680 /* 1 */ 681 if ((mfn = xnb_alloc_page(xnbp)) == 0) { 682 xnbp->xnb_stat_rx_defer++; 683 break; 684 } 685 686 /* 2 */ 687 rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop); 688 689 #ifdef XNB_DEBUG 690 if (!(rxreq->id < NET_RX_RING_SIZE)) 691 cmn_err(CE_PANIC, "xnb_to_peer: " 692 "id %d out of range in request 0x%p", 693 rxreq->id, (void *)rxreq); 694 #endif /* XNB_DEBUG */ 695 696 /* Assign a pfn and map the new page at the allocated va. */ 697 pfn = xen_assign_pfn(mfn); 698 hat_devload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE, 699 pfn, PROT_READ | PROT_WRITE, HAT_LOAD); 700 701 /* 3 */ 702 len = 0; 703 valoop = xnbp->xnb_rx_va; 704 for (ml = mp; ml != NULL; ml = ml->b_cont) { 705 size_t chunk = ml->b_wptr - ml->b_rptr; 706 707 bcopy(ml->b_rptr, valoop, chunk); 708 valoop += chunk; 709 len += chunk; 710 } 711 712 ASSERT(len < PAGESIZE); 713 714 /* Release the pfn. */ 715 hat_unload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE, 716 HAT_UNLOAD_UNMAP); 717 xen_release_pfn(pfn); 718 719 /* 4 */ 720 gop->mfn = mfn; 721 gop->domid = xnbp->xnb_peer; 722 gop->ref = rxreq->gref; 723 724 /* 5.1 */ 725 rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod); 726 rxresp->offset = 0; 727 rxresp->flags = 0; 728 729 cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp); 730 if (cksum_flags != 0) 731 xnbp->xnb_stat_rx_cksum_deferred++; 732 rxresp->flags |= cksum_flags; 733 734 rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id; 735 rxresp->status = len; 736 737 loop++; 738 prod++; 739 gop++; 740 prev = mp; 741 mp = mp->b_next; 742 } 743 744 /* 745 * Did we actually do anything? 746 */ 747 if (loop == xnbp->xnb_rx_ring.req_cons) { 748 mutex_exit(&xnbp->xnb_rx_lock); 749 return (mp); 750 } 751 752 end = loop; 753 754 /* 755 * Unlink the end of the 'done' list from the remainder. 756 */ 757 ASSERT(prev != NULL); 758 prev->b_next = NULL; 759 760 if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_rx_top, 761 loop - xnbp->xnb_rx_ring.req_cons) != 0) { 762 cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed"); 763 } 764 765 loop = xnbp->xnb_rx_ring.req_cons; 766 prod = xnbp->xnb_rx_ring.rsp_prod_pvt; 767 gop = xnbp->xnb_rx_top; 768 769 while (loop < end) { 770 int16_t status = NETIF_RSP_OKAY; 771 772 if (gop->status != 0) { 773 status = NETIF_RSP_ERROR; 774 775 /* 776 * If the status is anything other than 777 * GNTST_bad_page then we don't own the page 778 * any more, so don't try to give it back. 779 */ 780 if (gop->status != GNTST_bad_page) 781 gop->mfn = 0; 782 } else { 783 /* The page is no longer ours. */ 784 gop->mfn = 0; 785 } 786 787 if (gop->mfn != 0) 788 /* 789 * Give back the page, as we won't be using 790 * it. 791 */ 792 xnb_free_page(xnbp, gop->mfn); 793 else 794 /* 795 * We gave away a page, update our accounting 796 * now. 797 */ 798 balloon_drv_subtracted(1); 799 800 /* 5.2 */ 801 if (status != NETIF_RSP_OKAY) { 802 RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status = 803 status; 804 } else { 805 xnbp->xnb_stat_ipackets++; 806 xnbp->xnb_stat_rbytes += len; 807 } 808 809 loop++; 810 prod++; 811 gop++; 812 } 813 814 xnbp->xnb_rx_ring.req_cons = loop; 815 xnbp->xnb_rx_ring.rsp_prod_pvt = prod; 816 817 /* 6 */ 818 /* LINTED: constant in conditional context */ 819 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify); 820 if (notify) { 821 ec_notify_via_evtchn(xnbp->xnb_evtchn); 822 xnbp->xnb_stat_rx_notify_sent++; 823 } else { 824 xnbp->xnb_stat_rx_notify_deferred++; 825 } 826 827 if (mp != NULL) 828 xnbp->xnb_stat_rx_defer++; 829 830 mutex_exit(&xnbp->xnb_rx_lock); 831 832 /* Free mblk_t's that we consumed. */ 833 freemsgchain(free); 834 835 return (mp); 836 } 837 838 /* helper functions for xnb_copy_to_peer */ 839 840 /* 841 * Grow the array of copy operation descriptors. 842 * Returns a pointer to the next available entry. 843 */ 844 gnttab_copy_t * 845 grow_cpop_area(xnb_t *xnbp, gnttab_copy_t *o_cpop) 846 { 847 /* 848 * o_cpop (arg.1) is a ptr to the area we would like to copy 849 * something into but cannot, because we haven't alloc'ed it 850 * yet, or NULL. 851 * old_cpop and new_cpop (local) are pointers to old/new 852 * versions of xnbp->xnb_rx_cpop. 853 */ 854 gnttab_copy_t *new_cpop, *old_cpop, *ret_cpop; 855 size_t newcount; 856 857 ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock)); 858 859 old_cpop = xnbp->xnb_rx_cpop; 860 /* 861 * o_cpop is a pointer into the array pointed to by old_cpop; 862 * it would be an error for exactly one of these pointers to be NULL. 863 * We shouldn't call this function if xnb_rx_cpop has already 864 * been allocated, but we're starting to fill it from the beginning 865 * again. 866 */ 867 ASSERT((o_cpop == NULL && old_cpop == NULL) || 868 (o_cpop != NULL && old_cpop != NULL && o_cpop != old_cpop)); 869 870 newcount = xnbp->xnb_cpop_sz + CPOP_DEFCNT; 871 872 new_cpop = kmem_alloc(sizeof (*new_cpop) * newcount, KM_NOSLEEP); 873 if (new_cpop == NULL) { 874 xnbp->xnb_stat_other_allocation_failure++; 875 return (NULL); 876 } 877 878 if (o_cpop != NULL) { 879 size_t offset = (o_cpop - old_cpop); 880 881 /* we only need to move the parts in use ... */ 882 (void) memmove(new_cpop, old_cpop, xnbp->xnb_cpop_sz * 883 (sizeof (*old_cpop))); 884 885 kmem_free(old_cpop, xnbp->xnb_cpop_sz * sizeof (*old_cpop)); 886 887 ret_cpop = new_cpop + offset; 888 } else { 889 ret_cpop = new_cpop; 890 } 891 892 xnbp->xnb_rx_cpop = new_cpop; 893 xnbp->xnb_cpop_sz = newcount; 894 895 xnbp->xnb_stat_rx_cpoparea_grown++; 896 897 return (ret_cpop); 898 } 899 900 /* 901 * Check whether an address is on a page that's foreign to this domain. 902 */ 903 static boolean_t 904 is_foreign(void *addr) 905 { 906 pfn_t pfn = hat_getpfnum(kas.a_hat, addr); 907 908 return (pfn & PFN_IS_FOREIGN_MFN ? B_TRUE : B_FALSE); 909 } 910 911 /* 912 * Insert a newly allocated mblk into a chain, replacing the old one. 913 */ 914 static mblk_t * 915 replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev) 916 { 917 uint32_t start, stuff, end, value, flags; 918 mblk_t *new_mp; 919 920 new_mp = copyb(mp); 921 if (new_mp == NULL) 922 cmn_err(CE_PANIC, "replace_msg: cannot alloc new message" 923 "for %p, len %lu", (void *) mp, len); 924 925 hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 926 (void) hcksum_assoc(new_mp, NULL, NULL, start, stuff, end, value, 927 flags, KM_NOSLEEP); 928 929 new_mp->b_next = mp->b_next; 930 new_mp->b_prev = mp->b_prev; 931 new_mp->b_cont = mp->b_cont; 932 933 /* Make sure we only overwrite pointers to the mblk being replaced. */ 934 if (mp_prev != NULL && mp_prev->b_next == mp) 935 mp_prev->b_next = new_mp; 936 937 if (ml_prev != NULL && ml_prev->b_cont == mp) 938 ml_prev->b_cont = new_mp; 939 940 mp->b_next = mp->b_prev = mp->b_cont = NULL; 941 freemsg(mp); 942 943 return (new_mp); 944 } 945 946 /* 947 * Set all the fields in a gnttab_copy_t. 948 */ 949 static void 950 setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr, 951 size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref) 952 { 953 ASSERT(xnbp != NULL && gp != NULL); 954 955 gp->source.offset = s_off; 956 gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr)); 957 gp->source.domid = DOMID_SELF; 958 959 gp->len = (uint16_t)len; 960 gp->flags = GNTCOPY_dest_gref; 961 gp->status = 0; 962 963 gp->dest.u.ref = d_ref; 964 gp->dest.offset = d_off; 965 gp->dest.domid = xnbp->xnb_peer; 966 } 967 968 mblk_t * 969 xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp) 970 { 971 mblk_t *free = mp, *mp_prev = NULL, *saved_mp = mp; 972 mblk_t *ml, *ml_prev; 973 gnttab_copy_t *gop_cp; 974 boolean_t notify; 975 RING_IDX loop, prod; 976 int i; 977 978 if (!xnbp->xnb_hv_copy) 979 return (xnb_to_peer(xnbp, mp)); 980 981 /* 982 * For each packet the sequence of operations is: 983 * 984 * 1. get a request slot from the ring. 985 * 2. set up data for hypercall (see NOTE below) 986 * 3. have the hypervisore copy the data 987 * 4. update the request slot. 988 * 5. kick the peer. 989 * 990 * NOTE ad 2. 991 * In order to reduce the number of hypercalls, we prepare 992 * several packets (mp->b_cont != NULL) for the peer and 993 * perform a single hypercall to transfer them. 994 * We also have to set up a seperate copy operation for 995 * every page. 996 * 997 * If we have more than one message (mp->b_next != NULL), 998 * we do this whole dance repeatedly. 999 */ 1000 1001 mutex_enter(&xnbp->xnb_rx_lock); 1002 1003 if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) { 1004 mutex_exit(&xnbp->xnb_rx_lock); 1005 DTRACE_PROBE(copy_rx_too_early); 1006 xnbp->xnb_stat_rx_too_early++; 1007 return (mp); 1008 } 1009 1010 loop = xnbp->xnb_rx_ring.req_cons; 1011 prod = xnbp->xnb_rx_ring.rsp_prod_pvt; 1012 1013 while ((mp != NULL) && 1014 XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) { 1015 netif_rx_request_t *rxreq; 1016 netif_rx_response_t *rxresp; 1017 size_t d_offset; 1018 size_t len; 1019 uint16_t cksum_flags; 1020 int16_t status = NETIF_RSP_OKAY; 1021 int item_count; 1022 1023 /* 1 */ 1024 rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop); 1025 1026 #ifdef XNB_DEBUG 1027 if (!(rxreq->id < NET_RX_RING_SIZE)) 1028 cmn_err(CE_PANIC, "xnb_copy_to_peer: " 1029 "id %d out of range in request 0x%p", 1030 rxreq->id, (void *)rxreq); 1031 #endif /* XNB_DEBUG */ 1032 1033 /* 2 */ 1034 d_offset = 0; 1035 len = 0; 1036 item_count = 0; 1037 1038 gop_cp = xnbp->xnb_rx_cpop; 1039 1040 /* 1041 * We walk the b_cont pointers and set up a gop_cp 1042 * structure for every page in every data block we have. 1043 */ 1044 /* 2a */ 1045 for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) { 1046 size_t chunk = ml->b_wptr - ml->b_rptr; 1047 uchar_t *r_tmp, *rpt_align; 1048 size_t r_offset; 1049 1050 /* 1051 * If we get an mblk on a page that doesn't belong to 1052 * this domain, get a new mblk to replace the old one. 1053 */ 1054 if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) { 1055 mblk_t *ml_new = replace_msg(ml, chunk, 1056 mp_prev, ml_prev); 1057 1058 /* We can still use old ml, but not *ml! */ 1059 if (free == ml) 1060 free = ml_new; 1061 if (mp == ml) 1062 mp = ml_new; 1063 ml = ml_new; 1064 1065 xnbp->xnb_stat_rx_foreign_page++; 1066 } 1067 1068 rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr); 1069 r_offset = (uint16_t)(ml->b_rptr - rpt_align); 1070 r_tmp = ml->b_rptr; 1071 1072 if (d_offset + chunk > PAGESIZE) 1073 cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p " 1074 "(svd: %p), ml %p,rpt_alg. %p, d_offset " 1075 "(%lu) + chunk (%lu) > PAGESIZE %d!", 1076 (void *)mp, (void *)saved_mp, (void *)ml, 1077 (void *)rpt_align, 1078 d_offset, chunk, (int)PAGESIZE); 1079 1080 while (chunk > 0) { 1081 size_t part_len; 1082 1083 item_count++; 1084 if (item_count > xnbp->xnb_cpop_sz) { 1085 gop_cp = grow_cpop_area(xnbp, gop_cp); 1086 if (gop_cp == NULL) 1087 goto failure; 1088 } 1089 /* 1090 * If our mblk crosses a page boundary, we need 1091 * to do a seperate copy for every page. 1092 */ 1093 if (r_offset + chunk > PAGESIZE) { 1094 part_len = PAGESIZE - r_offset; 1095 1096 DTRACE_PROBE3(mblk_page_crossed, 1097 (mblk_t *), ml, int, chunk, int, 1098 (int)r_offset); 1099 1100 xnbp->xnb_stat_rx_pagebndry_crossed++; 1101 } else { 1102 part_len = chunk; 1103 } 1104 1105 setup_gop(xnbp, gop_cp, r_tmp, r_offset, 1106 d_offset, part_len, rxreq->gref); 1107 1108 chunk -= part_len; 1109 1110 len += part_len; 1111 d_offset += part_len; 1112 r_tmp += part_len; 1113 /* 1114 * The 2nd, 3rd ... last copies will always 1115 * start at r_tmp, therefore r_offset is 0. 1116 */ 1117 r_offset = 0; 1118 gop_cp++; 1119 } 1120 ml_prev = ml; 1121 DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int, 1122 chunk, int, len, int, item_count); 1123 } 1124 /* 3 */ 1125 if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_rx_cpop, 1126 item_count) != 0) { 1127 cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed"); 1128 DTRACE_PROBE(HV_granttableopfailed); 1129 } 1130 1131 /* 4 */ 1132 rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod); 1133 rxresp->offset = 0; 1134 1135 rxresp->flags = 0; 1136 1137 DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int, 1138 (int)rxresp->offset, int, (int)rxresp->flags, int, 1139 (int)rxresp->status); 1140 1141 cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp); 1142 if (cksum_flags != 0) 1143 xnbp->xnb_stat_rx_cksum_deferred++; 1144 rxresp->flags |= cksum_flags; 1145 1146 rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id; 1147 rxresp->status = len; 1148 1149 DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int, 1150 (int)rxresp->offset, int, (int)rxresp->flags, int, 1151 (int)rxresp->status); 1152 1153 for (i = 0; i < item_count; i++) { 1154 if (xnbp->xnb_rx_cpop[i].status != 0) { 1155 DTRACE_PROBE2(cpop__status__nonnull, int, 1156 (int)xnbp->xnb_rx_cpop[i].status, 1157 int, i); 1158 status = NETIF_RSP_ERROR; 1159 } 1160 } 1161 1162 /* 5.2 */ 1163 if (status != NETIF_RSP_OKAY) { 1164 RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status = 1165 status; 1166 xnbp->xnb_stat_rx_rsp_notok++; 1167 } else { 1168 xnbp->xnb_stat_ipackets++; 1169 xnbp->xnb_stat_rbytes += len; 1170 } 1171 1172 loop++; 1173 prod++; 1174 mp_prev = mp; 1175 mp = mp->b_next; 1176 } 1177 failure: 1178 /* 1179 * Did we actually do anything? 1180 */ 1181 if (loop == xnbp->xnb_rx_ring.req_cons) { 1182 mutex_exit(&xnbp->xnb_rx_lock); 1183 return (mp); 1184 } 1185 1186 /* 1187 * Unlink the end of the 'done' list from the remainder. 1188 */ 1189 ASSERT(mp_prev != NULL); 1190 mp_prev->b_next = NULL; 1191 1192 xnbp->xnb_rx_ring.req_cons = loop; 1193 xnbp->xnb_rx_ring.rsp_prod_pvt = prod; 1194 1195 /* 6 */ 1196 /* LINTED: constant in conditional context */ 1197 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify); 1198 if (notify) { 1199 ec_notify_via_evtchn(xnbp->xnb_evtchn); 1200 xnbp->xnb_stat_rx_notify_sent++; 1201 } else { 1202 xnbp->xnb_stat_rx_notify_deferred++; 1203 } 1204 1205 if (mp != NULL) 1206 xnbp->xnb_stat_rx_defer++; 1207 1208 mutex_exit(&xnbp->xnb_rx_lock); 1209 1210 /* Free mblk_t structs we have consumed. */ 1211 freemsgchain(free); 1212 1213 return (mp); 1214 } 1215 1216 /*ARGSUSED*/ 1217 static int 1218 xnb_txbuf_constructor(void *buf, void *arg, int kmflag) 1219 { 1220 xnb_txbuf_t *txp = buf; 1221 1222 bzero(txp, sizeof (*txp)); 1223 1224 txp->xt_free_rtn.free_func = xnb_tx_complete; 1225 txp->xt_free_rtn.free_arg = (caddr_t)txp; 1226 1227 txp->xt_mop.host_addr = 1228 (uint64_t)(uintptr_t)vmem_alloc(heap_arena, PAGESIZE, 1229 ((kmflag & KM_NOSLEEP) == KM_NOSLEEP) ? 1230 VM_NOSLEEP : VM_SLEEP); 1231 1232 if (txp->xt_mop.host_addr == NULL) { 1233 cmn_err(CE_WARN, "xnb_txbuf_constructor: " 1234 "cannot get address space"); 1235 return (-1); 1236 } 1237 1238 /* 1239 * Have the hat ensure that page table exists for the VA. 1240 */ 1241 hat_prepare_mapping(kas.a_hat, 1242 (caddr_t)(uintptr_t)txp->xt_mop.host_addr, NULL); 1243 1244 return (0); 1245 } 1246 1247 /*ARGSUSED*/ 1248 static void 1249 xnb_txbuf_destructor(void *buf, void *arg) 1250 { 1251 xnb_txbuf_t *txp = buf; 1252 1253 ASSERT(txp->xt_mop.host_addr != NULL); 1254 ASSERT((txp->xt_flags & XNB_TXBUF_INUSE) == 0); 1255 1256 hat_release_mapping(kas.a_hat, 1257 (caddr_t)(uintptr_t)txp->xt_mop.host_addr); 1258 vmem_free(heap_arena, 1259 (caddr_t)(uintptr_t)txp->xt_mop.host_addr, PAGESIZE); 1260 } 1261 1262 static void 1263 xnb_tx_notify_peer(xnb_t *xnbp) 1264 { 1265 boolean_t notify; 1266 1267 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1268 1269 /* LINTED: constant in conditional context */ 1270 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify); 1271 if (notify) { 1272 ec_notify_via_evtchn(xnbp->xnb_evtchn); 1273 xnbp->xnb_stat_tx_notify_sent++; 1274 } else { 1275 xnbp->xnb_stat_tx_notify_deferred++; 1276 } 1277 } 1278 1279 static void 1280 xnb_tx_complete(xnb_txbuf_t *txp) 1281 { 1282 xnb_t *xnbp = txp->xt_xnbp; 1283 1284 ASSERT((txp->xt_flags & XNB_TXBUF_INUSE) == XNB_TXBUF_INUSE); 1285 1286 mutex_enter(&xnbp->xnb_tx_lock); 1287 xnb_tx_schedule_unmop(xnbp, &txp->xt_mop, txp); 1288 mutex_exit(&xnbp->xnb_tx_lock); 1289 } 1290 1291 static void 1292 xnb_tx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status) 1293 { 1294 RING_IDX i; 1295 netif_tx_response_t *txresp; 1296 1297 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1298 1299 i = xnbp->xnb_tx_ring.rsp_prod_pvt; 1300 1301 txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i); 1302 txresp->id = id; 1303 txresp->status = status; 1304 1305 xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1; 1306 1307 /* 1308 * Note that we don't push the change to the peer here - that 1309 * is the callers responsibility. 1310 */ 1311 } 1312 1313 static void 1314 xnb_tx_schedule_unmop(xnb_t *xnbp, gnttab_map_grant_ref_t *mop, 1315 xnb_txbuf_t *txp) 1316 { 1317 gnttab_unmap_grant_ref_t *unmop; 1318 int u_count; 1319 int reqs_on_ring; 1320 1321 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1322 ASSERT(xnbp->xnb_tx_unmop_count < NET_TX_RING_SIZE); 1323 1324 u_count = xnbp->xnb_tx_unmop_count++; 1325 1326 /* Cache data for the time when we actually unmap grant refs */ 1327 xnbp->xnb_tx_unmop_txp[u_count] = txp; 1328 1329 unmop = &xnbp->xnb_tx_unmop[u_count]; 1330 unmop->host_addr = mop->host_addr; 1331 unmop->dev_bus_addr = mop->dev_bus_addr; 1332 unmop->handle = mop->handle; 1333 1334 /* 1335 * We cannot check the ring once we're disconnected from it. Batching 1336 * doesn't seem to be a useful optimisation in this case either, 1337 * so we directly call into the actual unmap function. 1338 */ 1339 if (xnbp->xnb_connected) { 1340 reqs_on_ring = RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_tx_ring); 1341 1342 /* 1343 * By tuning xnb_unmop_hiwat to N, we can emulate "N per batch" 1344 * or (with N == 1) "immediate unmop" behaviour. 1345 * The "> xnb_unmop_lowwat" is a guard against ring exhaustion. 1346 */ 1347 if (xnbp->xnb_tx_unmop_count < xnb_unmop_hiwat && 1348 reqs_on_ring > xnb_unmop_lowwat) 1349 return; 1350 } 1351 1352 xnb_tx_perform_pending_unmop(xnbp); 1353 } 1354 1355 /* 1356 * Here we perform the actual unmapping of the data that was 1357 * accumulated in xnb_tx_schedule_unmop(). 1358 * Note that it is the caller's responsibility to make sure that 1359 * there's actually something there to unmop. 1360 */ 1361 static void 1362 xnb_tx_perform_pending_unmop(xnb_t *xnbp) 1363 { 1364 RING_IDX loop; 1365 #ifdef XNB_DEBUG 1366 gnttab_unmap_grant_ref_t *unmop; 1367 #endif /* XNB_DEBUG */ 1368 1369 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1370 ASSERT(xnbp->xnb_tx_unmop_count > 0); 1371 1372 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 1373 xnbp->xnb_tx_unmop, xnbp->xnb_tx_unmop_count) < 0) { 1374 cmn_err(CE_WARN, "xnb_tx_perform_pending_unmop: " 1375 "unmap grant operation failed, " 1376 "%d pages lost", xnbp->xnb_tx_unmop_count); 1377 } 1378 1379 #ifdef XNB_DEBUG 1380 for (loop = 0, unmop = xnbp->xnb_tx_unmop; 1381 loop < xnbp->xnb_tx_unmop_count; 1382 loop++, unmop++) { 1383 if (unmop->status != 0) { 1384 cmn_err(CE_WARN, "xnb_tx_perform_pending_unmop: " 1385 "unmap grant reference failed (%d)", 1386 unmop->status); 1387 } 1388 } 1389 #endif /* XNB_DEBUG */ 1390 1391 for (loop = 0; loop < xnbp->xnb_tx_unmop_count; loop++) { 1392 xnb_txbuf_t *txp = xnbp->xnb_tx_unmop_txp[loop]; 1393 1394 if (txp == NULL) 1395 cmn_err(CE_PANIC, 1396 "xnb_tx_perform_pending_unmop: " 1397 "unexpected NULL txp (loop %d; count %d)!", 1398 loop, xnbp->xnb_tx_unmop_count); 1399 1400 if (xnbp->xnb_connected) 1401 xnb_tx_mark_complete(xnbp, txp->xt_id, txp->xt_status); 1402 xnb_txbuf_put(xnbp, txp); 1403 } 1404 if (xnbp->xnb_connected) 1405 xnb_tx_notify_peer(xnbp); 1406 1407 xnbp->xnb_tx_unmop_count = 0; 1408 1409 #ifdef XNB_DEBUG 1410 bzero(xnbp->xnb_tx_unmop, sizeof (xnbp->xnb_tx_unmop)); 1411 bzero(xnbp->xnb_tx_unmop_txp, sizeof (xnbp->xnb_tx_unmop_txp)); 1412 #endif /* XNB_DEBUG */ 1413 } 1414 1415 static xnb_txbuf_t * 1416 xnb_txbuf_get(xnb_t *xnbp, int flags) 1417 { 1418 xnb_txbuf_t *txp; 1419 1420 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1421 1422 txp = kmem_cache_alloc(xnb_txbuf_cachep, flags); 1423 if (txp != NULL) { 1424 ASSERT((txp->xt_flags & XNB_TXBUF_INUSE) == 0); 1425 txp->xt_flags |= XNB_TXBUF_INUSE; 1426 1427 txp->xt_xnbp = xnbp; 1428 txp->xt_mop.dom = xnbp->xnb_peer; 1429 1430 txp->xt_mop.flags = GNTMAP_host_map; 1431 if (!xnbp->xnb_tx_pages_writable) 1432 txp->xt_mop.flags |= GNTMAP_readonly; 1433 1434 xnbp->xnb_tx_buf_count++; 1435 } 1436 1437 return (txp); 1438 } 1439 1440 static void 1441 xnb_txbuf_put(xnb_t *xnbp, xnb_txbuf_t *txp) 1442 { 1443 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1444 ASSERT((txp->xt_flags & XNB_TXBUF_INUSE) == XNB_TXBUF_INUSE); 1445 1446 txp->xt_flags &= ~XNB_TXBUF_INUSE; 1447 xnbp->xnb_tx_buf_count--; 1448 1449 kmem_cache_free(xnb_txbuf_cachep, txp); 1450 } 1451 1452 static mblk_t * 1453 xnb_from_peer(xnb_t *xnbp) 1454 { 1455 RING_IDX start, end, loop; 1456 gnttab_map_grant_ref_t *mop; 1457 xnb_txbuf_t **txpp; 1458 netif_tx_request_t *txreq; 1459 boolean_t work_to_do; 1460 mblk_t *head, *tail; 1461 /* 1462 * If the peer granted a read-only mapping to the page then we 1463 * must copy the data, as the local protocol stack (should the 1464 * packet be destined for this host) will modify the packet 1465 * 'in place'. 1466 */ 1467 boolean_t copy = xnbp->xnb_tx_always_copy || 1468 !xnbp->xnb_tx_pages_writable; 1469 1470 /* 1471 * For each individual request, the sequence of actions is: 1472 * 1473 * 1. get the request. 1474 * 2. map the page based on the grant ref. 1475 * 3. allocate an mblk, copy the data to it. 1476 * 4. release the grant. 1477 * 5. update the ring. 1478 * 6. pass the packet upward. 1479 * 7. kick the peer. 1480 * 1481 * In fact, we try to perform the grant operations in batches, 1482 * so there are two loops. 1483 */ 1484 1485 head = tail = NULL; 1486 around: 1487 ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1488 1489 /* LINTED: constant in conditional context */ 1490 RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do); 1491 if (!work_to_do) { 1492 finished: 1493 return (head); 1494 } 1495 1496 start = xnbp->xnb_tx_ring.req_cons; 1497 end = xnbp->xnb_tx_ring.sring->req_prod; 1498 1499 if ((end - start) > NET_TX_RING_SIZE) { 1500 /* 1501 * This usually indicates that the frontend driver is 1502 * misbehaving, as it's not possible to have more than 1503 * NET_TX_RING_SIZE ring elements in play at any one 1504 * time. 1505 * 1506 * We reset the ring pointers to the state declared by 1507 * the frontend and try to carry on. 1508 */ 1509 cmn_err(CE_WARN, "xnb_from_peer: domain %d tried to give us %u " 1510 "items in the ring, resetting and trying to recover.", 1511 xnbp->xnb_peer, (end - start)); 1512 1513 /* LINTED: constant in conditional context */ 1514 BACK_RING_ATTACH(&xnbp->xnb_tx_ring, 1515 (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE); 1516 1517 goto around; 1518 } 1519 1520 for (loop = start, mop = xnbp->xnb_tx_mop, txpp = xnbp->xnb_tx_bufp; 1521 loop != end; 1522 loop++, mop++, txpp++) { 1523 xnb_txbuf_t *txp; 1524 1525 txp = xnb_txbuf_get(xnbp, KM_NOSLEEP); 1526 if (txp == NULL) 1527 break; 1528 1529 ASSERT(xnbp->xnb_tx_pages_writable || 1530 ((txp->xt_mop.flags & GNTMAP_readonly) 1531 == GNTMAP_readonly)); 1532 1533 txp->xt_mop.ref = 1534 RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop)->gref; 1535 1536 *mop = txp->xt_mop; 1537 *txpp = txp; 1538 } 1539 1540 if ((loop - start) == 0) 1541 goto finished; 1542 1543 end = loop; 1544 1545 if (xen_map_gref(GNTTABOP_map_grant_ref, xnbp->xnb_tx_mop, 1546 end - start, B_FALSE) != 0) { 1547 1548 cmn_err(CE_WARN, "xnb_from_peer: map grant operation failed"); 1549 1550 loop = start; 1551 txpp = xnbp->xnb_tx_bufp; 1552 1553 while (loop != end) { 1554 xnb_txbuf_put(xnbp, *txpp); 1555 1556 loop++; 1557 txpp++; 1558 } 1559 1560 goto finished; 1561 } 1562 1563 for (loop = start, mop = xnbp->xnb_tx_mop, txpp = xnbp->xnb_tx_bufp; 1564 loop != end; 1565 loop++, mop++, txpp++) { 1566 mblk_t *mp = NULL; 1567 int16_t status = NETIF_RSP_OKAY; 1568 xnb_txbuf_t *txp = *txpp; 1569 1570 if (mop->status != 0) { 1571 cmn_err(CE_WARN, "xnb_from_peer: " 1572 "failed to map buffer: %d", 1573 mop->status); 1574 status = NETIF_RSP_ERROR; 1575 } 1576 1577 txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop); 1578 1579 if (status == NETIF_RSP_OKAY) { 1580 if (copy) { 1581 mp = allocb(txreq->size, BPRI_MED); 1582 if (mp == NULL) { 1583 status = NETIF_RSP_ERROR; 1584 xnbp->xnb_stat_tx_allocb_failed++; 1585 } else { 1586 bcopy((caddr_t)(uintptr_t) 1587 mop->host_addr + txreq->offset, 1588 mp->b_wptr, txreq->size); 1589 mp->b_wptr += txreq->size; 1590 } 1591 } else { 1592 mp = desballoc((uchar_t *)(uintptr_t) 1593 mop->host_addr + txreq->offset, 1594 txreq->size, 0, &txp->xt_free_rtn); 1595 if (mp == NULL) { 1596 status = NETIF_RSP_ERROR; 1597 xnbp->xnb_stat_tx_allocb_failed++; 1598 } else { 1599 txp->xt_id = txreq->id; 1600 txp->xt_status = status; 1601 txp->xt_mop = *mop; 1602 1603 mp->b_wptr += txreq->size; 1604 } 1605 } 1606 1607 /* 1608 * If we have a buffer and there are checksum 1609 * flags, process them appropriately. 1610 */ 1611 if ((mp != NULL) && 1612 ((txreq->flags & 1613 (NETTXF_csum_blank | NETTXF_data_validated)) 1614 != 0)) { 1615 mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp, 1616 mp, txreq->flags); 1617 xnbp->xnb_stat_tx_cksum_no_need++; 1618 } 1619 } 1620 1621 if (copy || (mp == NULL)) { 1622 txp->xt_status = status; 1623 txp->xt_id = txreq->id; 1624 xnb_tx_schedule_unmop(xnbp, mop, txp); 1625 } 1626 1627 if (mp != NULL) { 1628 xnbp->xnb_stat_opackets++; 1629 xnbp->xnb_stat_obytes += txreq->size; 1630 1631 mp->b_next = NULL; 1632 if (head == NULL) { 1633 ASSERT(tail == NULL); 1634 head = mp; 1635 } else { 1636 ASSERT(tail != NULL); 1637 tail->b_next = mp; 1638 } 1639 tail = mp; 1640 } 1641 } 1642 1643 xnbp->xnb_tx_ring.req_cons = loop; 1644 1645 goto around; 1646 /* NOTREACHED */ 1647 } 1648 1649 /* 1650 * intr() -- ring interrupt service routine 1651 */ 1652 static uint_t 1653 xnb_intr(caddr_t arg) 1654 { 1655 xnb_t *xnbp = (xnb_t *)arg; 1656 mblk_t *mp; 1657 1658 xnbp->xnb_stat_intr++; 1659 1660 mutex_enter(&xnbp->xnb_tx_lock); 1661 1662 ASSERT(xnbp->xnb_connected); 1663 1664 mp = xnb_from_peer(xnbp); 1665 1666 mutex_exit(&xnbp->xnb_tx_lock); 1667 1668 if (!xnbp->xnb_hotplugged) { 1669 xnbp->xnb_stat_tx_too_early++; 1670 goto fail; 1671 } 1672 if (mp == NULL) { 1673 xnbp->xnb_stat_spurious_intr++; 1674 goto fail; 1675 } 1676 1677 xnbp->xnb_flavour->xf_from_peer(xnbp, mp); 1678 1679 return (DDI_INTR_CLAIMED); 1680 1681 fail: 1682 freemsgchain(mp); 1683 return (DDI_INTR_CLAIMED); 1684 } 1685 1686 static boolean_t 1687 xnb_connect_rings(dev_info_t *dip) 1688 { 1689 xnb_t *xnbp = ddi_get_driver_private(dip); 1690 char *oename; 1691 struct gnttab_map_grant_ref map_op; 1692 evtchn_port_t evtchn; 1693 int i; 1694 1695 /* 1696 * Cannot attempt to connect the rings if already connected. 1697 */ 1698 ASSERT(!xnbp->xnb_connected); 1699 1700 oename = xvdi_get_oename(dip); 1701 1702 if (xenbus_gather(XBT_NULL, oename, 1703 "event-channel", "%u", &evtchn, 1704 "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref, 1705 "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref, 1706 NULL) != 0) { 1707 cmn_err(CE_WARN, "xnb_connect_rings: " 1708 "cannot read other-end details from %s", 1709 oename); 1710 goto fail; 1711 } 1712 1713 if (xenbus_scanf(XBT_NULL, oename, 1714 "feature-tx-writable", "%d", &i) != 0) 1715 i = 0; 1716 if (i != 0) 1717 xnbp->xnb_tx_pages_writable = B_TRUE; 1718 1719 if (xenbus_scanf(XBT_NULL, oename, 1720 "feature-no-csum-offload", "%d", &i) != 0) 1721 i = 0; 1722 if ((i == 1) || !xnbp->xnb_cksum_offload) 1723 xnbp->xnb_cksum_offload = B_FALSE; 1724 1725 /* Check whether our peer knows and requests hypervisor copy */ 1726 if (xenbus_scanf(XBT_NULL, oename, "request-rx-copy", "%d", &i) 1727 != 0) 1728 i = 0; 1729 if (i != 0) 1730 xnbp->xnb_hv_copy = B_TRUE; 1731 1732 /* 1733 * 1. allocate a vaddr for the tx page, one for the rx page. 1734 * 2. call GNTTABOP_map_grant_ref to map the relevant pages 1735 * into the allocated vaddr (one for tx, one for rx). 1736 * 3. call EVTCHNOP_bind_interdomain to have the event channel 1737 * bound to this domain. 1738 * 4. associate the event channel with an interrupt. 1739 * 5. declare ourselves connected. 1740 * 6. enable the interrupt. 1741 */ 1742 1743 /* 1.tx */ 1744 xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE, 1745 0, 0, 0, 0, VM_SLEEP); 1746 ASSERT(xnbp->xnb_tx_ring_addr != NULL); 1747 1748 /* 2.tx */ 1749 map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr); 1750 map_op.flags = GNTMAP_host_map; 1751 map_op.ref = xnbp->xnb_tx_ring_ref; 1752 map_op.dom = xnbp->xnb_peer; 1753 hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr, NULL); 1754 if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 || 1755 map_op.status != 0) { 1756 cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page."); 1757 goto fail; 1758 } 1759 xnbp->xnb_tx_ring_handle = map_op.handle; 1760 1761 /* LINTED: constant in conditional context */ 1762 BACK_RING_INIT(&xnbp->xnb_tx_ring, 1763 (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE); 1764 1765 /* 1.rx */ 1766 xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE, 1767 0, 0, 0, 0, VM_SLEEP); 1768 ASSERT(xnbp->xnb_rx_ring_addr != NULL); 1769 1770 /* 2.rx */ 1771 map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr); 1772 map_op.flags = GNTMAP_host_map; 1773 map_op.ref = xnbp->xnb_rx_ring_ref; 1774 map_op.dom = xnbp->xnb_peer; 1775 hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr, NULL); 1776 if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 || 1777 map_op.status != 0) { 1778 cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page."); 1779 goto fail; 1780 } 1781 xnbp->xnb_rx_ring_handle = map_op.handle; 1782 1783 /* LINTED: constant in conditional context */ 1784 BACK_RING_INIT(&xnbp->xnb_rx_ring, 1785 (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE); 1786 1787 /* 3 */ 1788 if (xvdi_bind_evtchn(dip, evtchn) != DDI_SUCCESS) { 1789 cmn_err(CE_WARN, "xnb_connect_rings: " 1790 "cannot bind event channel %d", xnbp->xnb_evtchn); 1791 xnbp->xnb_evtchn = INVALID_EVTCHN; 1792 goto fail; 1793 } 1794 xnbp->xnb_evtchn = xvdi_get_evtchn(dip); 1795 1796 /* 1797 * It would be good to set the state to XenbusStateConnected 1798 * here as well, but then what if ddi_add_intr() failed? 1799 * Changing the state in the store will be noticed by the peer 1800 * and cannot be "taken back". 1801 */ 1802 mutex_enter(&xnbp->xnb_tx_lock); 1803 mutex_enter(&xnbp->xnb_rx_lock); 1804 1805 /* 5.1 */ 1806 xnbp->xnb_connected = B_TRUE; 1807 1808 mutex_exit(&xnbp->xnb_rx_lock); 1809 mutex_exit(&xnbp->xnb_tx_lock); 1810 1811 /* 4, 6 */ 1812 if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp) 1813 != DDI_SUCCESS) { 1814 cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt"); 1815 goto fail; 1816 } 1817 xnbp->xnb_irq = B_TRUE; 1818 1819 /* 5.2 */ 1820 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 1821 1822 return (B_TRUE); 1823 1824 fail: 1825 mutex_enter(&xnbp->xnb_tx_lock); 1826 mutex_enter(&xnbp->xnb_rx_lock); 1827 1828 xnbp->xnb_connected = B_FALSE; 1829 mutex_exit(&xnbp->xnb_rx_lock); 1830 mutex_exit(&xnbp->xnb_tx_lock); 1831 1832 return (B_FALSE); 1833 } 1834 1835 static void 1836 xnb_disconnect_rings(dev_info_t *dip) 1837 { 1838 xnb_t *xnbp = ddi_get_driver_private(dip); 1839 1840 if (xnbp->xnb_irq) { 1841 ddi_remove_intr(dip, 0, NULL); 1842 xnbp->xnb_irq = B_FALSE; 1843 } 1844 1845 if (xnbp->xnb_tx_unmop_count > 0) 1846 xnb_tx_perform_pending_unmop(xnbp); 1847 1848 if (xnbp->xnb_evtchn != INVALID_EVTCHN) { 1849 xvdi_free_evtchn(dip); 1850 xnbp->xnb_evtchn = INVALID_EVTCHN; 1851 } 1852 1853 if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) { 1854 struct gnttab_unmap_grant_ref unmap_op; 1855 1856 unmap_op.host_addr = (uint64_t)(uintptr_t) 1857 xnbp->xnb_rx_ring_addr; 1858 unmap_op.dev_bus_addr = 0; 1859 unmap_op.handle = xnbp->xnb_rx_ring_handle; 1860 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 1861 &unmap_op, 1) != 0) 1862 cmn_err(CE_WARN, "xnb_disconnect_rings: " 1863 "cannot unmap rx-ring page (%d)", 1864 unmap_op.status); 1865 1866 xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE; 1867 } 1868 1869 if (xnbp->xnb_rx_ring_addr != NULL) { 1870 hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr); 1871 vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE); 1872 xnbp->xnb_rx_ring_addr = NULL; 1873 } 1874 1875 if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) { 1876 struct gnttab_unmap_grant_ref unmap_op; 1877 1878 unmap_op.host_addr = (uint64_t)(uintptr_t) 1879 xnbp->xnb_tx_ring_addr; 1880 unmap_op.dev_bus_addr = 0; 1881 unmap_op.handle = xnbp->xnb_tx_ring_handle; 1882 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 1883 &unmap_op, 1) != 0) 1884 cmn_err(CE_WARN, "xnb_disconnect_rings: " 1885 "cannot unmap tx-ring page (%d)", 1886 unmap_op.status); 1887 1888 xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE; 1889 } 1890 1891 if (xnbp->xnb_tx_ring_addr != NULL) { 1892 hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr); 1893 vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE); 1894 xnbp->xnb_tx_ring_addr = NULL; 1895 } 1896 } 1897 1898 /*ARGSUSED*/ 1899 static void 1900 xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, 1901 void *arg, void *impl_data) 1902 { 1903 xnb_t *xnbp = ddi_get_driver_private(dip); 1904 XenbusState new_state = *(XenbusState *)impl_data; 1905 1906 ASSERT(xnbp != NULL); 1907 1908 switch (new_state) { 1909 case XenbusStateConnected: 1910 /* spurious state change */ 1911 if (xnbp->xnb_connected) 1912 return; 1913 1914 if (xnb_connect_rings(dip)) { 1915 xnbp->xnb_flavour->xf_peer_connected(xnbp); 1916 } else { 1917 xnbp->xnb_flavour->xf_peer_disconnected(xnbp); 1918 xnb_disconnect_rings(dip); 1919 (void) xvdi_switch_state(dip, XBT_NULL, 1920 XenbusStateClosed); 1921 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 1922 } 1923 1924 /* 1925 * Now that we've attempted to connect it's reasonable 1926 * to allow an attempt to detach. 1927 */ 1928 xnbp->xnb_detachable = B_TRUE; 1929 1930 break; 1931 1932 case XenbusStateClosing: 1933 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing); 1934 1935 break; 1936 1937 case XenbusStateClosed: 1938 xnbp->xnb_flavour->xf_peer_disconnected(xnbp); 1939 1940 mutex_enter(&xnbp->xnb_tx_lock); 1941 mutex_enter(&xnbp->xnb_rx_lock); 1942 1943 xnb_disconnect_rings(dip); 1944 xnbp->xnb_connected = B_FALSE; 1945 1946 mutex_exit(&xnbp->xnb_rx_lock); 1947 mutex_exit(&xnbp->xnb_tx_lock); 1948 1949 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed); 1950 (void) xvdi_post_event(dip, XEN_HP_REMOVE); 1951 /* 1952 * In all likelyhood this is already set (in the above 1953 * case), but if the peer never attempted to connect 1954 * and the domain is destroyed we get here without 1955 * having been through the case above, so we set it to 1956 * be sure. 1957 */ 1958 xnbp->xnb_detachable = B_TRUE; 1959 1960 break; 1961 1962 default: 1963 break; 1964 } 1965 } 1966 1967 /*ARGSUSED*/ 1968 static void 1969 xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id, 1970 void *arg, void *impl_data) 1971 { 1972 xnb_t *xnbp = ddi_get_driver_private(dip); 1973 xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data; 1974 boolean_t success; 1975 1976 ASSERT(xnbp != NULL); 1977 1978 switch (state) { 1979 case Connected: 1980 1981 /* spurious hotplug event */ 1982 if (xnbp->xnb_hotplugged) 1983 return; 1984 1985 success = xnbp->xnb_flavour->xf_hotplug_connected(xnbp); 1986 1987 mutex_enter(&xnbp->xnb_tx_lock); 1988 mutex_enter(&xnbp->xnb_rx_lock); 1989 1990 xnbp->xnb_hotplugged = success; 1991 1992 mutex_exit(&xnbp->xnb_rx_lock); 1993 mutex_exit(&xnbp->xnb_tx_lock); 1994 break; 1995 1996 default: 1997 break; 1998 } 1999 } 2000 2001 static struct modldrv modldrv = { 2002 &mod_miscops, "xnb", 2003 }; 2004 2005 static struct modlinkage modlinkage = { 2006 MODREV_1, &modldrv, NULL 2007 }; 2008 2009 int 2010 _init(void) 2011 { 2012 int i; 2013 2014 mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL); 2015 2016 xnb_txbuf_cachep = kmem_cache_create("xnb_txbuf_cachep", 2017 sizeof (xnb_txbuf_t), 0, xnb_txbuf_constructor, 2018 xnb_txbuf_destructor, NULL, NULL, NULL, 0); 2019 ASSERT(xnb_txbuf_cachep != NULL); 2020 2021 i = mod_install(&modlinkage); 2022 if (i != DDI_SUCCESS) { 2023 kmem_cache_destroy(xnb_txbuf_cachep); 2024 mutex_destroy(&xnb_alloc_page_lock); 2025 } 2026 return (i); 2027 } 2028 2029 int 2030 _info(struct modinfo *modinfop) 2031 { 2032 return (mod_info(&modlinkage, modinfop)); 2033 } 2034 2035 int 2036 _fini(void) 2037 { 2038 int i; 2039 2040 i = mod_remove(&modlinkage); 2041 if (i == DDI_SUCCESS) { 2042 kmem_cache_destroy(xnb_txbuf_cachep); 2043 mutex_destroy(&xnb_alloc_page_lock); 2044 } 2045 return (i); 2046 } 2047