/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifdef DEBUG
#define	XNB_DEBUG 1
#endif /* DEBUG */

#include "xnb.h"

#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/mac.h>
#include <sys/mac_impl.h> /* For mac_fix_cksum(). */
#include <sys/dlpi.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/types.h>
#include <sys/pattr.h>
#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/balloon_impl.h>
#include <sys/evtchn_impl.h>
#include <sys/gnttab.h>
#include <vm/vm_dep.h>
#include <sys/note.h>
#include <sys/gld.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>

/*
 * The terms "transmit" and "receive" are used in alignment with domU,
 * which means that packets originating from the peer domU are
 * "transmitted" to other parts of the system and packets are "received"
 * from them.
 */

/*
 * Should we allow guests to manipulate multicast group membership?
 */
static boolean_t	xnb_multicast_control = B_TRUE;

static boolean_t	xnb_connect_rings(dev_info_t *);
static void	xnb_disconnect_rings(dev_info_t *);
static void	xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);
static void	xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);

static int	xnb_txbuf_constructor(void *, void *, int);
static void	xnb_txbuf_destructor(void *, void *);
static void	xnb_tx_notify_peer(xnb_t *, boolean_t);
static void	xnb_tx_mark_complete(xnb_t *, RING_IDX, int16_t);

mblk_t *xnb_to_peer(xnb_t *, mblk_t *);
mblk_t *xnb_copy_to_peer(xnb_t *, mblk_t *);

static void setup_gop(xnb_t *, gnttab_copy_t *, uchar_t *,
    size_t, size_t, size_t, grant_ref_t);
#pragma inline(setup_gop)
static boolean_t is_foreign(void *);
#pragma inline(is_foreign)

#define	INVALID_GRANT_HANDLE	((grant_handle_t)-1)
#define	INVALID_GRANT_REF	((grant_ref_t)-1)

static kmutex_t	xnb_alloc_page_lock;

/*
 * On a 32 bit PAE system physical and machine addresses are larger
 * than 32 bits.  ddi_btop() on such systems takes an unsigned long
 * argument, and so addresses above 4G are truncated before ddi_btop()
 * gets to see them.  To avoid this, code the shift operation here.
 */
#define	xnb_btop(addr)	((addr) >> PAGESHIFT)
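
/*
 * Illustrative sketch (not part of the driver): on a 32-bit PAE kernel
 * a machine address is wider than an unsigned long, so squeezing it
 * through ddi_btop()'s unsigned long argument discards the upper bits:
 *
 *	uint64_t pa = 0x100001000ULL;	// an address above 4G
 *	(ulong_t)pa == 0x00001000	// truncated on ILP32
 *	xnb_btop(pa) == 0x100001	// full page number preserved
 *
 * xnb_btop() therefore shifts the 64-bit value directly.
 */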

/* DMA attributes for transmit and receive data */
static ddi_dma_attr_t buf_dma_attr = {
	DMA_ATTR_V0,		/* version of this structure */
	0,			/* lowest usable address */
	0xffffffffffffffffULL,	/* highest usable address */
	0x7fffffff,		/* maximum DMAable byte count */
	MMU_PAGESIZE,		/* alignment in bytes */
	0x7ff,			/* bitmap of burst sizes */
	1,			/* minimum transfer */
	0xffffffffU,		/* maximum transfer */
	0xffffffffffffffffULL,	/* maximum segment length */
	1,			/* maximum number of segments */
	1,			/* granularity */
	0,			/* flags (reserved) */
};

/* DMA access attributes for data: NOT to be byte swapped. */
static ddi_device_acc_attr_t data_accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

/*
 * Statistics.
 */
static char *aux_statistics[] = {
	"rx_cksum_deferred",
	"tx_cksum_no_need",
	"rx_rsp_notok",
	"tx_notify_deferred",
	"tx_notify_sent",
	"rx_notify_deferred",
	"rx_notify_sent",
	"tx_too_early",
	"rx_too_early",
	"rx_allocb_failed",
	"tx_allocb_failed",
	"rx_foreign_page",
	"mac_full",
	"spurious_intr",
	"allocation_success",
	"allocation_failure",
	"small_allocation_success",
	"small_allocation_failure",
	"other_allocation_failure",
	"rx_pageboundary_crossed",
	"rx_cpoparea_grown",
	"csum_hardware",
	"csum_software",
};

static int
xnb_ks_aux_update(kstat_t *ksp, int flag)
{
	xnb_t *xnbp;
	kstat_named_t *knp;

	if (flag != KSTAT_READ)
		return (EACCES);

	xnbp = ksp->ks_private;
	knp = ksp->ks_data;

	/*
	 * Assignment order should match that of the names in
	 * aux_statistics.
	 */
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_no_need;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_rsp_notok;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_foreign_page;
	(knp++)->value.ui64 = xnbp->xnb_stat_mac_full;
	(knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_pagebndry_crossed;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cpoparea_grown;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_software;

	return (0);
}
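
/*
 * A possible guard for the ordering dependency above (sketch only, not
 * present in this driver): the number of assignments in
 * xnb_ks_aux_update() must equal the number of names in aux_statistics,
 * which could be checked at compile time with something like
 *
 *	CTASSERT(sizeof (aux_statistics) / sizeof (aux_statistics[0]) == 23);
 *
 * The statistics can be inspected from userland with kstat(1M), e.g.
 * "kstat -n aux_statistics"; the module name seen there depends on the
 * flavour driver in use.
 */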

static boolean_t
xnb_ks_init(xnb_t *xnbp)
{
	int nstat = sizeof (aux_statistics) /
	    sizeof (aux_statistics[0]);
	char **cp = aux_statistics;
	kstat_named_t *knp;

	/*
	 * Create and initialise kstats.
	 */
	xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo),
	    ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net",
	    KSTAT_TYPE_NAMED, nstat, 0);
	if (xnbp->xnb_kstat_aux == NULL)
		return (B_FALSE);

	xnbp->xnb_kstat_aux->ks_private = xnbp;
	xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update;

	knp = xnbp->xnb_kstat_aux->ks_data;
	while (nstat > 0) {
		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);

		knp++;
		cp++;
		nstat--;
	}

	kstat_install(xnbp->xnb_kstat_aux);

	return (B_TRUE);
}

static void
xnb_ks_free(xnb_t *xnbp)
{
	kstat_delete(xnbp->xnb_kstat_aux);
}

/*
 * Calculate and insert the transport checksum for an arbitrary packet.
 */
static mblk_t *
xnb_software_csum(xnb_t *xnbp, mblk_t *mp)
{
	_NOTE(ARGUNUSED(xnbp));

	/*
	 * XXPV dme: shouldn't rely on mac_fix_cksum(), not least
	 * because it doesn't cover all of the interesting cases :-(
	 */
	(void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0,
	    HCK_FULLCKSUM, KM_NOSLEEP);

	return (mac_fix_cksum(mp));
}

mblk_t *
xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab)
{
	struct ether_header *ehp;
	uint16_t sap;
	uint32_t offset;
	ipha_t *ipha;

	ASSERT(mp->b_next == NULL);

	/*
	 * Check that the packet is contained in a single mblk.  In
	 * the "from peer" path this is true today, but may change
	 * when scatter gather support is added.  In the "to peer"
	 * path we cannot be sure, but in most cases it will be true
	 * (in the xnbo case the packet has come from a MAC device
	 * which is unlikely to split packets).
	 */
	if (mp->b_cont != NULL)
		goto software;

	/*
	 * If the MAC has no hardware capability don't do any further
	 * checking.
	 */
	if (capab == 0)
		goto software;

	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
	ehp = (struct ether_header *)mp->b_rptr;

	if (ntohs(ehp->ether_type) == VLAN_TPID) {
		struct ether_vlan_header *evhp;

		ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
		evhp = (struct ether_vlan_header *)mp->b_rptr;
		sap = ntohs(evhp->ether_type);
		offset = sizeof (struct ether_vlan_header);
	} else {
		sap = ntohs(ehp->ether_type);
		offset = sizeof (struct ether_header);
	}

	/*
	 * We only attempt to do IPv4 packets in hardware.
	 */
	if (sap != ETHERTYPE_IP)
		goto software;

	/*
	 * We know that this is an IPv4 packet.
	 */
	ipha = (ipha_t *)(mp->b_rptr + offset);

	switch (ipha->ipha_protocol) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		uint32_t start, length, stuff, cksum;
		uint16_t *stuffp;

		/*
		 * This is a TCP/IPv4 or UDP/IPv4 packet, for which we
		 * can use full IPv4 and partial checksum offload.
		 */
		if ((capab & (HCKSUM_INET_FULL_V4|HCKSUM_INET_PARTIAL)) == 0)
			break;

		start = IP_SIMPLE_HDR_LENGTH;
		length = ntohs(ipha->ipha_length);
		if (ipha->ipha_protocol == IPPROTO_TCP) {
			stuff = start + TCP_CHECKSUM_OFFSET;
			cksum = IP_TCP_CSUM_COMP;
		} else {
			stuff = start + UDP_CHECKSUM_OFFSET;
			cksum = IP_UDP_CSUM_COMP;
		}
		stuffp = (uint16_t *)(mp->b_rptr + offset + stuff);

		if (capab & HCKSUM_INET_FULL_V4) {
			/*
			 * Some devices require that the checksum
			 * field of the packet is zero for full
			 * offload.
			 */
			*stuffp = 0;

			(void) hcksum_assoc(mp, NULL, NULL,
			    0, 0, 0, 0,
			    HCK_FULLCKSUM, KM_NOSLEEP);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		if (capab & HCKSUM_INET_PARTIAL) {
			if (*stuffp == 0) {
				ipaddr_t src, dst;

				/*
				 * Older Solaris guests don't insert
				 * the pseudo-header checksum, so we
				 * calculate it here.
				 */
				src = ipha->ipha_src;
				dst = ipha->ipha_dst;

				cksum += (dst >> 16) + (dst & 0xFFFF);
				cksum += (src >> 16) + (src & 0xFFFF);
				cksum += length - IP_SIMPLE_HDR_LENGTH;

				cksum = (cksum >> 16) + (cksum & 0xFFFF);
				cksum = (cksum >> 16) + (cksum & 0xFFFF);

				ASSERT(cksum <= 0xFFFF);

				*stuffp = (uint16_t)(cksum ? cksum : ~cksum);
			}

			(void) hcksum_assoc(mp, NULL, NULL,
			    start, stuff, length, 0,
			    HCK_PARTIALCKSUM, KM_NOSLEEP);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		/* NOTREACHED */
		break;
	}

	default:
		/* Use software. */
		break;
	}

software:
	/*
	 * We are not able to use any offload so do the whole thing in
	 * software.
	 */
	xnbp->xnb_stat_csum_software++;

	return (xnb_software_csum(xnbp, mp));
}
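
/*
 * Worked example of the pseudo-header calculation above (illustrative
 * values only): for a TCP segment with src 192.168.1.2 (0xc0a80102),
 * dst 192.168.1.3 (0xc0a80103) and ipha_length 60:
 *
 *	cksum  = IP_TCP_CSUM_COMP		(the protocol component)
 *	cksum += 0xc0a8 + 0x0103		(dst, high + low halves)
 *	cksum += 0xc0a8 + 0x0102		(src, high + low halves)
 *	cksum += 60 - IP_SIMPLE_HDR_LENGTH	(TCP length, here 40)
 *
 * The two folding steps then add any carries back into the low 16
 * bits so that the result fits the 16-bit checksum field which the
 * hardware later completes.
 */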

int
xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
{
	xnb_t *xnbp;
	char *xsname;
	char cachename[32];

	xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP);

	xnbp->xnb_flavour = flavour;
	xnbp->xnb_flavour_data = flavour_data;
	xnbp->xnb_devinfo = dip;
	xnbp->xnb_evtchn = INVALID_EVTCHN;
	xnbp->xnb_irq = B_FALSE;
	xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_connected = B_FALSE;
	xnbp->xnb_hotplugged = B_FALSE;
	xnbp->xnb_detachable = B_FALSE;
	xnbp->xnb_peer = xvdi_get_oeid(dip);
	xnbp->xnb_be_status = XNB_STATE_INIT;
	xnbp->xnb_fe_status = XNB_STATE_INIT;

	xnbp->xnb_tx_buf_count = 0;

	xnbp->xnb_rx_hv_copy = B_FALSE;
	xnbp->xnb_multicast_control = B_FALSE;

	xnbp->xnb_rx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
	ASSERT(xnbp->xnb_rx_va != NULL);

	if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie)
	    != DDI_SUCCESS)
		goto failure;

	/* Allocated on demand, when/if we enter xnb_copy_to_peer(). */
	xnbp->xnb_rx_cpop = NULL;
	xnbp->xnb_rx_cpop_count = 0;

	mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_state_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);

	/* Set driver private pointer now. */
	ddi_set_driver_private(dip, xnbp);

	(void) sprintf(cachename, "xnb_tx_buf_cache_%d", ddi_get_instance(dip));
	xnbp->xnb_tx_buf_cache = kmem_cache_create(cachename,
	    sizeof (xnb_txbuf_t), 0,
	    xnb_txbuf_constructor, xnb_txbuf_destructor,
	    NULL, xnbp, NULL, 0);
	if (xnbp->xnb_tx_buf_cache == NULL)
		goto failure_0;

	if (!xnb_ks_init(xnbp))
		goto failure_1;

	/*
	 * Receive notification of changes in the state of the
	 * driver in the guest domain.
	 */
	if (xvdi_add_event_handler(dip, XS_OE_STATE, xnb_oe_state_change,
	    NULL) != DDI_SUCCESS)
		goto failure_2;

	/*
	 * Receive notification of hotplug events.
	 */
	if (xvdi_add_event_handler(dip, XS_HP_STATE, xnb_hp_state_change,
	    NULL) != DDI_SUCCESS)
		goto failure_2;

	xsname = xvdi_get_xsname(dip);

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-multicast-control", "%d",
	    xnb_multicast_control ? 1 : 0) != 0)
		goto failure_3;

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-copy", "%d", 1) != 0)
		goto failure_3;
	/*
	 * Linux domUs seem to depend on "feature-rx-flip" being 0
	 * in addition to "feature-rx-copy" being 1.  It seems strange
	 * to use four possible states to describe a binary decision,
	 * but we might as well play nice.
	 */
	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-flip", "%d", 0) != 0)
		goto failure_3;

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
	(void) xvdi_post_event(dip, XEN_HP_ADD);

	return (DDI_SUCCESS);

failure_3:
	xvdi_remove_event_handler(dip, NULL);

failure_2:
	xnb_ks_free(xnbp);

failure_1:
	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);

failure_0:
	mutex_destroy(&xnbp->xnb_state_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

failure:
	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);
	kmem_free(xnbp, sizeof (*xnbp));
	return (DDI_FAILURE);
}
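
/*
 * After a successful attach the backend's xenstore directory might
 * contain (illustrative values; the exact path depends on the domain
 * and device ids, typically .../backend/vif/<domid>/<handle>/):
 *
 *	feature-multicast-control = "1"
 *	feature-rx-copy = "1"
 *	feature-rx-flip = "0"
 *
 * A frontend that honours these keys will then request the
 * hypervisor-copy receive path (see "request-rx-copy" in
 * xnb_read_oe_config() below) rather than page flipping.
 */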

void
xnb_detach(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	ASSERT(xnbp != NULL);
	ASSERT(!xnbp->xnb_connected);
	ASSERT(xnbp->xnb_tx_buf_count == 0);

	xnb_disconnect_rings(dip);

	xvdi_remove_event_handler(dip, NULL);

	xnb_ks_free(xnbp);

	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);

	ddi_set_driver_private(dip, NULL);

	mutex_destroy(&xnbp->xnb_state_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

	if (xnbp->xnb_rx_cpop_count > 0)
		kmem_free(xnbp->xnb_rx_cpop, sizeof (xnbp->xnb_rx_cpop[0])
		    * xnbp->xnb_rx_cpop_count);

	ASSERT(xnbp->xnb_rx_va != NULL);
	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);

	kmem_free(xnbp, sizeof (*xnbp));
}

/*
 * Allocate a page from the hypervisor to be flipped to the peer.
 *
 * Try to get pages in batches to reduce the overhead of calls into
 * the balloon driver.
 */
static mfn_t
xnb_alloc_page(xnb_t *xnbp)
{
#define	WARNING_RATE_LIMIT 100
#define	BATCH_SIZE 256
	static mfn_t mfns[BATCH_SIZE];	/* common across all instances */
	static int nth = BATCH_SIZE;
	mfn_t mfn;

	mutex_enter(&xnb_alloc_page_lock);
	if (nth == BATCH_SIZE) {
		if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) {
			xnbp->xnb_stat_allocation_failure++;
			mutex_exit(&xnb_alloc_page_lock);

			/*
			 * Try for a single page in low memory situations.
			 */
			if (balloon_alloc_pages(1, &mfn) != 1) {
				if ((xnbp->xnb_stat_small_allocation_failure++
				    % WARNING_RATE_LIMIT) == 0)
					cmn_err(CE_WARN, "xnb_alloc_page: "
					    "Cannot allocate memory to "
					    "transfer packets to peer.");
				return (0);
			} else {
				xnbp->xnb_stat_small_allocation_success++;
				return (mfn);
			}
		}

		nth = 0;
		xnbp->xnb_stat_allocation_success++;
	}

	mfn = mfns[nth++];
	mutex_exit(&xnb_alloc_page_lock);

	ASSERT(mfn != 0);

	return (mfn);
#undef BATCH_SIZE
#undef WARNING_RATE_LIMIT
}
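
/*
 * Sketch of the effect of the batching above (numbers illustrative):
 * with BATCH_SIZE at 256, a burst of 256 flipped packets costs one
 * balloon_alloc_pages() call instead of 256, i.e. roughly
 *
 *	balloon calls = howmany(pages_needed, BATCH_SIZE)
 *
 * at the price of up to 255 pre-allocated pages being held in the
 * static mfns[] array, which is shared by all xnb instances under
 * xnb_alloc_page_lock.
 */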

/*
 * Free a page back to the hypervisor.
 *
 * This happens only in the error path, so batching is not worth the
 * complication.
 */
static void
xnb_free_page(xnb_t *xnbp, mfn_t mfn)
{
	_NOTE(ARGUNUSED(xnbp));
	int r;
	pfn_t pfn;

	pfn = xen_assign_pfn(mfn);
	pfnzero(pfn, 0, PAGESIZE);
	xen_release_pfn(pfn);

	if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) {
		cmn_err(CE_WARN, "free_page: cannot decrease memory "
		    "reservation (%d): page kept but unusable (mfn = 0x%lx).",
		    r, mfn);
	}
}

/*
 * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but using
 * local variables.  Used in both xnb_to_peer() and xnb_copy_to_peer().
 */
#define	XNB_RING_HAS_UNCONSUMED_REQUESTS(_r)		\
	((((_r)->sring->req_prod - loop) <		\
	    (RING_SIZE(_r) - (loop - prod))) ?		\
	    ((_r)->sring->req_prod - loop) :		\
	    (RING_SIZE(_r) - (loop - prod)))
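
/*
 * Worked example (illustrative numbers): with RING_SIZE(_r) == 256,
 * loop == 7, prod == 5 and req_prod == 10, the macro compares
 *
 *	req_prod - loop = 3		requests posted by the peer but
 *					not yet consumed by us
 *	256 - (loop - prod) = 254	responses we could still queue
 *					before catching our own
 *					response producer
 *
 * and yields the smaller value, 3: the number of request slots that
 * may safely be consumed in this pass.
 */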

/*
 * Pass packets to the peer using page flipping.
 */
mblk_t *
xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *prev = NULL;
	size_t len;
	gnttab_transfer_t *gop;
	boolean_t notify;
	RING_IDX loop, prod, end;

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a new page from the hypervisor.
	 * 2. get a request slot from the ring.
	 * 3. copy the data into the new page.
	 * 4. transfer the page to the peer.
	 * 5. update the request slot.
	 * 6. kick the peer.
	 * 7. free mp.
	 *
	 * In order to reduce the number of hypercalls, we prepare
	 * several packets for the peer and perform a single hypercall
	 * to transfer them.
	 */

	mutex_enter(&xnbp->xnb_rx_lock);

	/*
	 * If we are not connected to the peer or have not yet
	 * finished hotplug it is too early to pass packets to the
	 * peer.
	 */
	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_rx_lock);
		DTRACE_PROBE(flip_rx_too_early);
		xnbp->xnb_stat_rx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_rx_top;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {

		mfn_t mfn;
		pfn_t pfn;
		netif_rx_request_t *rxreq;
		netif_rx_response_t *rxresp;
		char *valoop;
		mblk_t *ml;
		uint16_t cksum_flags;

		/* 1 */
		if ((mfn = xnb_alloc_page(xnbp)) == 0) {
			xnbp->xnb_stat_rx_defer++;
			break;
		}

		/* 2 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* Assign a pfn and map the new page at the allocated va. */
		pfn = xen_assign_pfn(mfn);
		hat_devload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
		    pfn, PROT_READ | PROT_WRITE, HAT_LOAD);

		/* 3 */
		len = 0;
		valoop = xnbp->xnb_rx_va;
		for (ml = mp; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;

			bcopy(ml->b_rptr, valoop, chunk);
			valoop += chunk;
			len += chunk;
		}

		ASSERT(len < PAGESIZE);

		/* Release the pfn. */
		hat_unload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
		    HAT_UNLOAD_UNMAP);
		xen_release_pfn(pfn);

		/* 4 */
		gop->mfn = mfn;
		gop->domid = xnbp->xnb_peer;
		gop->ref = rxreq->gref;

		/* 5.1 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = 0;
		rxresp->flags = 0;

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_rx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		loop++;
		prod++;
		gop++;
		prev = mp;
		mp = mp->b_next;
	}

	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_rx_lock);
		return (mp);
	}

	end = loop;

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(prev != NULL);
	prev->b_next = NULL;

	if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_rx_top,
	    loop - xnbp->xnb_rx_ring.req_cons) != 0) {
		cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed");
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_rx_top;

	while (loop < end) {
		int16_t status = NETIF_RSP_OKAY;

		if (gop->status != 0) {
			status = NETIF_RSP_ERROR;

			/*
			 * If the status is anything other than
			 * GNTST_bad_page then we don't own the page
			 * any more, so don't try to give it back.
			 */
			if (gop->status != GNTST_bad_page)
				gop->mfn = 0;
		} else {
			/* The page is no longer ours. */
			gop->mfn = 0;
		}

		if (gop->mfn != 0)
			/*
			 * Give back the page, as we won't be using
			 * it.
			 */
			xnb_free_page(xnbp, gop->mfn);
		else
			/*
			 * We gave away a page, update our accounting
			 * now.
			 */
			balloon_drv_subtracted(1);

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
		} else {
			xnbp->xnb_stat_ipackets++;
			xnbp->xnb_stat_rbytes += len;
		}

		loop++;
		prod++;
		gop++;
	}

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_rx_defer++;

	mutex_exit(&xnbp->xnb_rx_lock);

	/* Free mblk_t's that we consumed. */
	freemsgchain(free);

	return (mp);
}

/* Helper functions for xnb_copy_to_peer(). */

/*
 * Grow the array of copy operation descriptors.
 */
static boolean_t
grow_cpop_area(xnb_t *xnbp)
{
	size_t count;
	gnttab_copy_t *new;

	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

	count = xnbp->xnb_rx_cpop_count + CPOP_DEFCNT;

	if ((new = kmem_alloc(sizeof (new[0]) * count, KM_NOSLEEP)) == NULL) {
		xnbp->xnb_stat_other_allocation_failure++;
		return (B_FALSE);
	}

	bcopy(xnbp->xnb_rx_cpop, new,
	    sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);

	kmem_free(xnbp->xnb_rx_cpop,
	    sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);

	xnbp->xnb_rx_cpop = new;
	xnbp->xnb_rx_cpop_count = count;

	xnbp->xnb_stat_rx_cpoparea_grown++;

	return (B_TRUE);
}

/*
 * Check whether an address is on a page that's foreign to this domain.
 */
static boolean_t
is_foreign(void *addr)
{
	pfn_t pfn = hat_getpfnum(kas.a_hat, addr);

	return ((pfn & PFN_IS_FOREIGN_MFN) == PFN_IS_FOREIGN_MFN);
}

/*
 * Insert a newly allocated mblk into a chain, replacing the old one.
 */
static mblk_t *
replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev)
{
	uint32_t start, stuff, end, value, flags;
	mblk_t *new_mp;

	new_mp = copyb(mp);
	if (new_mp == NULL)
		cmn_err(CE_PANIC, "replace_msg: cannot alloc new message "
		    "for %p, len %lu", (void *) mp, len);

	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags);
	(void) hcksum_assoc(new_mp, NULL, NULL, start, stuff, end, value,
	    flags, KM_NOSLEEP);

	new_mp->b_next = mp->b_next;
	new_mp->b_prev = mp->b_prev;
	new_mp->b_cont = mp->b_cont;

	/* Make sure we only overwrite pointers to the mblk being replaced. */
	if (mp_prev != NULL && mp_prev->b_next == mp)
		mp_prev->b_next = new_mp;

	if (ml_prev != NULL && ml_prev->b_cont == mp)
		ml_prev->b_cont = new_mp;

	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	freemsg(mp);

	return (new_mp);
}

/*
 * Set all the fields in a gnttab_copy_t.
 */
static void
setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr,
    size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref)
{
	ASSERT(xnbp != NULL && gp != NULL);

	gp->source.offset = s_off;
	gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr));
	gp->source.domid = DOMID_SELF;

	gp->len = (uint16_t)len;
	gp->flags = GNTCOPY_dest_gref;
	gp->status = 0;

	gp->dest.u.ref = d_ref;
	gp->dest.offset = d_off;
	gp->dest.domid = xnbp->xnb_peer;
}
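
/*
 * Example of how the loop in xnb_copy_to_peer() below uses setup_gop()
 * (illustrative numbers): an mblk whose 3000 bytes start 2000 bytes
 * into a 4096-byte page is described by two gnttab_copy_t entries,
 *
 *	setup_gop(xnbp, gop, rptr, 2000, 0, 2096, gref);
 *	setup_gop(xnbp, gop + 1, rptr + 2096, 0, 2096, 904, gref);
 *
 * i.e. the source offset of the second chunk is 0 (it starts at the
 * top of the next page) while the destination offset carries on from
 * where the first chunk finished.
 */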

/*
 * Pass packets to the peer using hypervisor copy operations.
 */
mblk_t *
xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *mp_prev = NULL, *saved_mp = mp;
	mblk_t *ml, *ml_prev;
	boolean_t notify;
	RING_IDX loop, prod;
	int i;

	/*
	 * If the peer does not pre-post buffers for received packets,
	 * use page flipping to pass packets to it.
	 */
	if (!xnbp->xnb_rx_hv_copy)
		return (xnb_to_peer(xnbp, mp));

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a request slot from the ring.
	 * 2. set up data for hypercall (see NOTE below)
	 * 3. have the hypervisor copy the data
	 * 4. update the request slot.
	 * 5. kick the peer.
	 *
	 * NOTE ad 2.
	 * In order to reduce the number of hypercalls, we prepare
	 * several mblks (mp->b_cont != NULL) for the peer and
	 * perform a single hypercall to transfer them.  We also have
	 * to set up a separate copy operation for every page.
	 *
	 * If we have more than one packet (mp->b_next != NULL), we do
	 * this whole dance repeatedly.
	 */

	mutex_enter(&xnbp->xnb_rx_lock);

	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_rx_lock);
		DTRACE_PROBE(copy_rx_too_early);
		xnbp->xnb_stat_rx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
		netif_rx_request_t *rxreq;
		size_t d_offset, len;
		int item_count;
		gnttab_copy_t *gop_cp;
		netif_rx_response_t *rxresp;
		uint16_t cksum_flags;
		int16_t status = NETIF_RSP_OKAY;

		/* 1 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_copy_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* 2 */
		d_offset = 0;
		len = 0;
		item_count = 0;

		gop_cp = xnbp->xnb_rx_cpop;

		/*
		 * We walk the b_cont pointers and set up a
		 * gnttab_copy_t for each sub-page chunk in each data
		 * block.
		 */
		/* 2a */
		for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;
			uchar_t *r_tmp, *rpt_align;
			size_t r_offset;

			/*
			 * The hypervisor will not allow us to
			 * reference a foreign page (e.g. one
			 * belonging to another domain) by mfn in the
			 * copy operation.  If the data in this mblk is
			 * on such a page we must copy the data into a
			 * local page before initiating the hypervisor
			 * copy operation.
			 */
			if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) {
				mblk_t *ml_new = replace_msg(ml, chunk,
				    mp_prev, ml_prev);

				/* We can still use old ml, but not *ml! */
				if (free == ml)
					free = ml_new;
				if (mp == ml)
					mp = ml_new;
				ml = ml_new;

				xnbp->xnb_stat_rx_foreign_page++;
			}

			rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr);
			r_offset = (uint16_t)(ml->b_rptr - rpt_align);
			r_tmp = ml->b_rptr;

			if (d_offset + chunk > PAGESIZE)
				cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p "
				    "(svd: %p), ml %p,rpt_alg. %p, d_offset "
				    "(%lu) + chunk (%lu) > PAGESIZE %d!",
				    (void *)mp, (void *)saved_mp, (void *)ml,
				    (void *)rpt_align,
				    d_offset, chunk, (int)PAGESIZE);

			while (chunk > 0) {
				size_t part_len;

				if (item_count == xnbp->xnb_rx_cpop_count) {
					if (!grow_cpop_area(xnbp))
						goto failure;
					gop_cp = &xnbp->xnb_rx_cpop[item_count];
				}
				/*
				 * If our mblk crosses a page boundary, we need
				 * to do a separate copy for each page.
				 */
				if (r_offset + chunk > PAGESIZE) {
					part_len = PAGESIZE - r_offset;

					DTRACE_PROBE3(mblk_page_crossed,
					    (mblk_t *), ml, int, chunk, int,
					    (int)r_offset);

					xnbp->xnb_stat_rx_pagebndry_crossed++;
				} else {
					part_len = chunk;
				}

				setup_gop(xnbp, gop_cp, r_tmp, r_offset,
				    d_offset, part_len, rxreq->gref);

				chunk -= part_len;

				len += part_len;
				d_offset += part_len;
				r_tmp += part_len;
				/*
				 * The 2nd, 3rd ... last copies will always
				 * start at r_tmp, therefore r_offset is 0.
				 */
				r_offset = 0;
				gop_cp++;
				item_count++;
			}
			ml_prev = ml;

			DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int,
			    chunk, int, len, int, item_count);
		}
		/* 3 */
		if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_rx_cpop,
		    item_count) != 0) {
			cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed");
			DTRACE_PROBE(HV_granttableopfailed);
		}

		/* 4 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = 0;

		rxresp->flags = 0;

		DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_rx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		for (i = 0; i < item_count; i++) {
			if (xnbp->xnb_rx_cpop[i].status != 0) {
				DTRACE_PROBE2(cpop_status_nonnull, int,
				    (int)xnbp->xnb_rx_cpop[i].status,
				    int, i);
				status = NETIF_RSP_ERROR;
			}
		}

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
			xnbp->xnb_stat_rx_rsp_notok++;
		} else {
			xnbp->xnb_stat_ipackets++;
			xnbp->xnb_stat_rbytes += len;
		}

		loop++;
		prod++;
		mp_prev = mp;
		mp = mp->b_next;
	}
failure:
	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_rx_lock);
		return (mp);
	}

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(mp_prev != NULL);
	mp_prev->b_next = NULL;

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_rx_defer++;

	mutex_exit(&xnbp->xnb_rx_lock);

	/* Free mblk_t structs we have consumed. */
	freemsgchain(free);

	return (mp);
}


static void
xnb_tx_notify_peer(xnb_t *xnbp, boolean_t force)
{
	boolean_t notify;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify);
	if (notify || force) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_tx_notify_sent++;
	} else {
		xnbp->xnb_stat_tx_notify_deferred++;
	}
}

static void
xnb_tx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
{
	RING_IDX i;
	netif_tx_response_t *txresp;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	i = xnbp->xnb_tx_ring.rsp_prod_pvt;

	txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i);
	txresp->id = id;
	txresp->status = status;

	xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1;

	/*
	 * Note that we don't push the change to the peer here - that
	 * is the caller's responsibility.
	 */
}

static void
xnb_txbuf_recycle(xnb_txbuf_t *txp)
{
	xnb_t *xnbp = txp->xt_xnbp;

	kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);

	xnbp->xnb_tx_buf_outstanding--;
}

static int
xnb_txbuf_constructor(void *buf, void *arg, int kmflag)
{
	_NOTE(ARGUNUSED(kmflag));
	xnb_txbuf_t *txp = buf;
	xnb_t *xnbp = arg;
	size_t len;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;

	txp->xt_free_rtn.free_func = xnb_txbuf_recycle;
	txp->xt_free_rtn.free_arg = (caddr_t)txp;
	txp->xt_xnbp = xnbp;
	txp->xt_next = NULL;

	if (ddi_dma_alloc_handle(xnbp->xnb_devinfo, &buf_dma_attr,
	    0, 0, &txp->xt_dma_handle) != DDI_SUCCESS)
		goto failure;

	if (ddi_dma_mem_alloc(txp->xt_dma_handle, PAGESIZE, &data_accattr,
	    DDI_DMA_STREAMING, 0, 0, &txp->xt_buf, &len,
	    &txp->xt_acc_handle) != DDI_SUCCESS)
		goto failure_1;

	if (ddi_dma_addr_bind_handle(txp->xt_dma_handle, NULL, txp->xt_buf,
	    len, DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, 0,
	    &dma_cookie, &ncookies)
	    != DDI_DMA_MAPPED)
		goto failure_2;
	ASSERT(ncookies == 1);

	txp->xt_mfn = xnb_btop(dma_cookie.dmac_laddress);
	txp->xt_buflen = dma_cookie.dmac_size;

	DTRACE_PROBE(txbuf_allocated);

	atomic_add_32(&xnbp->xnb_tx_buf_count, 1);
	xnbp->xnb_tx_buf_outstanding++;

	return (0);

failure_2:
	ddi_dma_mem_free(&txp->xt_acc_handle);

failure_1:
	ddi_dma_free_handle(&txp->xt_dma_handle);

failure:

	return (-1);
}

static void
xnb_txbuf_destructor(void *buf, void *arg)
{
	xnb_txbuf_t *txp = buf;
	xnb_t *xnbp = arg;

	(void) ddi_dma_unbind_handle(txp->xt_dma_handle);
	ddi_dma_mem_free(&txp->xt_acc_handle);
	ddi_dma_free_handle(&txp->xt_dma_handle);

	atomic_add_32(&xnbp->xnb_tx_buf_count, -1);
}
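
/*
 * Sketch of the life cycle of an xnb_txbuf_t (summarising the code
 * above and xnb_from_peer() below, not additional driver logic):
 *
 *	txp = kmem_cache_alloc(xnbp->xnb_tx_buf_cache, KM_NOSLEEP);
 *		the constructor has already bound a page of DMA-able
 *		memory and recorded its mfn in txp->xt_mfn
 *	txp->xt_mblk = desballoc(txp->xt_buf, txp->xt_buflen, 0,
 *	    &txp->xt_free_rtn);
 *		the mblk borrows the buffer rather than copying it
 *	... the mblk is handed up via the flavour's xf_from_peer() ...
 *	freemsg() eventually invokes xnb_txbuf_recycle(), returning the
 *	buffer to the cache; the DMA resources are only torn down by
 *	the destructor when the cache itself reclaims the object.
 */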

/*
 * Take packets from the peer and deliver them onward.
 */
static mblk_t *
xnb_from_peer(xnb_t *xnbp)
{
	RING_IDX start, end, loop;
	gnttab_copy_t *cop;
	xnb_txbuf_t **txpp;
	netif_tx_request_t *txreq;
	boolean_t work_to_do, need_notify = B_FALSE;
	mblk_t *head, *tail;
	int n_data_req, i;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	head = tail = NULL;
around:

	/* LINTED: constant in conditional context */
	RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do);
	if (!work_to_do) {
finished:
		xnb_tx_notify_peer(xnbp, need_notify);

		return (head);
	}

	start = xnbp->xnb_tx_ring.req_cons;
	end = xnbp->xnb_tx_ring.sring->req_prod;

	if ((end - start) > NET_TX_RING_SIZE) {
		/*
		 * This usually indicates that the frontend driver is
		 * misbehaving, as it's not possible to have more than
		 * NET_TX_RING_SIZE ring elements in play at any one
		 * time.
		 *
		 * We reset the ring pointers to the state declared by
		 * the frontend and try to carry on.
		 */
		cmn_err(CE_WARN, "xnb_from_peer: domain %d tried to give us %u "
		    "items in the ring, resetting and trying to recover.",
		    xnbp->xnb_peer, (end - start));

		/* LINTED: constant in conditional context */
		BACK_RING_ATTACH(&xnbp->xnb_tx_ring,
		    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);

		goto around;
	}

	loop = start;
	cop = xnbp->xnb_tx_cop;
	txpp = xnbp->xnb_tx_bufp;
	n_data_req = 0;

	while (loop < end) {
		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);

		if (txreq->flags & NETTXF_extra_info) {
			struct netif_extra_info *erp;
			boolean_t status;

			loop++; /* Consume another slot in the ring. */
			ASSERT(loop <= end);

			erp = (struct netif_extra_info *)
			    RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);

			switch (erp->type) {
			case XEN_NETIF_EXTRA_TYPE_MCAST_ADD:
				ASSERT(xnbp->xnb_multicast_control);
				status = xnbp->xnb_flavour->xf_mcast_add(xnbp,
				    &erp->u.mcast.addr);
				break;
			case XEN_NETIF_EXTRA_TYPE_MCAST_DEL:
				ASSERT(xnbp->xnb_multicast_control);
				status = xnbp->xnb_flavour->xf_mcast_del(xnbp,
				    &erp->u.mcast.addr);
				break;
			default:
				status = B_FALSE;
				cmn_err(CE_WARN, "xnb_from_peer: "
				    "unknown extra type %d", erp->type);
				break;
			}

			xnb_tx_mark_complete(xnbp, txreq->id,
			    status ? NETIF_RSP_OKAY : NETIF_RSP_ERROR);
			need_notify = B_TRUE;
		} else {
			xnb_txbuf_t *txp;

			txp = kmem_cache_alloc(xnbp->xnb_tx_buf_cache,
			    KM_NOSLEEP);
			if (txp == NULL)
				break;

			txp->xt_mblk = desballoc((unsigned char *)txp->xt_buf,
			    txp->xt_buflen, 0, &txp->xt_free_rtn);
			if (txp->xt_mblk == NULL) {
				kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);
				break;
			}

			txp->xt_idx = loop;
			txp->xt_id = txreq->id;

			cop->source.u.ref = txreq->gref;
			cop->source.domid = xnbp->xnb_peer;
			cop->source.offset = txreq->offset;

			cop->dest.u.gmfn = txp->xt_mfn;
			cop->dest.domid = DOMID_SELF;
			cop->dest.offset = 0;

			cop->len = txreq->size;
			cop->flags = GNTCOPY_source_gref;
			cop->status = 0;

			*txpp = txp;

			txpp++;
			cop++;
			n_data_req++;

			ASSERT(n_data_req <= NET_TX_RING_SIZE);
		}

		loop++;
	}

	xnbp->xnb_tx_ring.req_cons = loop;

	if (n_data_req == 0)
		goto around;

	if (HYPERVISOR_grant_table_op(GNTTABOP_copy,
	    xnbp->xnb_tx_cop, n_data_req) != 0) {

		cmn_err(CE_WARN, "xnb_from_peer: copy operation failed");

		txpp = xnbp->xnb_tx_bufp;
		i = n_data_req;
		while (i > 0) {
			kmem_cache_free(xnbp->xnb_tx_buf_cache, *txpp);
			txpp++;
			i--;
		}

		goto finished;
	}

	txpp = xnbp->xnb_tx_bufp;
	cop = xnbp->xnb_tx_cop;
	i = n_data_req;

	while (i > 0) {
		xnb_txbuf_t *txp = *txpp;

		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, txp->xt_idx);

		if (cop->status != 0) {
#ifdef XNB_DEBUG
			cmn_err(CE_WARN, "xnb_from_peer: "
			    "txpp 0x%p failed (%d)",
			    (void *)*txpp, cop->status);
#endif /* XNB_DEBUG */
			xnb_tx_mark_complete(xnbp, txp->xt_id, cop->status);
			freemsg(txp->xt_mblk);
		} else {
			mblk_t *mp;

			mp = txp->xt_mblk;
			mp->b_rptr = mp->b_wptr = (unsigned char *)txp->xt_buf;
			mp->b_wptr += txreq->size;
			mp->b_next = NULL;

			/*
			 * If there are checksum flags, process them
			 * appropriately.
			 */
			if ((txreq->flags &
			    (NETTXF_csum_blank | NETTXF_data_validated))
			    != 0) {
				mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp,
				    mp, txreq->flags);
				xnbp->xnb_stat_tx_cksum_no_need++;

				txp->xt_mblk = mp;
			}

			if (head == NULL) {
				ASSERT(tail == NULL);
				head = mp;
			} else {
				ASSERT(tail != NULL);
				tail->b_next = mp;
			}
			tail = mp;

			xnbp->xnb_stat_opackets++;
			xnbp->xnb_stat_obytes += txreq->size;

			xnb_tx_mark_complete(xnbp, txp->xt_id, cop->status);
		}

		txpp++;
		cop++;
		i--;
	}

	goto around;
	/* NOTREACHED */
}

static uint_t
xnb_intr(caddr_t arg)
{
	xnb_t *xnbp = (xnb_t *)arg;
	mblk_t *mp;

	xnbp->xnb_stat_intr++;

	mutex_enter(&xnbp->xnb_tx_lock);

	ASSERT(xnbp->xnb_connected);

	mp = xnb_from_peer(xnbp);

	mutex_exit(&xnbp->xnb_tx_lock);

	if (!xnbp->xnb_hotplugged) {
		xnbp->xnb_stat_tx_too_early++;
		goto fail;
	}
	if (mp == NULL) {
		xnbp->xnb_stat_spurious_intr++;
		goto fail;
	}

	xnbp->xnb_flavour->xf_from_peer(xnbp, mp);

	return (DDI_INTR_CLAIMED);

fail:
	freemsgchain(mp);
	return (DDI_INTR_CLAIMED);
}
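
/*
 * For reference, the frontend's xenstore directory that
 * xnb_read_oe_config() below consumes typically looks like this
 * (illustrative values):
 *
 *	event-channel = "12"
 *	tx-ring-ref = "768"
 *	rx-ring-ref = "769"
 *	request-rx-copy = "1"
 *	request-multicast-control = "1"
 *
 * The two ring references are grant references to the shared ring
 * pages mapped in xnb_connect_rings(), and "request-rx-copy" selects
 * xnb_copy_to_peer() over the page-flipping path.
 */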

/*
 * Read our configuration from xenstore.
 */
boolean_t
xnb_read_xs_config(xnb_t *xnbp)
{
	char *xsname;
	char mac[ETHERADDRL * 3];

	xsname = xvdi_get_xsname(xnbp->xnb_devinfo);

	if (xenbus_scanf(XBT_NULL, xsname,
	    "mac", "%s", mac) != 0) {
		cmn_err(CE_WARN, "xnb_read_xs_config: "
		    "cannot read mac address from %s",
		    xsname);
		return (B_FALSE);
	}

	if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) {
		cmn_err(CE_WARN,
		    "xnb_read_xs_config: cannot parse mac address %s",
		    mac);
		return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * Read the configuration of the peer from xenstore.
 */
boolean_t
xnb_read_oe_config(xnb_t *xnbp)
{
	char *oename;
	int i;

	oename = xvdi_get_oename(xnbp->xnb_devinfo);

	if (xenbus_gather(XBT_NULL, oename,
	    "event-channel", "%u", &xnbp->xnb_fe_evtchn,
	    "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref,
	    "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref,
	    NULL) != 0) {
		cmn_err(CE_WARN, "xnb_read_oe_config: "
		    "cannot read other-end details from %s",
		    oename);
		return (B_FALSE);
	}

	/*
	 * Check whether our peer requests receive side hypervisor
	 * copy.
	 */
	if (xenbus_scanf(XBT_NULL, oename,
	    "request-rx-copy", "%d", &i) != 0)
		i = 0;
	if (i != 0)
		xnbp->xnb_rx_hv_copy = B_TRUE;

	/*
	 * Check whether our peer requests multicast_control.
	 */
	if (xenbus_scanf(XBT_NULL, oename,
	    "request-multicast-control", "%d", &i) != 0)
		i = 0;
	if (i != 0)
		xnbp->xnb_multicast_control = B_TRUE;

	/*
	 * The Linux backend driver here checks to see if the peer has
	 * set 'feature-no-csum-offload'.  This is used to indicate
	 * that the guest cannot handle receiving packets without a
	 * valid checksum.  We don't check here, because packets passed
	 * to the peer _always_ have a valid checksum.
	 *
	 * There are three cases:
	 *
	 * - the NIC is dedicated: packets from the wire should always
	 *   have a valid checksum.  If the hardware validates the
	 *   checksum then the relevant bit will be set in the packet
	 *   attributes and we will inform the peer.  It can choose to
	 *   ignore the hardware verification.
	 *
	 * - the NIC is shared (VNIC) and a packet originates from the
	 *   wire: this is the same as the case above - the packets
	 *   will have a valid checksum.
	 *
	 * - the NIC is shared (VNIC) and a packet originates from the
	 *   host: the MAC layer ensures that all such packets have a
	 *   valid checksum by calculating one if the stack did not.
	 */

	return (B_TRUE);
}

void
xnb_start_connect(xnb_t *xnbp)
{
	dev_info_t *dip = xnbp->xnb_devinfo;

	if (!xnb_connect_rings(dip)) {
		cmn_err(CE_WARN, "xnb_start_connect: "
		    "cannot connect rings");
		goto failed;
	}

	if (!xnbp->xnb_flavour->xf_start_connect(xnbp)) {
		cmn_err(CE_WARN, "xnb_start_connect: "
		    "flavour failed to connect");
		goto failed;
	}

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
	return;

failed:
	xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
	xnb_disconnect_rings(dip);
	(void) xvdi_switch_state(dip, XBT_NULL,
	    XenbusStateClosed);
	(void) xvdi_post_event(dip, XEN_HP_REMOVE);
}

static boolean_t
xnb_connect_rings(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);
	struct gnttab_map_grant_ref map_op;

	/*
	 * Cannot attempt to connect the rings if already connected.
	 */
	ASSERT(!xnbp->xnb_connected);

	/*
	 * 1. allocate a vaddr for the tx page, one for the rx page.
	 * 2. call GNTTABOP_map_grant_ref to map the relevant pages
	 *    into the allocated vaddr (one for tx, one for rx).
	 * 3. call EVTCHNOP_bind_interdomain to have the event channel
	 *    bound to this domain.
	 * 4. associate the event channel with an interrupt.
	 * 5. enable the interrupt.
	 */

	/* 1.tx */
	xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->xnb_tx_ring_addr != NULL);

	/* 2.tx */
	map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->xnb_tx_ring_ref;
	map_op.dom = xnbp->xnb_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr, NULL);
	if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
	    map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page.");
		goto fail;
	}
	xnbp->xnb_tx_ring_handle = map_op.handle;

	/* LINTED: constant in conditional context */
	BACK_RING_INIT(&xnbp->xnb_tx_ring,
	    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);

	/* 1.rx */
	xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->xnb_rx_ring_addr != NULL);

	/* 2.rx */
	map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->xnb_rx_ring_ref;
	map_op.dom = xnbp->xnb_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr, NULL);
	if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
	    map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page.");
		goto fail;
	}
	xnbp->xnb_rx_ring_handle = map_op.handle;

	/* LINTED: constant in conditional context */
	BACK_RING_INIT(&xnbp->xnb_rx_ring,
	    (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE);

	/* 3 */
	if (xvdi_bind_evtchn(dip, xnbp->xnb_fe_evtchn) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xnb_connect_rings: "
		    "cannot bind event channel %d", xnbp->xnb_fe_evtchn);
		xnbp->xnb_evtchn = INVALID_EVTCHN;
		goto fail;
	}
	xnbp->xnb_evtchn = xvdi_get_evtchn(dip);

	/*
	 * It would be good to set the state to XenbusStateConnected
	 * here as well, but then what if ddi_add_intr() failed?
	 * Changing the state in the store will be noticed by the peer
	 * and cannot be "taken back".
	 */
	mutex_enter(&xnbp->xnb_tx_lock);
	mutex_enter(&xnbp->xnb_rx_lock);

	xnbp->xnb_connected = B_TRUE;

	mutex_exit(&xnbp->xnb_rx_lock);
	mutex_exit(&xnbp->xnb_tx_lock);

	/* 4, 5 */
	if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp)
	    != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt");
		goto fail;
	}
	xnbp->xnb_irq = B_TRUE;

	return (B_TRUE);

fail:
	mutex_enter(&xnbp->xnb_tx_lock);
	mutex_enter(&xnbp->xnb_rx_lock);

	xnbp->xnb_connected = B_FALSE;

	mutex_exit(&xnbp->xnb_rx_lock);
	mutex_exit(&xnbp->xnb_tx_lock);

	return (B_FALSE);
}

static void
xnb_disconnect_rings(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	if (xnbp->xnb_irq) {
		ddi_remove_intr(dip, 0, NULL);
		xnbp->xnb_irq = B_FALSE;
	}

	if (xnbp->xnb_evtchn != INVALID_EVTCHN) {
		xvdi_free_evtchn(dip);
		xnbp->xnb_evtchn = INVALID_EVTCHN;
	}

	if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) {
		struct gnttab_unmap_grant_ref unmap_op;

		unmap_op.host_addr = (uint64_t)(uintptr_t)
		    xnbp->xnb_rx_ring_addr;
		unmap_op.dev_bus_addr = 0;
		unmap_op.handle = xnbp->xnb_rx_ring_handle;
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &unmap_op, 1) != 0)
			cmn_err(CE_WARN, "xnb_disconnect_rings: "
			    "cannot unmap rx-ring page (%d)",
			    unmap_op.status);

		xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	}

	if (xnbp->xnb_rx_ring_addr != NULL) {
		hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
		vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE);
		xnbp->xnb_rx_ring_addr = NULL;
	}

	if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) {
		struct gnttab_unmap_grant_ref unmap_op;

		unmap_op.host_addr = (uint64_t)(uintptr_t)
		    xnbp->xnb_tx_ring_addr;
		unmap_op.dev_bus_addr = 0;
		unmap_op.handle = xnbp->xnb_tx_ring_handle;
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &unmap_op, 1) != 0)
			cmn_err(CE_WARN, "xnb_disconnect_rings: "
			    "cannot unmap tx-ring page (%d)",
			    unmap_op.status);

		xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	}

	if (xnbp->xnb_tx_ring_addr != NULL) {
		hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
		vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE);
		xnbp->xnb_tx_ring_addr = NULL;
	}
}

static void
xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	_NOTE(ARGUNUSED(id, arg));
	xnb_t *xnbp = ddi_get_driver_private(dip);
	XenbusState new_state = *(XenbusState *)impl_data;

	ASSERT(xnbp != NULL);

	switch (new_state) {
	case XenbusStateConnected:
		/* spurious state change */
		if (xnbp->xnb_connected)
			return;

		if (!xnb_read_oe_config(xnbp) ||
		    !xnbp->xnb_flavour->xf_peer_connected(xnbp)) {
			cmn_err(CE_WARN, "xnb_oe_state_change: "
			    "read otherend config error");
			(void) xvdi_switch_state(dip, XBT_NULL,
			    XenbusStateClosed);
			(void) xvdi_post_event(dip, XEN_HP_REMOVE);

			break;
		}


		mutex_enter(&xnbp->xnb_state_lock);
		xnbp->xnb_fe_status = XNB_STATE_READY;
		if (xnbp->xnb_be_status == XNB_STATE_READY)
			xnb_start_connect(xnbp);
		mutex_exit(&xnbp->xnb_state_lock);

		/*
		 * Now that we've attempted to connect it's reasonable
		 * to allow an attempt to detach.
		 */
		xnbp->xnb_detachable = B_TRUE;

		break;

	case XenbusStateClosing:
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);

		break;

	case XenbusStateClosed:
		xnbp->xnb_flavour->xf_peer_disconnected(xnbp);

		mutex_enter(&xnbp->xnb_tx_lock);
		mutex_enter(&xnbp->xnb_rx_lock);

		xnb_disconnect_rings(dip);
		xnbp->xnb_connected = B_FALSE;

		mutex_exit(&xnbp->xnb_rx_lock);
		mutex_exit(&xnbp->xnb_tx_lock);

		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
		(void) xvdi_post_event(dip, XEN_HP_REMOVE);
		/*
		 * In all likelihood this is already set (in the above
		 * case), but if the peer never attempted to connect
		 * and the domain is destroyed we get here without
		 * having been through the case above, so we set it to
		 * be sure.
		 */
		xnbp->xnb_detachable = B_TRUE;

		break;

	default:
		break;
	}
}

static void
xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	_NOTE(ARGUNUSED(id, arg));
	xnb_t *xnbp = ddi_get_driver_private(dip);
	xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;

	ASSERT(xnbp != NULL);

	switch (state) {
	case Connected:
		/* spurious hotplug event */
		if (xnbp->xnb_hotplugged)
			break;

		if (!xnb_read_xs_config(xnbp))
			break;

		if (!xnbp->xnb_flavour->xf_hotplug_connected(xnbp))
			break;

		mutex_enter(&xnbp->xnb_tx_lock);
		mutex_enter(&xnbp->xnb_rx_lock);

		xnbp->xnb_hotplugged = B_TRUE;

		mutex_exit(&xnbp->xnb_rx_lock);
		mutex_exit(&xnbp->xnb_tx_lock);

		mutex_enter(&xnbp->xnb_state_lock);
		xnbp->xnb_be_status = XNB_STATE_READY;
		if (xnbp->xnb_fe_status == XNB_STATE_READY)
			xnb_start_connect(xnbp);
		mutex_exit(&xnbp->xnb_state_lock);

		break;

	default:
		break;
	}
}

static struct modldrv modldrv = {
	&mod_miscops, "xnb",
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

int
_init(void)
{
	int i;

	mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL);

	i = mod_install(&modlinkage);
	if (i != DDI_SUCCESS)
		mutex_destroy(&xnb_alloc_page_lock);

	return (i);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int i;

	i = mod_remove(&modlinkage);
	if (i == DDI_SUCCESS)
		mutex_destroy(&xnb_alloc_page_lock);

	return (i);
}
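
/*
 * Sketch of how a flavour driver might use this module (illustrative
 * only; the real flavours are xnbo and xnbu, and xnb_flavour_t is
 * defined in xnb.h):
 *
 *	static xnb_flavour_t my_flavour = {
 *		...		// xf_from_peer, xf_cksum_from_peer,
 *		...		// xf_cksum_to_peer, etc.
 *	};
 *
 *	static int
 *	my_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 *	{
 *		if (cmd != DDI_ATTACH)
 *			return (DDI_FAILURE);
 *		return (xnb_attach(dip, &my_flavour, NULL));
 *	}
 *
 * xnb then drives the xenbus/hotplug state machine and calls back
 * through the xf_*() entry points declared in xnb.h.
 */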