/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2018 Joyent, Inc.
 */

#ifdef DEBUG
#define	XNB_DEBUG 1
#endif /* DEBUG */

#include "xnb.h"

#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/mac.h>
#include <sys/mac_impl.h> /* For mac_fix_cksum(). */
#include <sys/dlpi.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/types.h>
#include <sys/pattr.h>
#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/balloon_impl.h>
#include <sys/evtchn_impl.h>
#include <sys/gnttab.h>
#include <vm/vm_dep.h>
#include <sys/note.h>
#include <sys/gld.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>

/*
 * The terms "transmit" and "receive" are used in alignment with domU,
 * which means that packets originating from the peer domU are "transmitted"
 * to other parts of the system and packets are "received" from them.
 */

/*
 * Should we allow guests to manipulate multicast group membership?
 */
static boolean_t	xnb_multicast_control = B_TRUE;

static boolean_t	xnb_connect_rings(dev_info_t *);
static void	xnb_disconnect_rings(dev_info_t *);
static void	xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);
static void	xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);

static int	xnb_txbuf_constructor(void *, void *, int);
static void	xnb_txbuf_destructor(void *, void *);
static void	xnb_tx_notify_peer(xnb_t *, boolean_t);
static void	xnb_tx_mark_complete(xnb_t *, RING_IDX, int16_t);

mblk_t		*xnb_to_peer(xnb_t *, mblk_t *);
mblk_t		*xnb_copy_to_peer(xnb_t *, mblk_t *);

static void	setup_gop(xnb_t *, gnttab_copy_t *, uchar_t *,
    size_t, size_t, size_t, grant_ref_t);
#pragma inline(setup_gop)
static boolean_t	is_foreign(void *);
#pragma inline(is_foreign)

#define	INVALID_GRANT_HANDLE	((grant_handle_t)-1)
#define	INVALID_GRANT_REF	((grant_ref_t)-1)

static kmutex_t	xnb_alloc_page_lock;

/*
 * On a 32 bit PAE system physical and machine addresses are larger
 * than 32 bits.  ddi_btop() on such systems takes an unsigned long
 * argument, and so addresses above 4G are truncated before ddi_btop()
 * gets to see them.  To avoid this, code the shift operation here.
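 *
 * For example, with a 4k page size a machine address of 0x100000000
 * (4G) must map to page number 0x100000; truncating the address to 32
 * bits before shifting would instead yield page 0.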
 */
#define	xnb_btop(addr)	((addr) >> PAGESHIFT)

/* DMA attributes for transmit and receive data */
static ddi_dma_attr_t buf_dma_attr = {
	DMA_ATTR_V0,		/* version of this structure */
	0,			/* lowest usable address */
	0xffffffffffffffffULL,	/* highest usable address */
	0x7fffffff,		/* maximum DMAable byte count */
	MMU_PAGESIZE,		/* alignment in bytes */
	0x7ff,			/* bitmap of burst sizes */
	1,			/* minimum transfer */
	0xffffffffU,		/* maximum transfer */
	0xffffffffffffffffULL,	/* maximum segment length */
	1,			/* maximum number of segments */
	1,			/* granularity */
	0,			/* flags (reserved) */
};

/* DMA access attributes for data: NOT to be byte swapped. */
static ddi_device_acc_attr_t data_accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

/*
 * Statistics.
 */
static const char * const aux_statistics[] = {
	"rx_cksum_deferred",
	"tx_cksum_no_need",
	"rx_rsp_notok",
	"tx_notify_deferred",
	"tx_notify_sent",
	"rx_notify_deferred",
	"rx_notify_sent",
	"tx_too_early",
	"rx_too_early",
	"rx_allocb_failed",
	"tx_allocb_failed",
	"rx_foreign_page",
	"mac_full",
	"spurious_intr",
	"allocation_success",
	"allocation_failure",
	"small_allocation_success",
	"small_allocation_failure",
	"other_allocation_failure",
	"rx_pageboundary_crossed",
	"rx_cpoparea_grown",
	"csum_hardware",
	"csum_software",
	"tx_overflow_page",
	"tx_unexpected_flags",
};

static int
xnb_ks_aux_update(kstat_t *ksp, int flag)
{
	xnb_t *xnbp;
	kstat_named_t *knp;

	if (flag != KSTAT_READ)
		return (EACCES);

	xnbp = ksp->ks_private;
	knp = ksp->ks_data;

	/*
	 * Assignment order should match that of the names in
	 * aux_statistics.
	 */
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_no_need;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_rsp_notok;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_foreign_page;
	(knp++)->value.ui64 = xnbp->xnb_stat_mac_full;
	(knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_pagebndry_crossed;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cpoparea_grown;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_software;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_overflow_page;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_unexpected_flags;

	return (0);
}

static boolean_t
xnb_ks_init(xnb_t *xnbp)
{
	int nstat = sizeof (aux_statistics) /
	    sizeof (aux_statistics[0]);
	const char * const *cp = aux_statistics;
	kstat_named_t *knp;

	/*
	 * Create and initialise kstats.
	 */
	xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo),
	    ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net",
	    KSTAT_TYPE_NAMED, nstat, 0);
	if (xnbp->xnb_kstat_aux == NULL)
		return (B_FALSE);

	xnbp->xnb_kstat_aux->ks_private = xnbp;
	xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update;

	knp = xnbp->xnb_kstat_aux->ks_data;
	while (nstat > 0) {
		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);

		knp++;
		cp++;
		nstat--;
	}

	kstat_install(xnbp->xnb_kstat_aux);

	return (B_TRUE);
}

static void
xnb_ks_free(xnb_t *xnbp)
{
	kstat_delete(xnbp->xnb_kstat_aux);
}

/*
 * Calculate and insert the transport checksum for an arbitrary packet.
 */
static mblk_t *
xnb_software_csum(xnb_t *xnbp, mblk_t *mp)
{
	_NOTE(ARGUNUSED(xnbp));

	/*
	 * XXPV dme: shouldn't rely on mac_fix_cksum(), not least
	 * because it doesn't cover all of the interesting cases :-(
	 */
	mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);
	mac_hw_emul(&mp, NULL, NULL, MAC_HWCKSUM_EMUL);
	return (mp);
}

mblk_t *
xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab)
{
	struct ether_header *ehp;
	uint16_t sap;
	uint32_t offset;
	ipha_t *ipha;

	ASSERT(mp->b_next == NULL);

	/*
	 * Check that the packet is contained in a single mblk.  In
	 * the "from peer" path this is true today, but may change
	 * when scatter gather support is added.  In the "to peer"
	 * path we cannot be sure, but in most cases it will be true
	 * (in the xnbo case the packet has come from a MAC device
	 * which is unlikely to split packets).
	 */
	if (mp->b_cont != NULL)
		goto software;

	/*
	 * If the MAC has no hardware capability don't do any further
	 * checking.
	 */
	if (capab == 0)
		goto software;

	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
	ehp = (struct ether_header *)mp->b_rptr;

	if (ntohs(ehp->ether_type) == VLAN_TPID) {
		struct ether_vlan_header *evhp;

		ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
		evhp = (struct ether_vlan_header *)mp->b_rptr;
		sap = ntohs(evhp->ether_type);
		offset = sizeof (struct ether_vlan_header);
	} else {
		sap = ntohs(ehp->ether_type);
		offset = sizeof (struct ether_header);
	}

	/*
	 * We only attempt to do IPv4 packets in hardware.
	 */
	if (sap != ETHERTYPE_IP)
		goto software;

	/*
	 * We know that this is an IPv4 packet.
	 */
	ipha = (ipha_t *)(mp->b_rptr + offset);

	switch (ipha->ipha_protocol) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		uint32_t start, length, stuff, cksum;
		uint16_t *stuffp;

		/*
		 * This is a TCP/IPv4 or UDP/IPv4 packet, for which we
		 * can use full IPv4 and partial checksum offload.
		 */
		if ((capab & (HCKSUM_INET_FULL_V4|HCKSUM_INET_PARTIAL)) == 0)
			break;

		start = IP_SIMPLE_HDR_LENGTH;
		length = ntohs(ipha->ipha_length);
		if (ipha->ipha_protocol == IPPROTO_TCP) {
			stuff = start + TCP_CHECKSUM_OFFSET;
			cksum = IP_TCP_CSUM_COMP;
		} else {
			stuff = start + UDP_CHECKSUM_OFFSET;
			cksum = IP_UDP_CSUM_COMP;
		}
		stuffp = (uint16_t *)(mp->b_rptr + offset + stuff);

		if (capab & HCKSUM_INET_FULL_V4) {
			/*
			 * Some devices require that the checksum
			 * field of the packet is zero for full
			 * offload.
			 */
			*stuffp = 0;

			mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		if (capab & HCKSUM_INET_PARTIAL) {
			if (*stuffp == 0) {
				ipaddr_t src, dst;

				/*
				 * Older Solaris guests don't insert
				 * the pseudo-header checksum, so we
				 * calculate it here.
				 */
				src = ipha->ipha_src;
				dst = ipha->ipha_dst;

				cksum += (dst >> 16) + (dst & 0xFFFF);
				cksum += (src >> 16) + (src & 0xFFFF);
				cksum += length - IP_SIMPLE_HDR_LENGTH;

				cksum = (cksum >> 16) + (cksum & 0xFFFF);
				cksum = (cksum >> 16) + (cksum & 0xFFFF);

				ASSERT(cksum <= 0xFFFF);

				*stuffp = (uint16_t)(cksum ? cksum : ~cksum);
			}

			mac_hcksum_set(mp, start, stuff, length, 0,
			    HCK_PARTIALCKSUM);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		/* NOTREACHED */
		break;
	}

	default:
		/* Use software. */
		break;
	}

software:
	/*
	 * We are not able to use any offload so do the whole thing in
	 * software.
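	 * (xnb_software_csum() does this by asking the MAC layer to
	 * emulate the checksum via mac_hw_emul().)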
	 */
	xnbp->xnb_stat_csum_software++;

	return (xnb_software_csum(xnbp, mp));
}

int
xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
{
	xnb_t *xnbp;
	char *xsname;
	char cachename[32];

	xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP);

	xnbp->xnb_flavour = flavour;
	xnbp->xnb_flavour_data = flavour_data;
	xnbp->xnb_devinfo = dip;
	xnbp->xnb_evtchn = INVALID_EVTCHN;
	xnbp->xnb_irq = B_FALSE;
	xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_connected = B_FALSE;
	xnbp->xnb_hotplugged = B_FALSE;
	xnbp->xnb_detachable = B_FALSE;
	xnbp->xnb_peer = xvdi_get_oeid(dip);
	xnbp->xnb_be_status = XNB_STATE_INIT;
	xnbp->xnb_fe_status = XNB_STATE_INIT;

	xnbp->xnb_tx_buf_count = 0;

	xnbp->xnb_rx_hv_copy = B_FALSE;
	xnbp->xnb_multicast_control = B_FALSE;

	xnbp->xnb_rx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
	ASSERT(xnbp->xnb_rx_va != NULL);

	if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie)
	    != DDI_SUCCESS)
		goto failure;

	/* Allocated on demand, when/if we enter xnb_copy_to_peer(). */
	xnbp->xnb_rx_cpop = NULL;
	xnbp->xnb_rx_cpop_count = 0;

	mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_state_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);

	/* Set driver private pointer now. */
	ddi_set_driver_private(dip, xnbp);

	(void) sprintf(cachename, "xnb_tx_buf_cache_%d", ddi_get_instance(dip));
	xnbp->xnb_tx_buf_cache = kmem_cache_create(cachename,
	    sizeof (xnb_txbuf_t), 0,
	    xnb_txbuf_constructor, xnb_txbuf_destructor,
	    NULL, xnbp, NULL, 0);
	if (xnbp->xnb_tx_buf_cache == NULL)
		goto failure_0;

	if (!xnb_ks_init(xnbp))
		goto failure_1;

	/*
	 * Receive notification of changes in the state of the
	 * driver in the guest domain.
	 */
	if (xvdi_add_event_handler(dip, XS_OE_STATE, xnb_oe_state_change,
	    NULL) != DDI_SUCCESS)
		goto failure_2;

	/*
	 * Receive notification of hotplug events.
	 */
	if (xvdi_add_event_handler(dip, XS_HP_STATE, xnb_hp_state_change,
	    NULL) != DDI_SUCCESS)
		goto failure_2;

	xsname = xvdi_get_xsname(dip);

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-multicast-control", "%d",
	    xnb_multicast_control ? 1 : 0) != 0)
		goto failure_3;

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-copy", "%d", 1) != 0)
		goto failure_3;
	/*
	 * Linux domUs seem to depend on "feature-rx-flip" being 0
	 * in addition to "feature-rx-copy" being 1.  It seems strange
	 * to use four possible states to describe a binary decision,
	 * but we might as well play nice.
	 */
	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-flip", "%d", 0) != 0)
		goto failure_3;

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
	(void) xvdi_post_event(dip, XEN_HP_ADD);

	return (DDI_SUCCESS);

failure_3:
	xvdi_remove_event_handler(dip, NULL);

failure_2:
	xnb_ks_free(xnbp);

failure_1:
	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);

failure_0:
	mutex_destroy(&xnbp->xnb_state_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

failure:
	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);
	kmem_free(xnbp, sizeof (*xnbp));
	return (DDI_FAILURE);
}

void
xnb_detach(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	ASSERT(xnbp != NULL);
	ASSERT(!xnbp->xnb_connected);
	ASSERT(xnbp->xnb_tx_buf_count == 0);

	xnb_disconnect_rings(dip);

	xvdi_remove_event_handler(dip, NULL);

	xnb_ks_free(xnbp);

	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);

	ddi_set_driver_private(dip, NULL);

	mutex_destroy(&xnbp->xnb_state_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

	if (xnbp->xnb_rx_cpop_count > 0)
		kmem_free(xnbp->xnb_rx_cpop, sizeof (xnbp->xnb_rx_cpop[0])
		    * xnbp->xnb_rx_cpop_count);

	ASSERT(xnbp->xnb_rx_va != NULL);
	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);

	kmem_free(xnbp, sizeof (*xnbp));
}

/*
 * Allocate a page from the hypervisor to be flipped to the peer.
 *
 * Try to get pages in batches to reduce the overhead of calls into
 * the balloon driver.
 */
static mfn_t
xnb_alloc_page(xnb_t *xnbp)
{
#define	WARNING_RATE_LIMIT 100
#define	BATCH_SIZE 256
	static mfn_t mfns[BATCH_SIZE];	/* common across all instances */
	static int nth = BATCH_SIZE;
	mfn_t mfn;

	mutex_enter(&xnb_alloc_page_lock);
	if (nth == BATCH_SIZE) {
		if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) {
			xnbp->xnb_stat_allocation_failure++;
			mutex_exit(&xnb_alloc_page_lock);

			/*
			 * Try for a single page in low memory situations.
			 */
			if (balloon_alloc_pages(1, &mfn) != 1) {
				if ((xnbp->xnb_stat_small_allocation_failure++
				    % WARNING_RATE_LIMIT) == 0)
					cmn_err(CE_WARN, "xnb_alloc_page: "
					    "Cannot allocate memory to "
					    "transfer packets to peer.");
				return (0);
			} else {
				xnbp->xnb_stat_small_allocation_success++;
				return (mfn);
			}
		}

		nth = 0;
		xnbp->xnb_stat_allocation_success++;
	}

	mfn = mfns[nth++];
	mutex_exit(&xnb_alloc_page_lock);

	ASSERT(mfn != 0);

	return (mfn);
#undef BATCH_SIZE
#undef WARNING_RATE_LIMIT
}

/*
 * Free a page back to the hypervisor.
 *
 * This happens only in the error path, so batching is not worth the
 * complication.
 */
static void
xnb_free_page(xnb_t *xnbp, mfn_t mfn)
{
	_NOTE(ARGUNUSED(xnbp));
	int r;
	pfn_t pfn;

	pfn = xen_assign_pfn(mfn);
	pfnzero(pfn, 0, PAGESIZE);
	xen_release_pfn(pfn);

	if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) {
		cmn_err(CE_WARN, "free_page: cannot decrease memory "
		    "reservation (%d): page kept but unusable (mfn = 0x%lx).",
		    r, mfn);
	}
}

/*
 * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but using
 * local variables.  Used in both xnb_to_peer() and xnb_copy_to_peer().
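 *
 * The value is the lesser of the number of unconsumed requests
 * (req_prod - loop) and the number of free response slots
 * (RING_SIZE - (loop - prod)), where 'loop' and 'prod' are the
 * caller's local copies of req_cons and rsp_prod_pvt.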
 */
#define	XNB_RING_HAS_UNCONSUMED_REQUESTS(_r)	\
	((((_r)->sring->req_prod - loop) <	\
	    (RING_SIZE(_r) - (loop - prod))) ?	\
	    ((_r)->sring->req_prod - loop) :	\
	    (RING_SIZE(_r) - (loop - prod)))

/*
 * Pass packets to the peer using page flipping.
 */
mblk_t *
xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *prev = NULL;
	size_t len;
	gnttab_transfer_t *gop;
	boolean_t notify;
	RING_IDX loop, prod, end;

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a new page from the hypervisor.
	 * 2. get a request slot from the ring.
	 * 3. copy the data into the new page.
	 * 4. transfer the page to the peer.
	 * 5. update the request slot.
	 * 6. kick the peer.
	 * 7. free mp.
	 *
	 * In order to reduce the number of hypercalls, we prepare
	 * several packets for the peer and perform a single hypercall
	 * to transfer them.
	 */

	len = 0;
	mutex_enter(&xnbp->xnb_rx_lock);

	/*
	 * If we are not connected to the peer or have not yet
	 * finished hotplug it is too early to pass packets to the
	 * peer.
	 */
	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_rx_lock);
		DTRACE_PROBE(flip_rx_too_early);
		xnbp->xnb_stat_rx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_rx_top;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {

		mfn_t mfn;
		pfn_t pfn;
		netif_rx_request_t *rxreq;
		netif_rx_response_t *rxresp;
		char *valoop;
		mblk_t *ml;
		uint16_t cksum_flags;

		/* 1 */
		if ((mfn = xnb_alloc_page(xnbp)) == 0) {
			xnbp->xnb_stat_rx_defer++;
			break;
		}

		/* 2 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* Assign a pfn and map the new page at the allocated va. */
		pfn = xen_assign_pfn(mfn);
		hat_devload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
		    pfn, PROT_READ | PROT_WRITE, HAT_LOAD);

		/* 3 */
		len = 0;
		valoop = xnbp->xnb_rx_va;
		for (ml = mp; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;

			bcopy(ml->b_rptr, valoop, chunk);
			valoop += chunk;
			len += chunk;
		}

		ASSERT(len < PAGESIZE);

		/* Release the pfn. */
		hat_unload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
		    HAT_UNLOAD_UNMAP);
		xen_release_pfn(pfn);

		/* 4 */
		gop->mfn = mfn;
		gop->domid = xnbp->xnb_peer;
		gop->ref = rxreq->gref;

		/* 5.1 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = 0;
		rxresp->flags = 0;

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_rx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		loop++;
		prod++;
		gop++;
		prev = mp;
		mp = mp->b_next;
	}

	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_rx_lock);
		return (mp);
	}

	end = loop;

	/*
	 * Unlink the end of the 'done' list from the remainder.
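	 * 'prev' points at the last packet queued to the peer; anything
	 * from 'mp' onward is handed back to the caller untouched.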
	 */
	ASSERT(prev != NULL);
	prev->b_next = NULL;

	if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_rx_top,
	    loop - xnbp->xnb_rx_ring.req_cons) != 0) {
		cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed");
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_rx_top;

	while (loop < end) {
		int16_t status = NETIF_RSP_OKAY;

		if (gop->status != 0) {
			status = NETIF_RSP_ERROR;

			/*
			 * If the status is anything other than
			 * GNTST_bad_page then we don't own the page
			 * any more, so don't try to give it back.
			 */
			if (gop->status != GNTST_bad_page)
				gop->mfn = 0;
		} else {
			/* The page is no longer ours. */
			gop->mfn = 0;
		}

		if (gop->mfn != 0)
			/*
			 * Give back the page, as we won't be using
			 * it.
			 */
			xnb_free_page(xnbp, gop->mfn);
		else
			/*
			 * We gave away a page, update our accounting
			 * now.
			 */
			balloon_drv_subtracted(1);

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
		} else {
			xnbp->xnb_stat_ipackets++;
			xnbp->xnb_stat_rbytes += len;
		}

		loop++;
		prod++;
		gop++;
	}

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_rx_defer++;

	mutex_exit(&xnbp->xnb_rx_lock);

	/* Free mblk_t's that we consumed. */
	freemsgchain(free);

	return (mp);
}

/* Helper functions for xnb_copy_to_peer(). */

/*
 * Grow the array of copy operation descriptors.
 */
static boolean_t
grow_cpop_area(xnb_t *xnbp)
{
	size_t count;
	gnttab_copy_t *new;

	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

	count = xnbp->xnb_rx_cpop_count + CPOP_DEFCNT;

	if ((new = kmem_alloc(sizeof (new[0]) * count, KM_NOSLEEP)) == NULL) {
		xnbp->xnb_stat_other_allocation_failure++;
		return (B_FALSE);
	}

	bcopy(xnbp->xnb_rx_cpop, new,
	    sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);

	kmem_free(xnbp->xnb_rx_cpop,
	    sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);

	xnbp->xnb_rx_cpop = new;
	xnbp->xnb_rx_cpop_count = count;

	xnbp->xnb_stat_rx_cpoparea_grown++;

	return (B_TRUE);
}

/*
 * Check whether an address is on a page that's foreign to this domain.
 */
static boolean_t
is_foreign(void *addr)
{
	pfn_t pfn = hat_getpfnum(kas.a_hat, addr);

	return ((pfn & PFN_IS_FOREIGN_MFN) == PFN_IS_FOREIGN_MFN);
}

/*
 * Insert a newly allocated mblk into a chain, replacing the old one.
 */
static mblk_t *
replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev)
{
	uint32_t start, stuff, end, value, flags;
	mblk_t *new_mp;

	new_mp = copyb(mp);
	if (new_mp == NULL) {
		cmn_err(CE_PANIC, "replace_msg: cannot alloc new message "
		    "for %p, len %lu", (void *) mp, len);
	}

	mac_hcksum_get(mp, &start, &stuff, &end, &value, &flags);
	mac_hcksum_set(new_mp, start, stuff, end, value, flags);

	new_mp->b_next = mp->b_next;
	new_mp->b_prev = mp->b_prev;
	new_mp->b_cont = mp->b_cont;

	/* Make sure we only overwrite pointers to the mblk being replaced. */
	if (mp_prev != NULL && mp_prev->b_next == mp)
		mp_prev->b_next = new_mp;

	if (ml_prev != NULL && ml_prev->b_cont == mp)
		ml_prev->b_cont = new_mp;

	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	freemsg(mp);

	return (new_mp);
}

/*
 * Set all the fields in a gnttab_copy_t.
 */
static void
setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr,
    size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref)
{
	ASSERT(xnbp != NULL && gp != NULL);

	gp->source.offset = s_off;
	gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr));
	gp->source.domid = DOMID_SELF;

	gp->len = (uint16_t)len;
	gp->flags = GNTCOPY_dest_gref;
	gp->status = 0;

	gp->dest.u.ref = d_ref;
	gp->dest.offset = d_off;
	gp->dest.domid = xnbp->xnb_peer;
}

/*
 * Pass packets to the peer using hypervisor copy operations.
 */
mblk_t *
xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *mp_prev = NULL, *saved_mp = mp;
	mblk_t *ml, *ml_prev;
	boolean_t notify;
	RING_IDX loop, prod;
	int i;

	/*
	 * If the peer does not pre-post buffers for received packets,
	 * use page flipping to pass packets to it.
	 */
	if (!xnbp->xnb_rx_hv_copy)
		return (xnb_to_peer(xnbp, mp));

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a request slot from the ring.
	 * 2. set up data for hypercall (see NOTE below)
	 * 3. have the hypervisor copy the data
	 * 4. update the request slot.
	 * 5. kick the peer.
	 *
	 * NOTE ad 2.
	 * In order to reduce the number of hypercalls, we prepare
	 * several mblks (mp->b_cont != NULL) for the peer and
	 * perform a single hypercall to transfer them.  We also have
	 * to set up a separate copy operation for every page.
	 *
	 * If we have more than one packet (mp->b_next != NULL), we do
	 * this whole dance repeatedly.
	 */

	mutex_enter(&xnbp->xnb_rx_lock);

	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_rx_lock);
		DTRACE_PROBE(copy_rx_too_early);
		xnbp->xnb_stat_rx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
		netif_rx_request_t *rxreq;
		size_t d_offset, len;
		int item_count;
		gnttab_copy_t *gop_cp;
		netif_rx_response_t *rxresp;
		uint16_t cksum_flags;
		int16_t status = NETIF_RSP_OKAY;

		/* 1 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_copy_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* 2 */
		d_offset = 0;
		len = 0;
		item_count = 0;

		gop_cp = xnbp->xnb_rx_cpop;

		/*
		 * We walk the b_cont pointers and set up a
		 * gnttab_copy_t for each sub-page chunk in each data
		 * block.
		 */
		/* 2a */
		for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;
			uchar_t *r_tmp, *rpt_align;
			size_t r_offset;

			/*
			 * The hypervisor will not allow us to
			 * reference a foreign page (e.g. one
			 * belonging to another domain) by mfn in the
			 * copy operation. If the data in this mblk is
			 * on such a page we must copy the data into a
			 * local page before initiating the hypervisor
			 * copy operation.
			 */
			if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) {
				mblk_t *ml_new = replace_msg(ml, chunk,
				    mp_prev, ml_prev);

				/* We can still use old ml, but not *ml! */
				if (free == ml)
					free = ml_new;
				if (mp == ml)
					mp = ml_new;
				ml = ml_new;

				xnbp->xnb_stat_rx_foreign_page++;
			}

			rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr);
			r_offset = (uint16_t)(ml->b_rptr - rpt_align);
			r_tmp = ml->b_rptr;

			if (d_offset + chunk > PAGESIZE)
				cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p "
				    "(svd: %p), ml %p, rpt_alg. %p, d_offset "
				    "(%lu) + chunk (%lu) > PAGESIZE %d!",
				    (void *)mp, (void *)saved_mp, (void *)ml,
				    (void *)rpt_align,
				    d_offset, chunk, (int)PAGESIZE);

			while (chunk > 0) {
				size_t part_len;

				if (item_count == xnbp->xnb_rx_cpop_count) {
					if (!grow_cpop_area(xnbp))
						goto failure;
					gop_cp = &xnbp->xnb_rx_cpop[item_count];
				}
				/*
				 * If our mblk crosses a page boundary, we need
				 * to do a separate copy for each page.
				 */
				if (r_offset + chunk > PAGESIZE) {
					part_len = PAGESIZE - r_offset;

					DTRACE_PROBE3(mblk_page_crossed,
					    (mblk_t *), ml, int, chunk, int,
					    (int)r_offset);

					xnbp->xnb_stat_rx_pagebndry_crossed++;
				} else {
					part_len = chunk;
				}

				setup_gop(xnbp, gop_cp, r_tmp, r_offset,
				    d_offset, part_len, rxreq->gref);

				chunk -= part_len;

				len += part_len;
				d_offset += part_len;
				r_tmp += part_len;
				/*
				 * The 2nd, 3rd ... last copies will always
				 * start at r_tmp, therefore r_offset is 0.
				 */
				r_offset = 0;
				gop_cp++;
				item_count++;
			}
			ml_prev = ml;

			DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int,
			    chunk, int, len, int, item_count);
		}
		/* 3 */
		if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_rx_cpop,
		    item_count) != 0) {
			cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed");
			DTRACE_PROBE(HV_granttableopfailed);
		}

		/* 4 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = 0;

		rxresp->flags = 0;

		DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_rx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		for (i = 0; i < item_count; i++) {
			if (xnbp->xnb_rx_cpop[i].status != 0) {
				DTRACE_PROBE2(cpop_status_nonnull, int,
				    (int)xnbp->xnb_rx_cpop[i].status,
				    int, i);
				status = NETIF_RSP_ERROR;
			}
		}

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
			xnbp->xnb_stat_rx_rsp_notok++;
		} else {
			xnbp->xnb_stat_ipackets++;
			xnbp->xnb_stat_rbytes += len;
		}

		loop++;
		prod++;
		mp_prev = mp;
		mp = mp->b_next;
	}
failure:
	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_rx_lock);
		return (mp);
	}

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(mp_prev != NULL);
	mp_prev->b_next = NULL;

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_rx_defer++;

	mutex_exit(&xnbp->xnb_rx_lock);

	/* Free mblk_t structs we have consumed. */
	freemsgchain(free);

	return (mp);
}


static void
xnb_tx_notify_peer(xnb_t *xnbp, boolean_t force)
{
	boolean_t notify;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify);
	if (notify || force) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_tx_notify_sent++;
	} else {
		xnbp->xnb_stat_tx_notify_deferred++;
	}
}

static void
xnb_tx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
{
	RING_IDX i;
	netif_tx_response_t *txresp;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	i = xnbp->xnb_tx_ring.rsp_prod_pvt;

	txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i);
	txresp->id = id;
	txresp->status = status;

	xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1;

	/*
	 * Note that we don't push the change to the peer here - that
	 * is the caller's responsibility.
	 */
}

static void
xnb_txbuf_recycle(xnb_txbuf_t *txp)
{
	xnb_t *xnbp = txp->xt_xnbp;

	kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);

	xnbp->xnb_tx_buf_outstanding--;
}

static int
xnb_txbuf_constructor(void *buf, void *arg, int kmflag)
{
	_NOTE(ARGUNUSED(kmflag));
	xnb_txbuf_t *txp = buf;
	xnb_t *xnbp = arg;
	size_t len;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;

	txp->xt_free_rtn.free_func = xnb_txbuf_recycle;
	txp->xt_free_rtn.free_arg = (caddr_t)txp;
	txp->xt_xnbp = xnbp;
	txp->xt_next = NULL;

	if (ddi_dma_alloc_handle(xnbp->xnb_devinfo, &buf_dma_attr,
	    0, 0, &txp->xt_dma_handle) != DDI_SUCCESS)
		goto failure;

	if (ddi_dma_mem_alloc(txp->xt_dma_handle, PAGESIZE, &data_accattr,
	    DDI_DMA_STREAMING, 0, 0, &txp->xt_buf, &len,
	    &txp->xt_acc_handle) != DDI_SUCCESS)
		goto failure_1;

	if (ddi_dma_addr_bind_handle(txp->xt_dma_handle, NULL, txp->xt_buf,
	    len, DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, 0,
	    &dma_cookie, &ncookies)
	    != DDI_DMA_MAPPED)
		goto failure_2;
	ASSERT(ncookies == 1);

	txp->xt_mfn = xnb_btop(dma_cookie.dmac_laddress);
	txp->xt_buflen = dma_cookie.dmac_size;

	DTRACE_PROBE(txbuf_allocated);

	atomic_inc_32(&xnbp->xnb_tx_buf_count);
	xnbp->xnb_tx_buf_outstanding++;

	return (0);

failure_2:
	ddi_dma_mem_free(&txp->xt_acc_handle);

failure_1:
	ddi_dma_free_handle(&txp->xt_dma_handle);

failure:

	return (-1);
}

static void
xnb_txbuf_destructor(void *buf, void *arg)
{
	xnb_txbuf_t *txp = buf;
	xnb_t *xnbp = arg;

	(void) ddi_dma_unbind_handle(txp->xt_dma_handle);
	ddi_dma_mem_free(&txp->xt_acc_handle);
	ddi_dma_free_handle(&txp->xt_dma_handle);

	atomic_dec_32(&xnbp->xnb_tx_buf_count);
}

/*
 * Take packets from the peer and deliver them onward.
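 *
 * Called from xnb_intr() with xnb_tx_lock held; returns a chain of
 * mblks (linked by b_next) holding copies of the peer's transmit
 * data.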
 */
static mblk_t *
xnb_from_peer(xnb_t *xnbp)
{
	RING_IDX start, end, loop;
	gnttab_copy_t *cop;
	xnb_txbuf_t **txpp;
	netif_tx_request_t *txreq;
	boolean_t work_to_do, need_notify = B_FALSE;
	mblk_t *head, *tail;
	int n_data_req, i;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	head = tail = NULL;
around:

	/* LINTED: constant in conditional context */
	RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do);
	if (!work_to_do) {
finished:
		xnb_tx_notify_peer(xnbp, need_notify);

		return (head);
	}

	start = xnbp->xnb_tx_ring.req_cons;
	end = xnbp->xnb_tx_ring.sring->req_prod;

	if ((end - start) > NET_TX_RING_SIZE) {
		/*
		 * This usually indicates that the frontend driver is
		 * misbehaving, as it's not possible to have more than
		 * NET_TX_RING_SIZE ring elements in play at any one
		 * time.
		 *
		 * We reset the ring pointers to the state declared by
		 * the frontend and try to carry on.
		 */
		cmn_err(CE_WARN, "xnb_from_peer: domain %d tried to give us %u "
		    "items in the ring, resetting and trying to recover.",
		    xnbp->xnb_peer, (end - start));

		/* LINTED: constant in conditional context */
		BACK_RING_ATTACH(&xnbp->xnb_tx_ring,
		    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);

		goto around;
	}

	loop = start;
	cop = xnbp->xnb_tx_cop;
	txpp = xnbp->xnb_tx_bufp;
	n_data_req = 0;

	while (loop < end) {
		static const uint16_t acceptable_flags =
		    NETTXF_csum_blank |
		    NETTXF_data_validated |
		    NETTXF_extra_info;
		uint16_t unexpected_flags;

		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);

		unexpected_flags = txreq->flags & ~acceptable_flags;
		if (unexpected_flags != 0) {
			/*
			 * The peer used flag bits that we do not
			 * recognize.
			 */
			cmn_err(CE_WARN, "xnb_from_peer: "
			    "unexpected flag bits (0x%x) from peer "
			    "in transmit request",
			    unexpected_flags);
			xnbp->xnb_stat_tx_unexpected_flags++;

			/* Mark this entry as failed. */
			xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
			need_notify = B_TRUE;

		} else if (txreq->flags & NETTXF_extra_info) {
			struct netif_extra_info *erp;
			boolean_t status;

			loop++; /* Consume another slot in the ring. */
			ASSERT(loop <= end);

			erp = (struct netif_extra_info *)
			    RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);

			switch (erp->type) {
			case XEN_NETIF_EXTRA_TYPE_MCAST_ADD:
				ASSERT(xnbp->xnb_multicast_control);
				status = xnbp->xnb_flavour->xf_mcast_add(xnbp,
				    &erp->u.mcast.addr);
				break;
			case XEN_NETIF_EXTRA_TYPE_MCAST_DEL:
				ASSERT(xnbp->xnb_multicast_control);
				status = xnbp->xnb_flavour->xf_mcast_del(xnbp,
				    &erp->u.mcast.addr);
				break;
			default:
				status = B_FALSE;
				cmn_err(CE_WARN, "xnb_from_peer: "
				    "unknown extra type %d", erp->type);
				break;
			}

			xnb_tx_mark_complete(xnbp, txreq->id,
			    status ? NETIF_RSP_OKAY : NETIF_RSP_ERROR);
			need_notify = B_TRUE;

		} else if ((txreq->offset > PAGESIZE) ||
		    (txreq->offset + txreq->size > PAGESIZE)) {
			/*
			 * Peer attempted to refer to data beyond the
			 * end of the granted page.
			 */
			cmn_err(CE_WARN, "xnb_from_peer: "
			    "attempt to refer beyond the end of granted "
			    "page in txreq (offset %d, size %d).",
			    txreq->offset, txreq->size);
			xnbp->xnb_stat_tx_overflow_page++;

			/* Mark this entry as failed. */
			xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
			need_notify = B_TRUE;

		} else {
			xnb_txbuf_t *txp;

			txp = kmem_cache_alloc(xnbp->xnb_tx_buf_cache,
			    KM_NOSLEEP);
			if (txp == NULL)
				break;

			txp->xt_mblk = desballoc((unsigned char *)txp->xt_buf,
			    txp->xt_buflen, 0, &txp->xt_free_rtn);
			if (txp->xt_mblk == NULL) {
				kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);
				break;
			}

			txp->xt_idx = loop;
			txp->xt_id = txreq->id;

			cop->source.u.ref = txreq->gref;
			cop->source.domid = xnbp->xnb_peer;
			cop->source.offset = txreq->offset;

			cop->dest.u.gmfn = txp->xt_mfn;
			cop->dest.domid = DOMID_SELF;
			cop->dest.offset = 0;

			cop->len = txreq->size;
			cop->flags = GNTCOPY_source_gref;
			cop->status = 0;

			*txpp = txp;

			txpp++;
			cop++;
			n_data_req++;

			ASSERT(n_data_req <= NET_TX_RING_SIZE);
		}

		loop++;
	}

	xnbp->xnb_tx_ring.req_cons = loop;

	if (n_data_req == 0)
		goto around;

	if (HYPERVISOR_grant_table_op(GNTTABOP_copy,
	    xnbp->xnb_tx_cop, n_data_req) != 0) {

		cmn_err(CE_WARN, "xnb_from_peer: copy operation failed");

		txpp = xnbp->xnb_tx_bufp;
		i = n_data_req;
		while (i > 0) {
			kmem_cache_free(xnbp->xnb_tx_buf_cache, *txpp);
			txpp++;
			i--;
		}

		goto finished;
	}

	txpp = xnbp->xnb_tx_bufp;
	cop = xnbp->xnb_tx_cop;
	i = n_data_req;

	while (i > 0) {
		xnb_txbuf_t *txp = *txpp;

		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, txp->xt_idx);

		if (cop->status != 0) {
#ifdef XNB_DEBUG
			cmn_err(CE_WARN, "xnb_from_peer: "
			    "txpp 0x%p failed (%d)",
			    (void *)*txpp, cop->status);
#endif /* XNB_DEBUG */
			xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_ERROR);
			freemsg(txp->xt_mblk);
		} else {
			mblk_t *mp;

			mp = txp->xt_mblk;
			mp->b_rptr = mp->b_wptr = (unsigned char *)txp->xt_buf;
			mp->b_wptr += txreq->size;
			mp->b_next = NULL;

			/*
			 * If there are checksum flags, process them
			 * appropriately.
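			 * (The flavour's xf_cksum_from_peer() hook is
			 * expected to translate the NETTXF_* flags
			 * into the appropriate hcksum attributes.)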
			 */
			if ((txreq->flags &
			    (NETTXF_csum_blank | NETTXF_data_validated))
			    != 0) {
				mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp,
				    mp, txreq->flags);
				xnbp->xnb_stat_tx_cksum_no_need++;

				txp->xt_mblk = mp;
			}

			if (head == NULL) {
				ASSERT(tail == NULL);
				head = mp;
			} else {
				ASSERT(tail != NULL);
				tail->b_next = mp;
			}
			tail = mp;

			xnbp->xnb_stat_opackets++;
			xnbp->xnb_stat_obytes += txreq->size;

			xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_OKAY);
		}

		txpp++;
		cop++;
		i--;
	}

	goto around;
	/* NOTREACHED */
}

static uint_t
xnb_intr(caddr_t arg)
{
	xnb_t *xnbp = (xnb_t *)arg;
	mblk_t *mp;

	xnbp->xnb_stat_intr++;

	mutex_enter(&xnbp->xnb_tx_lock);

	ASSERT(xnbp->xnb_connected);

	mp = xnb_from_peer(xnbp);

	mutex_exit(&xnbp->xnb_tx_lock);

	if (!xnbp->xnb_hotplugged) {
		xnbp->xnb_stat_tx_too_early++;
		goto fail;
	}
	if (mp == NULL) {
		xnbp->xnb_stat_spurious_intr++;
		goto fail;
	}

	xnbp->xnb_flavour->xf_from_peer(xnbp, mp);

	return (DDI_INTR_CLAIMED);

fail:
	freemsgchain(mp);
	return (DDI_INTR_CLAIMED);
}

/*
 * Read our configuration from xenstore.
 */
boolean_t
xnb_read_xs_config(xnb_t *xnbp)
{
	char *xsname;
	char mac[ETHERADDRL * 3];

	xsname = xvdi_get_xsname(xnbp->xnb_devinfo);

	if (xenbus_scanf(XBT_NULL, xsname,
	    "mac", "%s", mac) != 0) {
		cmn_err(CE_WARN, "xnb_attach: "
		    "cannot read mac address from %s",
		    xsname);
		return (B_FALSE);
	}

	if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) {
		cmn_err(CE_WARN,
		    "xnb_attach: cannot parse mac address %s",
		    mac);
		return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * Read the configuration of the peer from xenstore.
 */
boolean_t
xnb_read_oe_config(xnb_t *xnbp)
{
	char *oename;
	int i;

	oename = xvdi_get_oename(xnbp->xnb_devinfo);

	if (xenbus_gather(XBT_NULL, oename,
	    "event-channel", "%u", &xnbp->xnb_fe_evtchn,
	    "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref,
	    "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref,
	    NULL) != 0) {
		cmn_err(CE_WARN, "xnb_read_oe_config: "
		    "cannot read other-end details from %s",
		    oename);
		return (B_FALSE);
	}

	/*
	 * Check whether our peer requests receive side hypervisor
	 * copy.
	 */
	if (xenbus_scanf(XBT_NULL, oename,
	    "request-rx-copy", "%d", &i) != 0)
		i = 0;
	if (i != 0)
		xnbp->xnb_rx_hv_copy = B_TRUE;

	/*
	 * Check whether our peer requests multicast_control.
	 */
	if (xenbus_scanf(XBT_NULL, oename,
	    "request-multicast-control", "%d", &i) != 0)
		i = 0;
	if (i != 0)
		xnbp->xnb_multicast_control = B_TRUE;

	/*
	 * The Linux backend driver here checks to see if the peer has
	 * set 'feature-no-csum-offload'.  This is used to indicate
	 * that the guest cannot handle receiving packets without a
	 * valid checksum.  We don't check here, because packets passed
	 * to the peer _always_ have a valid checksum.
	 *
	 * There are three cases:
	 *
	 * - the NIC is dedicated: packets from the wire should always
	 *   have a valid checksum.  If the hardware validates the
	 *   checksum then the relevant bit will be set in the packet
	 *   attributes and we will inform the peer.  It can choose to
	 *   ignore the hardware verification.
	 *
	 * - the NIC is shared (VNIC) and a packet originates from the
	 *   wire: this is the same as the case above - the packets
	 *   will have a valid checksum.
	 *
	 * - the NIC is shared (VNIC) and a packet originates from the
	 *   host: the MAC layer ensures that all such packets have a
	 *   valid checksum by calculating one if the stack did not.
	 */

	return (B_TRUE);
}

void
xnb_start_connect(xnb_t *xnbp)
{
	dev_info_t *dip = xnbp->xnb_devinfo;

	if (!xnb_connect_rings(dip)) {
		cmn_err(CE_WARN, "xnb_start_connect: "
		    "cannot connect rings");
		goto failed;
	}

	if (!xnbp->xnb_flavour->xf_start_connect(xnbp)) {
		cmn_err(CE_WARN, "xnb_start_connect: "
		    "flavour failed to connect");
		goto failed;
	}

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
	return;

failed:
	xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
	xnb_disconnect_rings(dip);
	(void) xvdi_switch_state(dip, XBT_NULL,
	    XenbusStateClosed);
	(void) xvdi_post_event(dip, XEN_HP_REMOVE);
}

static boolean_t
xnb_connect_rings(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);
	struct gnttab_map_grant_ref map_op;

	/*
	 * Cannot attempt to connect the rings if already connected.
	 */
	ASSERT(!xnbp->xnb_connected);

	/*
	 * 1. allocate a vaddr for the tx page, one for the rx page.
	 * 2. call GNTTABOP_map_grant_ref to map the relevant pages
	 *    into the allocated vaddr (one for tx, one for rx).
	 * 3. call EVTCHNOP_bind_interdomain to have the event channel
	 *    bound to this domain.
	 * 4. associate the event channel with an interrupt.
	 * 5. enable the interrupt.
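	 *
	 * On failure the caller, xnb_start_connect(), unwinds any
	 * partial setup via xnb_disconnect_rings().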
	 */

	/* 1.tx */
	xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->xnb_tx_ring_addr != NULL);

	/* 2.tx */
	map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->xnb_tx_ring_ref;
	map_op.dom = xnbp->xnb_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr, NULL);
	if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
	    map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page.");
		goto fail;
	}
	xnbp->xnb_tx_ring_handle = map_op.handle;

	/* LINTED: constant in conditional context */
	BACK_RING_INIT(&xnbp->xnb_tx_ring,
	    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);

	/* 1.rx */
	xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->xnb_rx_ring_addr != NULL);

	/* 2.rx */
	map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->xnb_rx_ring_ref;
	map_op.dom = xnbp->xnb_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr, NULL);
	if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
	    map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page.");
		goto fail;
	}
	xnbp->xnb_rx_ring_handle = map_op.handle;

	/* LINTED: constant in conditional context */
	BACK_RING_INIT(&xnbp->xnb_rx_ring,
	    (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE);

	/* 3 */
	if (xvdi_bind_evtchn(dip, xnbp->xnb_fe_evtchn) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xnb_connect_rings: "
		    "cannot bind event channel %d", xnbp->xnb_evtchn);
		xnbp->xnb_evtchn = INVALID_EVTCHN;
		goto fail;
	}
	xnbp->xnb_evtchn = xvdi_get_evtchn(dip);

	/*
	 * It would be good to set the state to XenbusStateConnected
	 * here as well, but then what if ddi_add_intr() failed?
	 * Changing the state in the store will be noticed by the peer
	 * and cannot be "taken back".
	 */
	mutex_enter(&xnbp->xnb_tx_lock);
	mutex_enter(&xnbp->xnb_rx_lock);

	xnbp->xnb_connected = B_TRUE;

	mutex_exit(&xnbp->xnb_rx_lock);
	mutex_exit(&xnbp->xnb_tx_lock);

	/* 4, 5 */
	if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp)
	    != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt");
		goto fail;
	}
	xnbp->xnb_irq = B_TRUE;

	return (B_TRUE);

fail:
	mutex_enter(&xnbp->xnb_tx_lock);
	mutex_enter(&xnbp->xnb_rx_lock);

	xnbp->xnb_connected = B_FALSE;

	mutex_exit(&xnbp->xnb_rx_lock);
	mutex_exit(&xnbp->xnb_tx_lock);

	return (B_FALSE);
}

static void
xnb_disconnect_rings(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	if (xnbp->xnb_irq) {
		ddi_remove_intr(dip, 0, NULL);
		xnbp->xnb_irq = B_FALSE;
	}

	if (xnbp->xnb_evtchn != INVALID_EVTCHN) {
		xvdi_free_evtchn(dip);
		xnbp->xnb_evtchn = INVALID_EVTCHN;
	}

	if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) {
		struct gnttab_unmap_grant_ref unmap_op;

		unmap_op.host_addr = (uint64_t)(uintptr_t)
		    xnbp->xnb_rx_ring_addr;
		unmap_op.dev_bus_addr = 0;
		unmap_op.handle = xnbp->xnb_rx_ring_handle;
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &unmap_op, 1) != 0)
			cmn_err(CE_WARN, "xnb_disconnect_rings: "
			    "cannot unmap rx-ring page (%d)",
			    unmap_op.status);

		xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	}

	if (xnbp->xnb_rx_ring_addr != NULL) {
		hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
		vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE);
		xnbp->xnb_rx_ring_addr = NULL;
	}

	if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) {
		struct gnttab_unmap_grant_ref unmap_op;

		unmap_op.host_addr = (uint64_t)(uintptr_t)
		    xnbp->xnb_tx_ring_addr;
		unmap_op.dev_bus_addr = 0;
		unmap_op.handle = xnbp->xnb_tx_ring_handle;
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &unmap_op, 1) != 0)
			cmn_err(CE_WARN, "xnb_disconnect_rings: "
			    "cannot unmap tx-ring page (%d)",
			    unmap_op.status);

		xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	}

	if (xnbp->xnb_tx_ring_addr != NULL) {
		hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
		vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE);
		xnbp->xnb_tx_ring_addr = NULL;
	}
}

static void
xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	_NOTE(ARGUNUSED(id, arg));
	xnb_t *xnbp = ddi_get_driver_private(dip);
	XenbusState new_state = *(XenbusState *)impl_data;

	ASSERT(xnbp != NULL);

	switch (new_state) {
	case XenbusStateConnected:
		/* spurious state change */
		if (xnbp->xnb_connected)
			return;

		if (!xnb_read_oe_config(xnbp) ||
		    !xnbp->xnb_flavour->xf_peer_connected(xnbp)) {
			cmn_err(CE_WARN, "xnb_oe_state_change: "
			    "read otherend config error");
			(void) xvdi_switch_state(dip, XBT_NULL,
			    XenbusStateClosed);
			(void) xvdi_post_event(dip, XEN_HP_REMOVE);

			break;
		}


		mutex_enter(&xnbp->xnb_state_lock);
		xnbp->xnb_fe_status = XNB_STATE_READY;
		if (xnbp->xnb_be_status == XNB_STATE_READY)
			xnb_start_connect(xnbp);
		mutex_exit(&xnbp->xnb_state_lock);

		/*
		 * Now that we've attempted to connect it's reasonable
		 * to allow an attempt to detach.
		 */
		xnbp->xnb_detachable = B_TRUE;

		break;

	case XenbusStateClosing:
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);

		break;

	case XenbusStateClosed:
		xnbp->xnb_flavour->xf_peer_disconnected(xnbp);

		mutex_enter(&xnbp->xnb_tx_lock);
		mutex_enter(&xnbp->xnb_rx_lock);

		xnb_disconnect_rings(dip);
		xnbp->xnb_connected = B_FALSE;

		mutex_exit(&xnbp->xnb_rx_lock);
		mutex_exit(&xnbp->xnb_tx_lock);

		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
		(void) xvdi_post_event(dip, XEN_HP_REMOVE);
		/*
		 * In all likelihood this is already set (in the above
		 * case), but if the peer never attempted to connect
		 * and the domain is destroyed we get here without
		 * having been through the case above, so we set it to
		 * be sure.
		 */
		xnbp->xnb_detachable = B_TRUE;

		break;

	default:
		break;
	}
}

static void
xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	_NOTE(ARGUNUSED(id, arg));
	xnb_t *xnbp = ddi_get_driver_private(dip);
	xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;

	ASSERT(xnbp != NULL);

	switch (state) {
	case Connected:
		/* spurious hotplug event */
		if (xnbp->xnb_hotplugged)
			break;

		if (!xnb_read_xs_config(xnbp))
			break;

		if (!xnbp->xnb_flavour->xf_hotplug_connected(xnbp))
			break;

		mutex_enter(&xnbp->xnb_tx_lock);
		mutex_enter(&xnbp->xnb_rx_lock);

		xnbp->xnb_hotplugged = B_TRUE;

		mutex_exit(&xnbp->xnb_rx_lock);
		mutex_exit(&xnbp->xnb_tx_lock);

		mutex_enter(&xnbp->xnb_state_lock);
		xnbp->xnb_be_status = XNB_STATE_READY;
		if (xnbp->xnb_fe_status == XNB_STATE_READY)
			xnb_start_connect(xnbp);
		mutex_exit(&xnbp->xnb_state_lock);

		break;

	default:
		break;
	}
}

static struct modldrv modldrv = {
	&mod_miscops, "xnb",
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

int
_init(void)
{
	int i;

	mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL);

	i = mod_install(&modlinkage);
	if (i != DDI_SUCCESS)
		mutex_destroy(&xnb_alloc_page_lock);

	return (i);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int i;

	i = mod_remove(&modlinkage);
	if (i == DDI_SUCCESS)
		mutex_destroy(&xnb_alloc_page_lock);

	return (i);
}