/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifdef DEBUG
#define	XNB_DEBUG 1
#endif /* DEBUG */

#include "xnb.h"

#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/mac.h>
#include <sys/mac_impl.h> /* For mac_fix_cksum(). */
#include <sys/dlpi.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/types.h>
#include <sys/pattr.h>
#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/balloon_impl.h>
#include <sys/evtchn_impl.h>
#include <sys/gnttab.h>
#include <vm/vm_dep.h>
#include <sys/note.h>
#include <sys/gld.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>

/*
 * The terms "transmit" and "receive" are used in alignment with domU,
 * which means that packets originating from the peer domU are "transmitted"
 * to other parts of the system and packets are "received" from them.
 */

/*
 * Should we allow guests to manipulate multicast group membership?
 */
static boolean_t xnb_multicast_control = B_TRUE;

static boolean_t	xnb_connect_rings(dev_info_t *);
static void	xnb_disconnect_rings(dev_info_t *);
static void	xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);
static void	xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);

static int	xnb_txbuf_constructor(void *, void *, int);
static void	xnb_txbuf_destructor(void *, void *);
static void	xnb_tx_notify_peer(xnb_t *, boolean_t);
static void	xnb_tx_mark_complete(xnb_t *, RING_IDX, int16_t);

mblk_t *xnb_to_peer(xnb_t *, mblk_t *);
mblk_t *xnb_copy_to_peer(xnb_t *, mblk_t *);

static void	setup_gop(xnb_t *, gnttab_copy_t *, uchar_t *,
    size_t, size_t, size_t, grant_ref_t);
#pragma inline(setup_gop)
static boolean_t	is_foreign(void *);
#pragma inline(is_foreign)

#define	INVALID_GRANT_HANDLE	((grant_handle_t)-1)
#define	INVALID_GRANT_REF	((grant_ref_t)-1)

static kmutex_t	xnb_alloc_page_lock;

/*
 * On a 32 bit PAE system physical and machine addresses are larger
 * than 32 bits.  ddi_btop() on such systems takes an unsigned long
 * argument, and so addresses above 4G are truncated before ddi_btop()
 * gets to see them.  To avoid this, code the shift operation here.
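 *
 * For example, with the usual 4k pages a machine address of
 * 0x1_0000_2000 should map to page number 0x100002; truncation to
 * 32 bits would instead yield address 0x2000 and hence page 0x2.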
 */
#define	xnb_btop(addr)	((addr) >> PAGESHIFT)

/* DMA attributes for transmit and receive data */
static ddi_dma_attr_t buf_dma_attr = {
	DMA_ATTR_V0,		/* version of this structure */
	0,			/* lowest usable address */
	0xffffffffffffffffULL,	/* highest usable address */
	0x7fffffff,		/* maximum DMAable byte count */
	MMU_PAGESIZE,		/* alignment in bytes */
	0x7ff,			/* bitmap of burst sizes */
	1,			/* minimum transfer */
	0xffffffffU,		/* maximum transfer */
	0xffffffffffffffffULL,	/* maximum segment length */
	1,			/* maximum number of segments */
	1,			/* granularity */
	0,			/* flags (reserved) */
};

/* DMA access attributes for data: NOT to be byte swapped. */
static ddi_device_acc_attr_t data_accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

/*
 * Statistics.
 */
static const char * const aux_statistics[] = {
	"rx_cksum_deferred",
	"tx_cksum_no_need",
	"rx_rsp_notok",
	"tx_notify_deferred",
	"tx_notify_sent",
	"rx_notify_deferred",
	"rx_notify_sent",
	"tx_too_early",
	"rx_too_early",
	"rx_allocb_failed",
	"tx_allocb_failed",
	"rx_foreign_page",
	"mac_full",
	"spurious_intr",
	"allocation_success",
	"allocation_failure",
	"small_allocation_success",
	"small_allocation_failure",
	"other_allocation_failure",
	"rx_pageboundary_crossed",
	"rx_cpoparea_grown",
	"csum_hardware",
	"csum_software",
	"tx_overflow_page",
	"tx_unexpected_flags",
};

static int
xnb_ks_aux_update(kstat_t *ksp, int flag)
{
	xnb_t *xnbp;
	kstat_named_t *knp;

	if (flag != KSTAT_READ)
		return (EACCES);

	xnbp = ksp->ks_private;
	knp = ksp->ks_data;

	/*
	 * Assignment order should match that of the names in
	 * aux_statistics.
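	 * When a statistic is added, both the aux_statistics array
	 * and the list of assignments below must be updated in the
	 * same position.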
	 */
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_no_need;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_rsp_notok;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_foreign_page;
	(knp++)->value.ui64 = xnbp->xnb_stat_mac_full;
	(knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_pagebndry_crossed;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cpoparea_grown;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_software;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_overflow_page;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_unexpected_flags;

	return (0);
}

static boolean_t
xnb_ks_init(xnb_t *xnbp)
{
	int nstat = sizeof (aux_statistics) /
	    sizeof (aux_statistics[0]);
	const char * const *cp = aux_statistics;
	kstat_named_t *knp;

	/*
	 * Create and initialise kstats.
	 */
	xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo),
	    ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net",
	    KSTAT_TYPE_NAMED, nstat, 0);
	if (xnbp->xnb_kstat_aux == NULL)
		return (B_FALSE);

	xnbp->xnb_kstat_aux->ks_private = xnbp;
	xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update;

	knp = xnbp->xnb_kstat_aux->ks_data;
	while (nstat > 0) {
		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);

		knp++;
		cp++;
		nstat--;
	}

	kstat_install(xnbp->xnb_kstat_aux);

	return (B_TRUE);
}

static void
xnb_ks_free(xnb_t *xnbp)
{
	kstat_delete(xnbp->xnb_kstat_aux);
}

/*
 * Calculate and insert the transport checksum for an arbitrary packet.
 */
static mblk_t *
xnb_software_csum(xnb_t *xnbp, mblk_t *mp)
{
	_NOTE(ARGUNUSED(xnbp));

	/*
	 * XXPV dme: shouldn't rely on mac_fix_cksum(), not least
	 * because it doesn't cover all of the interesting cases :-(
	 */
	mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);

	return (mac_fix_cksum(mp));
}

mblk_t *
xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab)
{
	struct ether_header *ehp;
	uint16_t sap;
	uint32_t offset;
	ipha_t *ipha;

	ASSERT(mp->b_next == NULL);

	/*
	 * Check that the packet is contained in a single mblk.  In
	 * the "from peer" path this is true today, but may change
	 * when scatter gather support is added.  In the "to peer"
	 * path we cannot be sure, but in most cases it will be true
	 * (in the xnbo case the packet has come from a MAC device
	 * which is unlikely to split packets).
	 */
	if (mp->b_cont != NULL)
		goto software;

	/*
	 * If the MAC has no hardware capability don't do any further
	 * checking.
	 */
	if (capab == 0)
		goto software;

	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
	ehp = (struct ether_header *)mp->b_rptr;

	if (ntohs(ehp->ether_type) == VLAN_TPID) {
		struct ether_vlan_header *evhp;

		ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
		evhp = (struct ether_vlan_header *)mp->b_rptr;
		sap = ntohs(evhp->ether_type);
		offset = sizeof (struct ether_vlan_header);
	} else {
		sap = ntohs(ehp->ether_type);
		offset = sizeof (struct ether_header);
	}

	/*
	 * We only attempt to do IPv4 packets in hardware.
	 */
	if (sap != ETHERTYPE_IP)
		goto software;

	/*
	 * We know that this is an IPv4 packet.
	 */
	ipha = (ipha_t *)(mp->b_rptr + offset);

	switch (ipha->ipha_protocol) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		uint32_t start, length, stuff, cksum;
		uint16_t *stuffp;

		/*
		 * This is a TCP/IPv4 or UDP/IPv4 packet, for which we
		 * can use full IPv4 and partial checksum offload.
		 */
		if ((capab & (HCKSUM_INET_FULL_V4|HCKSUM_INET_PARTIAL)) == 0)
			break;

		start = IP_SIMPLE_HDR_LENGTH;
		length = ntohs(ipha->ipha_length);
		if (ipha->ipha_protocol == IPPROTO_TCP) {
			stuff = start + TCP_CHECKSUM_OFFSET;
			cksum = IP_TCP_CSUM_COMP;
		} else {
			stuff = start + UDP_CHECKSUM_OFFSET;
			cksum = IP_UDP_CSUM_COMP;
		}
		stuffp = (uint16_t *)(mp->b_rptr + offset + stuff);

		if (capab & HCKSUM_INET_FULL_V4) {
			/*
			 * Some devices require that the checksum
			 * field of the packet is zero for full
			 * offload.
			 */
			*stuffp = 0;

			mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		if (capab & HCKSUM_INET_PARTIAL) {
			if (*stuffp == 0) {
				ipaddr_t src, dst;

				/*
				 * Older Solaris guests don't insert
				 * the pseudo-header checksum, so we
				 * calculate it here.
				 */
				src = ipha->ipha_src;
				dst = ipha->ipha_dst;

				cksum += (dst >> 16) + (dst & 0xFFFF);
				cksum += (src >> 16) + (src & 0xFFFF);
				cksum += length - IP_SIMPLE_HDR_LENGTH;

				cksum = (cksum >> 16) + (cksum & 0xFFFF);
				cksum = (cksum >> 16) + (cksum & 0xFFFF);

				ASSERT(cksum <= 0xFFFF);

				*stuffp = (uint16_t)(cksum ? cksum : ~cksum);
			}

			mac_hcksum_set(mp, start, stuff, length, 0,
			    HCK_PARTIALCKSUM);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		/* NOTREACHED */
		break;
	}

	default:
		/* Use software. */
		break;
	}

software:
	/*
	 * We are not able to use any offload so do the whole thing in
	 * software.
	 */
	xnbp->xnb_stat_csum_software++;

	return (xnb_software_csum(xnbp, mp));
}

int
xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
{
	xnb_t *xnbp;
	char *xsname;
	char cachename[32];

	xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP);

	xnbp->xnb_flavour = flavour;
	xnbp->xnb_flavour_data = flavour_data;
	xnbp->xnb_devinfo = dip;
	xnbp->xnb_evtchn = INVALID_EVTCHN;
	xnbp->xnb_irq = B_FALSE;
	xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_connected = B_FALSE;
	xnbp->xnb_hotplugged = B_FALSE;
	xnbp->xnb_detachable = B_FALSE;
	xnbp->xnb_peer = xvdi_get_oeid(dip);
	xnbp->xnb_be_status = XNB_STATE_INIT;
	xnbp->xnb_fe_status = XNB_STATE_INIT;

	xnbp->xnb_tx_buf_count = 0;

	xnbp->xnb_rx_hv_copy = B_FALSE;
	xnbp->xnb_multicast_control = B_FALSE;

	xnbp->xnb_rx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
	ASSERT(xnbp->xnb_rx_va != NULL);

	if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie)
	    != DDI_SUCCESS)
		goto failure;

	/* Allocated on demand, when/if we enter xnb_copy_to_peer(). */
	xnbp->xnb_rx_cpop = NULL;
	xnbp->xnb_rx_cpop_count = 0;

	mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_state_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);

	/* Set driver private pointer now. */
	ddi_set_driver_private(dip, xnbp);

	(void) sprintf(cachename, "xnb_tx_buf_cache_%d", ddi_get_instance(dip));
	xnbp->xnb_tx_buf_cache = kmem_cache_create(cachename,
	    sizeof (xnb_txbuf_t), 0,
	    xnb_txbuf_constructor, xnb_txbuf_destructor,
	    NULL, xnbp, NULL, 0);
	if (xnbp->xnb_tx_buf_cache == NULL)
		goto failure_0;

	if (!xnb_ks_init(xnbp))
		goto failure_1;

	/*
	 * Receive notification of changes in the state of the
	 * driver in the guest domain.
	 */
	if (xvdi_add_event_handler(dip, XS_OE_STATE, xnb_oe_state_change,
	    NULL) != DDI_SUCCESS)
		goto failure_2;

	/*
	 * Receive notification of hotplug events.
	 */
	if (xvdi_add_event_handler(dip, XS_HP_STATE, xnb_hp_state_change,
	    NULL) != DDI_SUCCESS)
		goto failure_2;

	xsname = xvdi_get_xsname(dip);

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-multicast-control", "%d",
	    xnb_multicast_control ? 1 : 0) != 0)
		goto failure_3;

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-copy", "%d", 1) != 0)
		goto failure_3;
	/*
	 * Linux domUs seem to depend on "feature-rx-flip" being 0
	 * in addition to "feature-rx-copy" being 1.  It seems strange
	 * to use four possible states to describe a binary decision,
	 * but we might as well play nice.
	 */
	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-flip", "%d", 0) != 0)
		goto failure_3;

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
	(void) xvdi_post_event(dip, XEN_HP_ADD);

	return (DDI_SUCCESS);

failure_3:
	xvdi_remove_event_handler(dip, NULL);

failure_2:
	xnb_ks_free(xnbp);

failure_1:
	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);

failure_0:
	mutex_destroy(&xnbp->xnb_state_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

failure:
	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);
	kmem_free(xnbp, sizeof (*xnbp));
	return (DDI_FAILURE);
}

void
xnb_detach(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	ASSERT(xnbp != NULL);
	ASSERT(!xnbp->xnb_connected);
	ASSERT(xnbp->xnb_tx_buf_count == 0);

	xnb_disconnect_rings(dip);

	xvdi_remove_event_handler(dip, NULL);

	xnb_ks_free(xnbp);

	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);

	ddi_set_driver_private(dip, NULL);

	mutex_destroy(&xnbp->xnb_state_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

	if (xnbp->xnb_rx_cpop_count > 0)
		kmem_free(xnbp->xnb_rx_cpop, sizeof (xnbp->xnb_rx_cpop[0])
		    * xnbp->xnb_rx_cpop_count);

	ASSERT(xnbp->xnb_rx_va != NULL);
	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);

	kmem_free(xnbp, sizeof (*xnbp));
}

/*
 * Allocate a page from the hypervisor to be flipped to the peer.
 *
 * Try to get pages in batches to reduce the overhead of calls into
 * the balloon driver.
 */
static mfn_t
xnb_alloc_page(xnb_t *xnbp)
{
#define	WARNING_RATE_LIMIT 100
#define	BATCH_SIZE 256
	static mfn_t mfns[BATCH_SIZE];	/* common across all instances */
	static int nth = BATCH_SIZE;
	mfn_t mfn;

	mutex_enter(&xnb_alloc_page_lock);
	if (nth == BATCH_SIZE) {
		if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) {
			xnbp->xnb_stat_allocation_failure++;
			mutex_exit(&xnb_alloc_page_lock);

			/*
			 * Try for a single page in low memory situations.
			 */
			if (balloon_alloc_pages(1, &mfn) != 1) {
				if ((xnbp->xnb_stat_small_allocation_failure++
				    % WARNING_RATE_LIMIT) == 0)
					cmn_err(CE_WARN, "xnb_alloc_page: "
					    "Cannot allocate memory to "
					    "transfer packets to peer.");
				return (0);
			} else {
				xnbp->xnb_stat_small_allocation_success++;
				return (mfn);
			}
		}

		nth = 0;
		xnbp->xnb_stat_allocation_success++;
	}

	mfn = mfns[nth++];
	mutex_exit(&xnb_alloc_page_lock);

	ASSERT(mfn != 0);

	return (mfn);
#undef BATCH_SIZE
#undef WARNING_RATE_LIMIT
}

/*
 * Free a page back to the hypervisor.
 *
 * This happens only in the error path, so batching is not worth the
 * complication.
 */
static void
xnb_free_page(xnb_t *xnbp, mfn_t mfn)
{
	_NOTE(ARGUNUSED(xnbp));
	int r;
	pfn_t pfn;

	pfn = xen_assign_pfn(mfn);
	pfnzero(pfn, 0, PAGESIZE);
	xen_release_pfn(pfn);

	if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) {
		cmn_err(CE_WARN, "free_page: cannot decrease memory "
		    "reservation (%d): page kept but unusable (mfn = 0x%lx).",
		    r, mfn);
	}
}

/*
 * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but using
 * local variables.  Used in both xnb_to_peer() and xnb_copy_to_peer().
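 *
 * The value is the smaller of the number of requests the peer has
 * posted that we have not yet consumed (req_prod - loop) and the
 * number of response slots still free (RING_SIZE - (loop - prod)),
 * so it is non-zero only while we can both take a request and post
 * a response.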
 */
#define	XNB_RING_HAS_UNCONSUMED_REQUESTS(_r)	\
	((((_r)->sring->req_prod - loop) <	\
	    (RING_SIZE(_r) - (loop - prod))) ?	\
	    ((_r)->sring->req_prod - loop) :	\
	    (RING_SIZE(_r) - (loop - prod)))

/*
 * Pass packets to the peer using page flipping.
 */
mblk_t *
xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *prev = NULL;
	size_t len;
	gnttab_transfer_t *gop;
	boolean_t notify;
	RING_IDX loop, prod, end;

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a new page from the hypervisor.
	 * 2. get a request slot from the ring.
	 * 3. copy the data into the new page.
	 * 4. transfer the page to the peer.
	 * 5. update the request slot.
	 * 6. kick the peer.
	 * 7. free mp.
	 *
	 * In order to reduce the number of hypercalls, we prepare
	 * several packets for the peer and perform a single hypercall
	 * to transfer them.
	 */

	mutex_enter(&xnbp->xnb_rx_lock);

	/*
	 * If we are not connected to the peer or have not yet
	 * finished hotplug it is too early to pass packets to the
	 * peer.
	 */
	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_rx_lock);
		DTRACE_PROBE(flip_rx_too_early);
		xnbp->xnb_stat_rx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_rx_top;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {

		mfn_t mfn;
		pfn_t pfn;
		netif_rx_request_t *rxreq;
		netif_rx_response_t *rxresp;
		char *valoop;
		mblk_t *ml;
		uint16_t cksum_flags;

		/* 1 */
		if ((mfn = xnb_alloc_page(xnbp)) == 0) {
			xnbp->xnb_stat_rx_defer++;
			break;
		}

		/* 2 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* Assign a pfn and map the new page at the allocated va. */
		pfn = xen_assign_pfn(mfn);
		hat_devload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
		    pfn, PROT_READ | PROT_WRITE, HAT_LOAD);

		/* 3 */
		len = 0;
		valoop = xnbp->xnb_rx_va;
		for (ml = mp; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;

			bcopy(ml->b_rptr, valoop, chunk);
			valoop += chunk;
			len += chunk;
		}

		ASSERT(len < PAGESIZE);

		/* Release the pfn. */
		hat_unload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
		    HAT_UNLOAD_UNMAP);
		xen_release_pfn(pfn);

		/* 4 */
		gop->mfn = mfn;
		gop->domid = xnbp->xnb_peer;
		gop->ref = rxreq->gref;

		/* 5.1 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = 0;
		rxresp->flags = 0;

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_rx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		loop++;
		prod++;
		gop++;
		prev = mp;
		mp = mp->b_next;
	}

	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_rx_lock);
		return (mp);
	}

	end = loop;

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(prev != NULL);
	prev->b_next = NULL;

	if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_rx_top,
	    loop - xnbp->xnb_rx_ring.req_cons) != 0) {
		cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed");
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_rx_top;

	while (loop < end) {
		int16_t status = NETIF_RSP_OKAY;

		if (gop->status != 0) {
			status = NETIF_RSP_ERROR;

			/*
			 * If the status is anything other than
			 * GNTST_bad_page then we don't own the page
			 * any more, so don't try to give it back.
			 */
			if (gop->status != GNTST_bad_page)
				gop->mfn = 0;
		} else {
			/* The page is no longer ours. */
			gop->mfn = 0;
		}

		if (gop->mfn != 0)
			/*
			 * Give back the page, as we won't be using
			 * it.
			 */
			xnb_free_page(xnbp, gop->mfn);
		else
			/*
			 * We gave away a page, update our accounting
			 * now.
			 */
			balloon_drv_subtracted(1);

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
		} else {
			xnbp->xnb_stat_ipackets++;
			xnbp->xnb_stat_rbytes += len;
		}

		loop++;
		prod++;
		gop++;
	}

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_rx_defer++;

	mutex_exit(&xnbp->xnb_rx_lock);

	/* Free mblk_t's that we consumed. */
	freemsgchain(free);

	return (mp);
}

/* Helper functions for xnb_copy_to_peer(). */

/*
 * Grow the array of copy operation descriptors.
 */
static boolean_t
grow_cpop_area(xnb_t *xnbp)
{
	size_t count;
	gnttab_copy_t *new;

	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

	count = xnbp->xnb_rx_cpop_count + CPOP_DEFCNT;

	if ((new = kmem_alloc(sizeof (new[0]) * count, KM_NOSLEEP)) == NULL) {
		xnbp->xnb_stat_other_allocation_failure++;
		return (B_FALSE);
	}

	bcopy(xnbp->xnb_rx_cpop, new,
	    sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);

	kmem_free(xnbp->xnb_rx_cpop,
	    sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);

	xnbp->xnb_rx_cpop = new;
	xnbp->xnb_rx_cpop_count = count;

	xnbp->xnb_stat_rx_cpoparea_grown++;

	return (B_TRUE);
}

/*
 * Check whether an address is on a page that's foreign to this domain.
 */
static boolean_t
is_foreign(void *addr)
{
	pfn_t pfn = hat_getpfnum(kas.a_hat, addr);

	return ((pfn & PFN_IS_FOREIGN_MFN) == PFN_IS_FOREIGN_MFN);
}

/*
 * Insert a newly allocated mblk into a chain, replacing the old one.
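 *
 * This is used when an mblk's data sits on a page foreign to this
 * domain, which the copy hypercall cannot reference by MFN; copyb()
 * duplicates the data into local memory and the hardware checksum
 * attributes are carried across to the replacement.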
 */
static mblk_t *
replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev)
{
	uint32_t start, stuff, end, value, flags;
	mblk_t *new_mp;

	new_mp = copyb(mp);
	if (new_mp == NULL) {
		cmn_err(CE_PANIC, "replace_msg: cannot alloc new message "
		    "for %p, len %lu", (void *) mp, len);
	}

	mac_hcksum_get(mp, &start, &stuff, &end, &value, &flags);
	mac_hcksum_set(new_mp, start, stuff, end, value, flags);

	new_mp->b_next = mp->b_next;
	new_mp->b_prev = mp->b_prev;
	new_mp->b_cont = mp->b_cont;

	/* Make sure we only overwrite pointers to the mblk being replaced. */
	if (mp_prev != NULL && mp_prev->b_next == mp)
		mp_prev->b_next = new_mp;

	if (ml_prev != NULL && ml_prev->b_cont == mp)
		ml_prev->b_cont = new_mp;

	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	freemsg(mp);

	return (new_mp);
}

/*
 * Set all the fields in a gnttab_copy_t.
 */
static void
setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr,
    size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref)
{
	ASSERT(xnbp != NULL && gp != NULL);

	gp->source.offset = s_off;
	gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr));
	gp->source.domid = DOMID_SELF;

	gp->len = (uint16_t)len;
	gp->flags = GNTCOPY_dest_gref;
	gp->status = 0;

	gp->dest.u.ref = d_ref;
	gp->dest.offset = d_off;
	gp->dest.domid = xnbp->xnb_peer;
}

/*
 * Pass packets to the peer using hypervisor copy operations.
 */
mblk_t *
xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *mp_prev = NULL, *saved_mp = mp;
	mblk_t *ml, *ml_prev;
	boolean_t notify;
	RING_IDX loop, prod;
	int i;

	/*
	 * If the peer does not pre-post buffers for received packets,
	 * use page flipping to pass packets to it.
	 */
	if (!xnbp->xnb_rx_hv_copy)
		return (xnb_to_peer(xnbp, mp));

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a request slot from the ring.
	 * 2. set up data for hypercall (see NOTE below)
	 * 3. have the hypervisor copy the data
	 * 4. update the request slot.
	 * 5. kick the peer.
	 *
	 * NOTE ad 2.
	 * In order to reduce the number of hypercalls, we prepare
	 * several mblks (mp->b_cont != NULL) for the peer and
	 * perform a single hypercall to transfer them.  We also have
	 * to set up a separate copy operation for every page.
	 *
	 * If we have more than one packet (mp->b_next != NULL), we do
	 * this whole dance repeatedly.
	 */

	mutex_enter(&xnbp->xnb_rx_lock);

	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_rx_lock);
		DTRACE_PROBE(copy_rx_too_early);
		xnbp->xnb_stat_rx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
		netif_rx_request_t *rxreq;
		size_t d_offset, len;
		int item_count;
		gnttab_copy_t *gop_cp;
		netif_rx_response_t *rxresp;
		uint16_t cksum_flags;
		int16_t status = NETIF_RSP_OKAY;

		/* 1 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_copy_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* 2 */
		d_offset = 0;
		len = 0;
		item_count = 0;

		gop_cp = xnbp->xnb_rx_cpop;

		/*
		 * We walk the b_cont pointers and set up a
		 * gnttab_copy_t for each sub-page chunk in each data
		 * block.
		 */
		/* 2a */
		for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;
			uchar_t *r_tmp, *rpt_align;
			size_t r_offset;

			/*
			 * The hypervisor will not allow us to
			 * reference a foreign page (e.g. one
			 * belonging to another domain) by mfn in the
			 * copy operation.  If the data in this mblk is
			 * on such a page we must copy the data into a
			 * local page before initiating the hypervisor
			 * copy operation.
			 */
			if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) {
				mblk_t *ml_new = replace_msg(ml, chunk,
				    mp_prev, ml_prev);

				/* We can still use old ml, but not *ml! */
				if (free == ml)
					free = ml_new;
				if (mp == ml)
					mp = ml_new;
				ml = ml_new;

				xnbp->xnb_stat_rx_foreign_page++;
			}

			rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr);
			r_offset = (uint16_t)(ml->b_rptr - rpt_align);
			r_tmp = ml->b_rptr;

			if (d_offset + chunk > PAGESIZE)
				cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p "
				    "(svd: %p), ml %p,rpt_alg. %p, d_offset "
				    "(%lu) + chunk (%lu) > PAGESIZE %d!",
				    (void *)mp, (void *)saved_mp, (void *)ml,
				    (void *)rpt_align,
				    d_offset, chunk, (int)PAGESIZE);

			while (chunk > 0) {
				size_t part_len;

				if (item_count == xnbp->xnb_rx_cpop_count) {
					if (!grow_cpop_area(xnbp))
						goto failure;
					gop_cp = &xnbp->xnb_rx_cpop[item_count];
				}
				/*
				 * If our mblk crosses a page boundary, we need
				 * to do a separate copy for each page.
				 */
				if (r_offset + chunk > PAGESIZE) {
					part_len = PAGESIZE - r_offset;

					DTRACE_PROBE3(mblk_page_crossed,
					    (mblk_t *), ml, int, chunk, int,
					    (int)r_offset);

					xnbp->xnb_stat_rx_pagebndry_crossed++;
				} else {
					part_len = chunk;
				}

				setup_gop(xnbp, gop_cp, r_tmp, r_offset,
				    d_offset, part_len, rxreq->gref);

				chunk -= part_len;

				len += part_len;
				d_offset += part_len;
				r_tmp += part_len;
				/*
				 * The 2nd, 3rd ... last copies will always
				 * start at r_tmp, therefore r_offset is 0.
				 */
				r_offset = 0;
				gop_cp++;
				item_count++;
			}
			ml_prev = ml;

			DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int,
			    chunk, int, len, int, item_count);
		}
		/* 3 */
		if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_rx_cpop,
		    item_count) != 0) {
			cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed");
			DTRACE_PROBE(HV_granttableopfailed);
		}

		/* 4 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = 0;

		rxresp->flags = 0;

		DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_rx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		for (i = 0; i < item_count; i++) {
			if (xnbp->xnb_rx_cpop[i].status != 0) {
				DTRACE_PROBE2(cpop_status_nonnull, int,
				    (int)xnbp->xnb_rx_cpop[i].status,
				    int, i);
				status = NETIF_RSP_ERROR;
			}
		}

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
			xnbp->xnb_stat_rx_rsp_notok++;
		} else {
			xnbp->xnb_stat_ipackets++;
			xnbp->xnb_stat_rbytes += len;
		}

		loop++;
		prod++;
		mp_prev = mp;
		mp = mp->b_next;
	}
failure:
	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_rx_lock);
		return (mp);
	}

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(mp_prev != NULL);
	mp_prev->b_next = NULL;

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_rx_defer++;

	mutex_exit(&xnbp->xnb_rx_lock);

	/*
	 * Free mblk_t structs we have consumed.
	 */
	freemsgchain(free);

	return (mp);
}


static void
xnb_tx_notify_peer(xnb_t *xnbp, boolean_t force)
{
	boolean_t notify;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify);
	if (notify || force) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_tx_notify_sent++;
	} else {
		xnbp->xnb_stat_tx_notify_deferred++;
	}
}

static void
xnb_tx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
{
	RING_IDX i;
	netif_tx_response_t *txresp;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	i = xnbp->xnb_tx_ring.rsp_prod_pvt;

	txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i);
	txresp->id = id;
	txresp->status = status;

	xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1;

	/*
	 * Note that we don't push the change to the peer here - that
	 * is the caller's responsibility.
	 */
}

static void
xnb_txbuf_recycle(xnb_txbuf_t *txp)
{
	xnb_t *xnbp = txp->xt_xnbp;

	kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);

	xnbp->xnb_tx_buf_outstanding--;
}

static int
xnb_txbuf_constructor(void *buf, void *arg, int kmflag)
{
	_NOTE(ARGUNUSED(kmflag));
	xnb_txbuf_t *txp = buf;
	xnb_t *xnbp = arg;
	size_t len;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;

	txp->xt_free_rtn.free_func = xnb_txbuf_recycle;
	txp->xt_free_rtn.free_arg = (caddr_t)txp;
	txp->xt_xnbp = xnbp;
	txp->xt_next = NULL;

	if (ddi_dma_alloc_handle(xnbp->xnb_devinfo, &buf_dma_attr,
	    0, 0, &txp->xt_dma_handle) != DDI_SUCCESS)
		goto failure;

	if (ddi_dma_mem_alloc(txp->xt_dma_handle, PAGESIZE, &data_accattr,
	    DDI_DMA_STREAMING, 0, 0, &txp->xt_buf, &len,
	    &txp->xt_acc_handle) != DDI_SUCCESS)
		goto failure_1;

	if (ddi_dma_addr_bind_handle(txp->xt_dma_handle, NULL, txp->xt_buf,
	    len, DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, 0,
	    &dma_cookie, &ncookies)
	    != DDI_DMA_MAPPED)
		goto failure_2;
	ASSERT(ncookies == 1);

	txp->xt_mfn = xnb_btop(dma_cookie.dmac_laddress);
	txp->xt_buflen = dma_cookie.dmac_size;

	DTRACE_PROBE(txbuf_allocated);

	atomic_inc_32(&xnbp->xnb_tx_buf_count);
	xnbp->xnb_tx_buf_outstanding++;

	return (0);

failure_2:
	ddi_dma_mem_free(&txp->xt_acc_handle);

failure_1:
	ddi_dma_free_handle(&txp->xt_dma_handle);

failure:

	return (-1);
}

static void
xnb_txbuf_destructor(void *buf, void *arg)
{
	xnb_txbuf_t *txp = buf;
	xnb_t *xnbp = arg;

	(void) ddi_dma_unbind_handle(txp->xt_dma_handle);
	ddi_dma_mem_free(&txp->xt_acc_handle);
	ddi_dma_free_handle(&txp->xt_dma_handle);

	atomic_dec_32(&xnbp->xnb_tx_buf_count);
}

/*
 * Take packets from the peer and deliver them onward.
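 *
 * Requests are gathered from the tx ring, copied into local DMA
 * buffers with a single GNTTABOP_copy hypercall, wrapped in mblks
 * and chained together; completions are marked on the ring as we go
 * and the peer is notified once at the end.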
 */
static mblk_t *
xnb_from_peer(xnb_t *xnbp)
{
	RING_IDX start, end, loop;
	gnttab_copy_t *cop;
	xnb_txbuf_t **txpp;
	netif_tx_request_t *txreq;
	boolean_t work_to_do, need_notify = B_FALSE;
	mblk_t *head, *tail;
	int n_data_req, i;

	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));

	head = tail = NULL;
around:

	/* LINTED: constant in conditional context */
	RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do);
	if (!work_to_do) {
finished:
		xnb_tx_notify_peer(xnbp, need_notify);

		return (head);
	}

	start = xnbp->xnb_tx_ring.req_cons;
	end = xnbp->xnb_tx_ring.sring->req_prod;

	if ((end - start) > NET_TX_RING_SIZE) {
		/*
		 * This usually indicates that the frontend driver is
		 * misbehaving, as it's not possible to have more than
		 * NET_TX_RING_SIZE ring elements in play at any one
		 * time.
		 *
		 * We reset the ring pointers to the state declared by
		 * the frontend and try to carry on.
		 */
		cmn_err(CE_WARN, "xnb_from_peer: domain %d tried to give us %u "
		    "items in the ring, resetting and trying to recover.",
		    xnbp->xnb_peer, (end - start));

		/* LINTED: constant in conditional context */
		BACK_RING_ATTACH(&xnbp->xnb_tx_ring,
		    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);

		goto around;
	}

	loop = start;
	cop = xnbp->xnb_tx_cop;
	txpp = xnbp->xnb_tx_bufp;
	n_data_req = 0;

	while (loop < end) {
		static const uint16_t acceptable_flags =
		    NETTXF_csum_blank |
		    NETTXF_data_validated |
		    NETTXF_extra_info;
		uint16_t unexpected_flags;

		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);

		unexpected_flags = txreq->flags & ~acceptable_flags;
		if (unexpected_flags != 0) {
			/*
			 * The peer used flag bits that we do not
			 * recognize.
			 */
			cmn_err(CE_WARN, "xnb_from_peer: "
			    "unexpected flag bits (0x%x) from peer "
			    "in transmit request",
			    unexpected_flags);
			xnbp->xnb_stat_tx_unexpected_flags++;

			/* Mark this entry as failed. */
			xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
			need_notify = B_TRUE;

		} else if (txreq->flags & NETTXF_extra_info) {
			struct netif_extra_info *erp;
			boolean_t status;

			loop++; /* Consume another slot in the ring. */
			ASSERT(loop <= end);

			erp = (struct netif_extra_info *)
			    RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);

			switch (erp->type) {
			case XEN_NETIF_EXTRA_TYPE_MCAST_ADD:
				ASSERT(xnbp->xnb_multicast_control);
				status = xnbp->xnb_flavour->xf_mcast_add(xnbp,
				    &erp->u.mcast.addr);
				break;
			case XEN_NETIF_EXTRA_TYPE_MCAST_DEL:
				ASSERT(xnbp->xnb_multicast_control);
				status = xnbp->xnb_flavour->xf_mcast_del(xnbp,
				    &erp->u.mcast.addr);
				break;
			default:
				status = B_FALSE;
				cmn_err(CE_WARN, "xnb_from_peer: "
				    "unknown extra type %d", erp->type);
				break;
			}

			xnb_tx_mark_complete(xnbp, txreq->id,
			    status ? NETIF_RSP_OKAY : NETIF_RSP_ERROR);
			need_notify = B_TRUE;

		} else if ((txreq->offset > PAGESIZE) ||
		    (txreq->offset + txreq->size > PAGESIZE)) {
			/*
			 * Peer attempted to refer to data beyond the
			 * end of the granted page.
			 */
			cmn_err(CE_WARN, "xnb_from_peer: "
			    "attempt to refer beyond the end of granted "
			    "page in txreq (offset %d, size %d).",
			    txreq->offset, txreq->size);
			xnbp->xnb_stat_tx_overflow_page++;

			/* Mark this entry as failed. */
			xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
			need_notify = B_TRUE;

		} else {
			xnb_txbuf_t *txp;

			txp = kmem_cache_alloc(xnbp->xnb_tx_buf_cache,
			    KM_NOSLEEP);
			if (txp == NULL)
				break;

			txp->xt_mblk = desballoc((unsigned char *)txp->xt_buf,
			    txp->xt_buflen, 0, &txp->xt_free_rtn);
			if (txp->xt_mblk == NULL) {
				kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);
				break;
			}

			txp->xt_idx = loop;
			txp->xt_id = txreq->id;

			cop->source.u.ref = txreq->gref;
			cop->source.domid = xnbp->xnb_peer;
			cop->source.offset = txreq->offset;

			cop->dest.u.gmfn = txp->xt_mfn;
			cop->dest.domid = DOMID_SELF;
			cop->dest.offset = 0;

			cop->len = txreq->size;
			cop->flags = GNTCOPY_source_gref;
			cop->status = 0;

			*txpp = txp;

			txpp++;
			cop++;
			n_data_req++;

			ASSERT(n_data_req <= NET_TX_RING_SIZE);
		}

		loop++;
	}

	xnbp->xnb_tx_ring.req_cons = loop;

	if (n_data_req == 0)
		goto around;

	if (HYPERVISOR_grant_table_op(GNTTABOP_copy,
	    xnbp->xnb_tx_cop, n_data_req) != 0) {

		cmn_err(CE_WARN, "xnb_from_peer: copy operation failed");

		txpp = xnbp->xnb_tx_bufp;
		i = n_data_req;
		while (i > 0) {
			kmem_cache_free(xnbp->xnb_tx_buf_cache, *txpp);
			txpp++;
			i--;
		}

		goto finished;
	}

	txpp = xnbp->xnb_tx_bufp;
	cop = xnbp->xnb_tx_cop;
	i = n_data_req;

	while (i > 0) {
		xnb_txbuf_t *txp = *txpp;

		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, txp->xt_idx);

		if (cop->status != 0) {
#ifdef XNB_DEBUG
			cmn_err(CE_WARN, "xnb_from_peer: "
			    "txpp 0x%p failed (%d)",
			    (void *)*txpp, cop->status);
#endif /* XNB_DEBUG */
			xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_ERROR);
			freemsg(txp->xt_mblk);
		} else {
			mblk_t *mp;

			mp = txp->xt_mblk;
			mp->b_rptr = mp->b_wptr = (unsigned char *)txp->xt_buf;
			mp->b_wptr += txreq->size;
			mp->b_next = NULL;

			/*
			 * If there are checksum flags, process them
			 * appropriately.
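			 *
			 * NETTXF_csum_blank means the peer left the
			 * transport checksum to be filled in, while
			 * NETTXF_data_validated means it vouches for
			 * the data; the flavour hook maps these onto
			 * whatever the rest of the system expects.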
			 */
			if ((txreq->flags &
			    (NETTXF_csum_blank | NETTXF_data_validated))
			    != 0) {
				mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp,
				    mp, txreq->flags);
				xnbp->xnb_stat_tx_cksum_no_need++;

				txp->xt_mblk = mp;
			}

			if (head == NULL) {
				ASSERT(tail == NULL);
				head = mp;
			} else {
				ASSERT(tail != NULL);
				tail->b_next = mp;
			}
			tail = mp;

			xnbp->xnb_stat_opackets++;
			xnbp->xnb_stat_obytes += txreq->size;

			xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_OKAY);
		}

		txpp++;
		cop++;
		i--;
	}

	goto around;
	/* NOTREACHED */
}

static uint_t
xnb_intr(caddr_t arg)
{
	xnb_t *xnbp = (xnb_t *)arg;
	mblk_t *mp;

	xnbp->xnb_stat_intr++;

	mutex_enter(&xnbp->xnb_tx_lock);

	ASSERT(xnbp->xnb_connected);

	mp = xnb_from_peer(xnbp);

	mutex_exit(&xnbp->xnb_tx_lock);

	if (!xnbp->xnb_hotplugged) {
		xnbp->xnb_stat_tx_too_early++;
		goto fail;
	}
	if (mp == NULL) {
		xnbp->xnb_stat_spurious_intr++;
		goto fail;
	}

	xnbp->xnb_flavour->xf_from_peer(xnbp, mp);

	return (DDI_INTR_CLAIMED);

fail:
	freemsgchain(mp);
	return (DDI_INTR_CLAIMED);
}

/*
 * Read our configuration from xenstore.
 */
boolean_t
xnb_read_xs_config(xnb_t *xnbp)
{
	char *xsname;
	char mac[ETHERADDRL * 3];

	xsname = xvdi_get_xsname(xnbp->xnb_devinfo);

	if (xenbus_scanf(XBT_NULL, xsname,
	    "mac", "%s", mac) != 0) {
		cmn_err(CE_WARN, "xnb_attach: "
		    "cannot read mac address from %s",
		    xsname);
		return (B_FALSE);
	}

	if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) {
		cmn_err(CE_WARN,
		    "xnb_attach: cannot parse mac address %s",
		    mac);
		return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * Read the configuration of the peer from xenstore.
 */
boolean_t
xnb_read_oe_config(xnb_t *xnbp)
{
	char *oename;
	int i;

	oename = xvdi_get_oename(xnbp->xnb_devinfo);

	if (xenbus_gather(XBT_NULL, oename,
	    "event-channel", "%u", &xnbp->xnb_fe_evtchn,
	    "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref,
	    "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref,
	    NULL) != 0) {
		cmn_err(CE_WARN, "xnb_read_oe_config: "
		    "cannot read other-end details from %s",
		    oename);
		return (B_FALSE);
	}

	/*
	 * Check whether our peer requests receive side hypervisor
	 * copy.
	 */
	if (xenbus_scanf(XBT_NULL, oename,
	    "request-rx-copy", "%d", &i) != 0)
		i = 0;
	if (i != 0)
		xnbp->xnb_rx_hv_copy = B_TRUE;

	/*
	 * Check whether our peer requests multicast_control.
	 */
	if (xenbus_scanf(XBT_NULL, oename,
	    "request-multicast-control", "%d", &i) != 0)
		i = 0;
	if (i != 0)
		xnbp->xnb_multicast_control = B_TRUE;

	/*
	 * The Linux backend driver here checks to see if the peer has
	 * set 'feature-no-csum-offload'.  This is used to indicate
	 * that the guest cannot handle receiving packets without a
	 * valid checksum.  We don't check here, because packets passed
	 * to the peer _always_ have a valid checksum.
	 *
	 * There are three cases:
	 *
	 * - the NIC is dedicated: packets from the wire should always
	 *   have a valid checksum.
	 *   If the hardware validates the
	 *   checksum then the relevant bit will be set in the packet
	 *   attributes and we will inform the peer.  It can choose to
	 *   ignore the hardware verification.
	 *
	 * - the NIC is shared (VNIC) and a packet originates from the
	 *   wire: this is the same as the case above - the packets
	 *   will have a valid checksum.
	 *
	 * - the NIC is shared (VNIC) and a packet originates from the
	 *   host: the MAC layer ensures that all such packets have a
	 *   valid checksum by calculating one if the stack did not.
	 */

	return (B_TRUE);
}

void
xnb_start_connect(xnb_t *xnbp)
{
	dev_info_t *dip = xnbp->xnb_devinfo;

	if (!xnb_connect_rings(dip)) {
		cmn_err(CE_WARN, "xnb_start_connect: "
		    "cannot connect rings");
		goto failed;
	}

	if (!xnbp->xnb_flavour->xf_start_connect(xnbp)) {
		cmn_err(CE_WARN, "xnb_start_connect: "
		    "flavour failed to connect");
		goto failed;
	}

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
	return;

failed:
	xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
	xnb_disconnect_rings(dip);
	(void) xvdi_switch_state(dip, XBT_NULL,
	    XenbusStateClosed);
	(void) xvdi_post_event(dip, XEN_HP_REMOVE);
}

static boolean_t
xnb_connect_rings(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);
	struct gnttab_map_grant_ref map_op;

	/*
	 * Cannot attempt to connect the rings if already connected.
	 */
	ASSERT(!xnbp->xnb_connected);

	/*
	 * 1. allocate a vaddr for the tx page, one for the rx page.
	 * 2. call GNTTABOP_map_grant_ref to map the relevant pages
	 *    into the allocated vaddr (one for tx, one for rx).
	 * 3. call EVTCHNOP_bind_interdomain to have the event channel
	 *    bound to this domain.
	 * 4. associate the event channel with an interrupt.
	 * 5. enable the interrupt.
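	 *
	 * If any step fails we return B_FALSE and the caller,
	 * xnb_start_connect(), is expected to tear down whatever was
	 * set up via xnb_disconnect_rings().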
	 */

	/* 1.tx */
	xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->xnb_tx_ring_addr != NULL);

	/* 2.tx */
	map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->xnb_tx_ring_ref;
	map_op.dom = xnbp->xnb_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr, NULL);
	if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
	    map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page.");
		goto fail;
	}
	xnbp->xnb_tx_ring_handle = map_op.handle;

	/* LINTED: constant in conditional context */
	BACK_RING_INIT(&xnbp->xnb_tx_ring,
	    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);

	/* 1.rx */
	xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
	    0, 0, 0, 0, VM_SLEEP);
	ASSERT(xnbp->xnb_rx_ring_addr != NULL);

	/* 2.rx */
	map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr);
	map_op.flags = GNTMAP_host_map;
	map_op.ref = xnbp->xnb_rx_ring_ref;
	map_op.dom = xnbp->xnb_peer;
	hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr, NULL);
	if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
	    map_op.status != 0) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page.");
		goto fail;
	}
	xnbp->xnb_rx_ring_handle = map_op.handle;

	/* LINTED: constant in conditional context */
	BACK_RING_INIT(&xnbp->xnb_rx_ring,
	    (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE);

	/* 3 */
	if (xvdi_bind_evtchn(dip, xnbp->xnb_fe_evtchn) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xnb_connect_rings: "
		    "cannot bind event channel %d", xnbp->xnb_evtchn);
		xnbp->xnb_evtchn = INVALID_EVTCHN;
		goto fail;
	}
	xnbp->xnb_evtchn = xvdi_get_evtchn(dip);

	/*
	 * It would be good to set the state to XenbusStateConnected
	 * here as well, but then what if ddi_add_intr() failed?
	 * Changing the state in the store will be noticed by the peer
	 * and cannot be "taken back".
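	 * Instead we only mark ourselves connected here; the switch
	 * to XenbusStateConnected is left to xnb_start_connect(),
	 * once the interrupt has been added successfully.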
	 */
	mutex_enter(&xnbp->xnb_tx_lock);
	mutex_enter(&xnbp->xnb_rx_lock);

	xnbp->xnb_connected = B_TRUE;

	mutex_exit(&xnbp->xnb_rx_lock);
	mutex_exit(&xnbp->xnb_tx_lock);

	/* 4, 5 */
	if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp)
	    != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt");
		goto fail;
	}
	xnbp->xnb_irq = B_TRUE;

	return (B_TRUE);

fail:
	mutex_enter(&xnbp->xnb_tx_lock);
	mutex_enter(&xnbp->xnb_rx_lock);

	xnbp->xnb_connected = B_FALSE;

	mutex_exit(&xnbp->xnb_rx_lock);
	mutex_exit(&xnbp->xnb_tx_lock);

	return (B_FALSE);
}

static void
xnb_disconnect_rings(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	if (xnbp->xnb_irq) {
		ddi_remove_intr(dip, 0, NULL);
		xnbp->xnb_irq = B_FALSE;
	}

	if (xnbp->xnb_evtchn != INVALID_EVTCHN) {
		xvdi_free_evtchn(dip);
		xnbp->xnb_evtchn = INVALID_EVTCHN;
	}

	if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) {
		struct gnttab_unmap_grant_ref unmap_op;

		unmap_op.host_addr = (uint64_t)(uintptr_t)
		    xnbp->xnb_rx_ring_addr;
		unmap_op.dev_bus_addr = 0;
		unmap_op.handle = xnbp->xnb_rx_ring_handle;
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &unmap_op, 1) != 0)
			cmn_err(CE_WARN, "xnb_disconnect_rings: "
			    "cannot unmap rx-ring page (%d)",
			    unmap_op.status);

		xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	}

	if (xnbp->xnb_rx_ring_addr != NULL) {
		hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
		vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE);
		xnbp->xnb_rx_ring_addr = NULL;
	}

	if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) {
		struct gnttab_unmap_grant_ref unmap_op;

		unmap_op.host_addr = (uint64_t)(uintptr_t)
		    xnbp->xnb_tx_ring_addr;
		unmap_op.dev_bus_addr = 0;
		unmap_op.handle = xnbp->xnb_tx_ring_handle;
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &unmap_op, 1) != 0)
			cmn_err(CE_WARN, "xnb_disconnect_rings: "
			    "cannot unmap tx-ring page (%d)",
			    unmap_op.status);

		xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	}

	if (xnbp->xnb_tx_ring_addr != NULL) {
		hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
		vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE);
		xnbp->xnb_tx_ring_addr = NULL;
	}
}

static void
xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	_NOTE(ARGUNUSED(id, arg));
	xnb_t *xnbp = ddi_get_driver_private(dip);
	XenbusState new_state = *(XenbusState *)impl_data;

	ASSERT(xnbp != NULL);

	switch (new_state) {
	case XenbusStateConnected:
		/* spurious state change */
		if (xnbp->xnb_connected)
			return;

		if (!xnb_read_oe_config(xnbp) ||
		    !xnbp->xnb_flavour->xf_peer_connected(xnbp)) {
			cmn_err(CE_WARN, "xnb_oe_state_change: "
			    "read otherend config error");
			(void) xvdi_switch_state(dip, XBT_NULL,
			    XenbusStateClosed);
			(void) xvdi_post_event(dip, XEN_HP_REMOVE);

			break;
		}


		mutex_enter(&xnbp->xnb_state_lock);
		xnbp->xnb_fe_status = XNB_STATE_READY;
		if (xnbp->xnb_be_status == XNB_STATE_READY)
			xnb_start_connect(xnbp);
		mutex_exit(&xnbp->xnb_state_lock);

		/*
		 * Now that we've attempted to connect it's reasonable
		 * to allow an attempt to detach.
		 */
		xnbp->xnb_detachable = B_TRUE;

		break;

	case XenbusStateClosing:
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);

		break;

	case XenbusStateClosed:
		xnbp->xnb_flavour->xf_peer_disconnected(xnbp);

		mutex_enter(&xnbp->xnb_tx_lock);
		mutex_enter(&xnbp->xnb_rx_lock);

		xnb_disconnect_rings(dip);
		xnbp->xnb_connected = B_FALSE;

		mutex_exit(&xnbp->xnb_rx_lock);
		mutex_exit(&xnbp->xnb_tx_lock);

		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
		(void) xvdi_post_event(dip, XEN_HP_REMOVE);
		/*
		 * In all likelihood this is already set (in the above
		 * case), but if the peer never attempted to connect
		 * and the domain is destroyed we get here without
		 * having been through the case above, so we set it to
		 * be sure.
		 */
		xnbp->xnb_detachable = B_TRUE;

		break;

	default:
		break;
	}
}

static void
xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
    void *arg, void *impl_data)
{
	_NOTE(ARGUNUSED(id, arg));
	xnb_t *xnbp = ddi_get_driver_private(dip);
	xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;

	ASSERT(xnbp != NULL);

	switch (state) {
	case Connected:
		/* spurious hotplug event */
		if (xnbp->xnb_hotplugged)
			break;

		if (!xnb_read_xs_config(xnbp))
			break;

		if (!xnbp->xnb_flavour->xf_hotplug_connected(xnbp))
			break;

		mutex_enter(&xnbp->xnb_tx_lock);
		mutex_enter(&xnbp->xnb_rx_lock);

		xnbp->xnb_hotplugged = B_TRUE;

		mutex_exit(&xnbp->xnb_rx_lock);
		mutex_exit(&xnbp->xnb_tx_lock);

		mutex_enter(&xnbp->xnb_state_lock);
		xnbp->xnb_be_status = XNB_STATE_READY;
		if (xnbp->xnb_fe_status == XNB_STATE_READY)
			xnb_start_connect(xnbp);
		mutex_exit(&xnbp->xnb_state_lock);

		break;

	default:
		break;
	}
}

static struct modldrv modldrv = {
	&mod_miscops, "xnb",
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

int
_init(void)
{
	int i;

	mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL);

	i = mod_install(&modlinkage);
	if (i != DDI_SUCCESS)
		mutex_destroy(&xnb_alloc_page_lock);

	return (i);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int i;

	i = mod_remove(&modlinkage);
	if (i == DDI_SUCCESS)
		mutex_destroy(&xnb_alloc_page_lock);

	return (i);
}